{ "status": "ok", "training_mode": "full", "completed_models": 1, "failed_or_skipped_models": 0, "models": [ { "run_id": "qwen-qwen2-5-3b-instruct", "training_mode": "full", "model_id": "Qwen/Qwen2.5-3B-Instruct", "label": "Qwen2.5-3B", "status": "completed", "error": "", "sft_backend": "trl_transformers", "sft_examples": 2000, "sft_train_loss": 0.15688225453009363, "sft_runtime": 715.2908, "grpo_backend": "trl_transformers", "grpo_records": 2000, "grpo_avg_reward": 0.767, "sft_inference_reward": 0.781, "sft_valid_rate": 1.0, "sft_latency_seconds": 2.863, "grpo_inference_reward": 0.726, "grpo_valid_rate": 1.0, "grpo_latency_seconds": 3.681, "train_holdout_gap": 0.041, "fallback_detected": false, "reward_range_ok": true, "reward_range_failures": [], "exploit_rate": 0.411, "legal_rate": 0.93, "candidate_diversity": 0.003, "top_candidate_rate": 0.668, "reward_components": { "format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.929, "safety_delta_score": 0.497, "burden_improvement_score": 0.469, "disease_stability_score": 0.861, "dosing_quality_score": 0.526, "abstention_quality_score": 0.56, "efficiency_score": 0.849, "process_fidelity_score": 0.856, "explanation_grounding_score": 0.795, "anti_cheat_score": 0.589, "uncertainty_calibration_score": 0.747 }, "primary_reward_channels": { "safety_legality": 0.816, "clinical_improvement": 0.609, "dosing_quality": 0.543, "process_integrity": 0.875 }, "artifact_paths": { "sft": "/app/checkpoints/sweeps/qwen-qwen2-5-3b-instruct/sft_adapter", "grpo": "/app/checkpoints/sweeps/qwen-qwen2-5-3b-instruct/grpo_adapter" } } ], "charts": { "sft_vs_grpo_reward": "outputs/plots/sft_vs_grpo_reward.png", "sft_loss_curves": "outputs/plots/sft_loss_curves.png", "qwen_model_sft_reward": "outputs/plots/qwen_model_sft_reward.png", "qwen_model_sft_loss": "outputs/plots/qwen_model_sft_loss.png", "sft_validity_reward": "outputs/plots/sft_validity_reward.png", "grpo_reward_curves": "outputs/plots/grpo_reward_curves.png", "qwen_model_grpo_reward": "outputs/plots/qwen_model_grpo_reward.png", "reward_component_bars": "outputs/plots/reward_component_bars.png", "anti_cheat_failure_rates": "outputs/plots/anti_cheat_failure_rates.png", "train_holdout_gap": "outputs/plots/train_holdout_gap.png", "inference_validity_reward": "outputs/plots/inference_validity_reward.png", "inference_latency_validity": "outputs/plots/inference_latency_validity.png" } }