adithya9903's picture
Upload PolyGuard training artifacts: docs/results
b5cc210 verified
{
"status": "ok",
"training_mode": "full",
"completed_models": 1,
"failed_or_skipped_models": 0,
"models": [
{
"run_id": "qwen-qwen2-5-3b-instruct",
"training_mode": "full",
"model_id": "Qwen/Qwen2.5-3B-Instruct",
"label": "Qwen2.5-3B",
"status": "completed",
"error": "",
"sft_backend": "trl_transformers",
"sft_examples": 2000,
"sft_train_loss": 0.15688225453009363,
"sft_runtime": 715.2908,
"grpo_backend": "trl_transformers",
"grpo_records": 2000,
"grpo_avg_reward": 0.767,
"sft_inference_reward": 0.781,
"sft_valid_rate": 1.0,
"sft_latency_seconds": 2.863,
"grpo_inference_reward": 0.726,
"grpo_valid_rate": 1.0,
"grpo_latency_seconds": 3.681,
"train_holdout_gap": 0.041,
"fallback_detected": false,
"reward_range_ok": true,
"reward_range_failures": [],
"exploit_rate": 0.411,
"legal_rate": 0.93,
"candidate_diversity": 0.003,
"top_candidate_rate": 0.668,
"reward_components": {
"format_compliance_score": 0.999,
"candidate_alignment_score": 0.999,
"legality_score": 0.929,
"safety_delta_score": 0.497,
"burden_improvement_score": 0.469,
"disease_stability_score": 0.861,
"dosing_quality_score": 0.526,
"abstention_quality_score": 0.56,
"efficiency_score": 0.849,
"process_fidelity_score": 0.856,
"explanation_grounding_score": 0.795,
"anti_cheat_score": 0.589,
"uncertainty_calibration_score": 0.747
},
"primary_reward_channels": {
"safety_legality": 0.816,
"clinical_improvement": 0.609,
"dosing_quality": 0.543,
"process_integrity": 0.875
},
"artifact_paths": {
"sft": "/app/checkpoints/sweeps/qwen-qwen2-5-3b-instruct/sft_adapter",
"grpo": "/app/checkpoints/sweeps/qwen-qwen2-5-3b-instruct/grpo_adapter"
}
}
],
"charts": {
"sft_vs_grpo_reward": "outputs/plots/sft_vs_grpo_reward.png",
"sft_loss_curves": "outputs/plots/sft_loss_curves.png",
"qwen_model_sft_reward": "outputs/plots/qwen_model_sft_reward.png",
"qwen_model_sft_loss": "outputs/plots/qwen_model_sft_loss.png",
"sft_validity_reward": "outputs/plots/sft_validity_reward.png",
"grpo_reward_curves": "outputs/plots/grpo_reward_curves.png",
"qwen_model_grpo_reward": "outputs/plots/qwen_model_grpo_reward.png",
"reward_component_bars": "outputs/plots/reward_component_bars.png",
"anti_cheat_failure_rates": "outputs/plots/anti_cheat_failure_rates.png",
"train_holdout_gap": "outputs/plots/train_holdout_gap.png",
"inference_validity_reward": "outputs/plots/inference_validity_reward.png",
"inference_latency_validity": "outputs/plots/inference_latency_validity.png"
}
}