| { |
| "status": "ok", |
| "training_mode": "sft-baseline", |
| "completed_models": 3, |
| "failed_or_skipped_models": 0, |
| "models": [ |
| { |
| "run_id": "qwen-qwen2-5-0-5b-instruct", |
| "training_mode": "sft-baseline", |
| "model_id": "Qwen/Qwen2.5-0.5B-Instruct", |
| "label": "Qwen2.5-0.5B", |
| "status": "completed", |
| "error": "", |
| "sft_backend": "trl_transformers", |
| "sft_examples": 2000, |
| "sft_train_loss": 0.19233327957964502, |
| "sft_runtime": 234.6302, |
| "grpo_backend": "", |
| "grpo_records": 0, |
| "grpo_avg_reward": 0.726, |
| "sft_inference_reward": 0.726, |
| "sft_valid_rate": 1.0, |
| "sft_latency_seconds": 1.839, |
| "grpo_inference_reward": 0.726, |
| "grpo_valid_rate": 1.0, |
| "grpo_latency_seconds": 0.0, |
| "train_holdout_gap": 0.0, |
| "fallback_detected": false, |
| "reward_range_ok": true, |
| "reward_range_failures": [], |
| "exploit_rate": 0.0, |
| "legal_rate": 0.0, |
| "candidate_diversity": 0.0, |
| "top_candidate_rate": 0.0, |
| "reward_components": {}, |
| "primary_reward_channels": {}, |
| "artifact_paths": { |
| "sft": "/app/checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/sft_adapter", |
| "grpo": "" |
| } |
| }, |
| { |
| "run_id": "qwen-qwen2-5-1-5b-instruct", |
| "training_mode": "sft-baseline", |
| "model_id": "Qwen/Qwen2.5-1.5B-Instruct", |
| "label": "Qwen2.5-1.5B", |
| "status": "completed", |
| "error": "", |
| "sft_backend": "trl_transformers", |
| "sft_examples": 2000, |
| "sft_train_loss": 0.11515871361242898, |
| "sft_runtime": 483.7085, |
| "grpo_backend": "", |
| "grpo_records": 0, |
| "grpo_avg_reward": 0.726, |
| "sft_inference_reward": 0.726, |
| "sft_valid_rate": 1.0, |
| "sft_latency_seconds": 2.158, |
| "grpo_inference_reward": 0.726, |
| "grpo_valid_rate": 1.0, |
| "grpo_latency_seconds": 0.0, |
| "train_holdout_gap": 0.0, |
| "fallback_detected": false, |
| "reward_range_ok": true, |
| "reward_range_failures": [], |
| "exploit_rate": 0.0, |
| "legal_rate": 0.0, |
| "candidate_diversity": 0.0, |
| "top_candidate_rate": 0.0, |
| "reward_components": {}, |
| "primary_reward_channels": {}, |
| "artifact_paths": { |
| "sft": "/app/checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/sft_adapter", |
| "grpo": "" |
| } |
| }, |
| { |
| "run_id": "qwen-qwen2-5-3b-instruct", |
| "training_mode": "sft-baseline", |
| "model_id": "Qwen/Qwen2.5-3B-Instruct", |
| "label": "Qwen2.5-3B", |
| "status": "completed", |
| "error": "", |
| "sft_backend": "trl_transformers", |
| "sft_examples": 2000, |
| "sft_train_loss": 0.18184852770145518, |
| "sft_runtime": 372.1845, |
| "grpo_backend": "", |
| "grpo_records": 0, |
| "grpo_avg_reward": 0.762, |
| "sft_inference_reward": 0.762, |
| "sft_valid_rate": 1.0, |
| "sft_latency_seconds": 2.748, |
| "grpo_inference_reward": 0.762, |
| "grpo_valid_rate": 1.0, |
| "grpo_latency_seconds": 0.0, |
| "train_holdout_gap": 0.0, |
| "fallback_detected": false, |
| "reward_range_ok": true, |
| "reward_range_failures": [], |
| "exploit_rate": 0.0, |
| "legal_rate": 0.0, |
| "candidate_diversity": 0.0, |
| "top_candidate_rate": 0.0, |
| "reward_components": {}, |
| "primary_reward_channels": {}, |
| "artifact_paths": { |
| "sft": "/app/checkpoints/sweeps/qwen-qwen2-5-3b-instruct/sft_adapter", |
| "grpo": "" |
| } |
| } |
| ], |
| "charts": { |
| "sft_vs_grpo_reward": "outputs/plots/sft_vs_grpo_reward.png", |
| "sft_loss_curves": "outputs/plots/sft_loss_curves.png", |
| "qwen_model_sft_reward": "outputs/plots/qwen_model_sft_reward.png", |
| "qwen_model_sft_loss": "outputs/plots/qwen_model_sft_loss.png", |
| "sft_validity_reward": "outputs/plots/sft_validity_reward.png", |
| "grpo_reward_curves": "outputs/plots/grpo_reward_curves.png", |
| "qwen_model_grpo_reward": "outputs/plots/qwen_model_grpo_reward.png", |
| "reward_component_bars": "outputs/plots/reward_component_bars.png", |
| "anti_cheat_failure_rates": "outputs/plots/anti_cheat_failure_rates.png", |
| "train_holdout_gap": "outputs/plots/train_holdout_gap.png", |
| "inference_validity_reward": "outputs/plots/inference_validity_reward.png", |
| "inference_latency_validity": "outputs/plots/inference_latency_validity.png" |
| } |
| } |