File size: 4,324 Bytes
b5cc210 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 | {
"status": "ok",
"training_mode": "sft-baseline",
"completed_models": 3,
"failed_or_skipped_models": 0,
"models": [
{
"run_id": "qwen-qwen2-5-0-5b-instruct",
"training_mode": "sft-baseline",
"model_id": "Qwen/Qwen2.5-0.5B-Instruct",
"label": "Qwen2.5-0.5B",
"status": "completed",
"error": "",
"sft_backend": "trl_transformers",
"sft_examples": 2000,
"sft_train_loss": 0.19233327957964502,
"sft_runtime": 234.6302,
"grpo_backend": "",
"grpo_records": 0,
"grpo_avg_reward": 0.726,
"sft_inference_reward": 0.726,
"sft_valid_rate": 1.0,
"sft_latency_seconds": 1.839,
"grpo_inference_reward": 0.726,
"grpo_valid_rate": 1.0,
"grpo_latency_seconds": 0.0,
"train_holdout_gap": 0.0,
"fallback_detected": false,
"reward_range_ok": true,
"reward_range_failures": [],
"exploit_rate": 0.0,
"legal_rate": 0.0,
"candidate_diversity": 0.0,
"top_candidate_rate": 0.0,
"reward_components": {},
"primary_reward_channels": {},
"artifact_paths": {
"sft": "/app/checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/sft_adapter",
"grpo": ""
}
},
{
"run_id": "qwen-qwen2-5-1-5b-instruct",
"training_mode": "sft-baseline",
"model_id": "Qwen/Qwen2.5-1.5B-Instruct",
"label": "Qwen2.5-1.5B",
"status": "completed",
"error": "",
"sft_backend": "trl_transformers",
"sft_examples": 2000,
"sft_train_loss": 0.11515871361242898,
"sft_runtime": 483.7085,
"grpo_backend": "",
"grpo_records": 0,
"grpo_avg_reward": 0.726,
"sft_inference_reward": 0.726,
"sft_valid_rate": 1.0,
"sft_latency_seconds": 2.158,
"grpo_inference_reward": 0.726,
"grpo_valid_rate": 1.0,
"grpo_latency_seconds": 0.0,
"train_holdout_gap": 0.0,
"fallback_detected": false,
"reward_range_ok": true,
"reward_range_failures": [],
"exploit_rate": 0.0,
"legal_rate": 0.0,
"candidate_diversity": 0.0,
"top_candidate_rate": 0.0,
"reward_components": {},
"primary_reward_channels": {},
"artifact_paths": {
"sft": "/app/checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/sft_adapter",
"grpo": ""
}
},
{
"run_id": "qwen-qwen2-5-3b-instruct",
"training_mode": "sft-baseline",
"model_id": "Qwen/Qwen2.5-3B-Instruct",
"label": "Qwen2.5-3B",
"status": "completed",
"error": "",
"sft_backend": "trl_transformers",
"sft_examples": 2000,
"sft_train_loss": 0.18184852770145518,
"sft_runtime": 372.1845,
"grpo_backend": "",
"grpo_records": 0,
"grpo_avg_reward": 0.762,
"sft_inference_reward": 0.762,
"sft_valid_rate": 1.0,
"sft_latency_seconds": 2.748,
"grpo_inference_reward": 0.762,
"grpo_valid_rate": 1.0,
"grpo_latency_seconds": 0.0,
"train_holdout_gap": 0.0,
"fallback_detected": false,
"reward_range_ok": true,
"reward_range_failures": [],
"exploit_rate": 0.0,
"legal_rate": 0.0,
"candidate_diversity": 0.0,
"top_candidate_rate": 0.0,
"reward_components": {},
"primary_reward_channels": {},
"artifact_paths": {
"sft": "/app/checkpoints/sweeps/qwen-qwen2-5-3b-instruct/sft_adapter",
"grpo": ""
}
}
],
"charts": {
"sft_vs_grpo_reward": "outputs/plots/sft_vs_grpo_reward.png",
"sft_loss_curves": "outputs/plots/sft_loss_curves.png",
"qwen_model_sft_reward": "outputs/plots/qwen_model_sft_reward.png",
"qwen_model_sft_loss": "outputs/plots/qwen_model_sft_loss.png",
"sft_validity_reward": "outputs/plots/sft_validity_reward.png",
"grpo_reward_curves": "outputs/plots/grpo_reward_curves.png",
"qwen_model_grpo_reward": "outputs/plots/qwen_model_grpo_reward.png",
"reward_component_bars": "outputs/plots/reward_component_bars.png",
"anti_cheat_failure_rates": "outputs/plots/anti_cheat_failure_rates.png",
"train_holdout_gap": "outputs/plots/train_holdout_gap.png",
"inference_validity_reward": "outputs/plots/inference_validity_reward.png",
"inference_latency_validity": "outputs/plots/inference_latency_validity.png"
}
} |