| { |
| "status": "ok", |
| "backend": "trl_transformers", |
| "model_id": "Qwen/Qwen2.5-0.5B-Instruct", |
| "records": 2000, |
| "prompts_path": "/app/data/processed/training_corpus_grpo_prompts.jsonl", |
| "reward_summary": { |
| "count": 4000, |
| "avg_reward": 0.782178, |
| "avg_reward_components": { |
| "format_compliance_score": 0.999, |
| "candidate_alignment_score": 0.999, |
| "legality_score": 0.985277, |
| "safety_delta_score": 0.496104, |
| "burden_improvement_score": 0.494346, |
| "disease_stability_score": 0.8912, |
| "dosing_quality_score": 0.511938, |
| "abstention_quality_score": 0.56, |
| "efficiency_score": 0.84942, |
| "process_fidelity_score": 0.905268, |
| "explanation_grounding_score": 0.800248, |
| "anti_cheat_score": 0.48004, |
| "uncertainty_calibration_score": 0.730195 |
| }, |
| "avg_primary_reward_channels": { |
| "safety_legality": 0.798661, |
| "clinical_improvement": 0.62689, |
| "dosing_quality": 0.535969, |
| "process_integrity": 0.888448 |
| } |
| }, |
| "reward_log": "/app/checkpoints/grpo_reward_components.jsonl", |
| "train_metrics": { |
| "train_runtime": 6960.8084, |
| "train_samples_per_second": 0.287, |
| "train_steps_per_second": 0.287, |
| "total_flos": 0.0, |
| "train_loss": 2.3633859725151752e-06 |
| }, |
| "artifact_path": "/app/checkpoints/grpo_adapter", |
| "unsloth_available": false |
| } |