{ "status": "ok", "backend": "trl_transformers", "model_id": "hf-internal-testing/tiny-random-gpt2", "records": 4, "prompts_path": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/data/processed/training_corpus_grpo_prompts.jsonl", "reward_summary": { "count": 2, "avg_reward": 0.764, "avg_reward_components": { "format_compliance_score": 0.999, "candidate_alignment_score": 0.999, "legality_score": 0.999, "safety_delta_score": 0.5, "burden_improvement_score": 0.5, "disease_stability_score": 0.9, "dosing_quality_score": 0.5, "abstention_quality_score": 0.56, "efficiency_score": 0.857, "process_fidelity_score": 0.92, "explanation_grounding_score": 0.8, "anti_cheat_score": 0.001, "uncertainty_calibration_score": 0.7 }, "avg_primary_reward_channels": { "safety_legality": 0.675, "clinical_improvement": 0.633, "dosing_quality": 0.53, "process_integrity": 0.894 } }, "reward_log": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/checkpoints/grpo_reward_components.jsonl", "train_metrics": { "train_runtime": 1.0826, "train_samples_per_second": 1.847, "train_steps_per_second": 0.924, "total_flos": 0.0, "train_loss": 9.910791413858533e-05 }, "artifact_path": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/checkpoints/grpo_adapter", "unsloth_available": false }