Spaces:
Running
Running
| { | |
| "status": "fallback", | |
| "backend": "env_reward_fallback", | |
| "model_id": "Qwen/Qwen2.5-1.5B-Instruct", | |
| "records": 1, | |
| "prompts_path": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/data/processed/training_corpus_grpo_prompts.jsonl", | |
| "reward_summary": { | |
| "count": 1, | |
| "avg_reward": 0.764, | |
| "avg_reward_components": { | |
| "format_compliance_score": 0.999, | |
| "candidate_alignment_score": 0.999, | |
| "legality_score": 0.999, | |
| "safety_delta_score": 0.5, | |
| "burden_improvement_score": 0.5, | |
| "disease_stability_score": 0.9, | |
| "dosing_quality_score": 0.5, | |
| "abstention_quality_score": 0.56, | |
| "efficiency_score": 0.857, | |
| "process_fidelity_score": 0.92, | |
| "explanation_grounding_score": 0.8, | |
| "anti_cheat_score": 0.001, | |
| "uncertainty_calibration_score": 0.7 | |
| }, | |
| "avg_primary_reward_channels": { | |
| "safety_legality": 0.675, | |
| "clinical_improvement": 0.633, | |
| "dosing_quality": 0.53, | |
| "process_integrity": 0.894 | |
| } | |
| }, | |
| "reward_log": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/checkpoints/grpo_reward_components.jsonl", | |
| "train_metrics": { | |
| "steps_executed": 1.0 | |
| }, | |
| "artifact_path": "", | |
| "unsloth_available": false, | |
| "trl_runtime_error": "forced_fallback" | |
| } |