{ "base_url": "https://aniketasla-debatefloor.hf.space", "rows": [ { "policy": "naive_high_no_investigation", "task_id": "clean_claim", "seed": 42, "steps": 1, "done": true, "reward": 0.7998, "final_decision": "approve_claim", "agent_confidence": "HIGH", "calibration_score": 1.0, "decision_accuracy": 1.0, "fraud_detection_score": 1.0, "evidence_quality_score": 1.0 }, { "policy": "naive_high_no_investigation", "task_id": "contradictory_claim", "seed": 42, "steps": 1, "done": true, "reward": 0.0, "final_decision": "approve_claim", "agent_confidence": "HIGH", "calibration_score": -0.8, "decision_accuracy": 0.0, "fraud_detection_score": 0.0, "evidence_quality_score": 0.0 }, { "policy": "naive_high_no_investigation", "task_id": "distribution_shift_claim", "seed": 42, "steps": 1, "done": true, "reward": 0.0, "final_decision": "approve_claim", "agent_confidence": "HIGH", "calibration_score": -0.8, "decision_accuracy": 0.0, "fraud_detection_score": 0.0, "evidence_quality_score": 0.0 }, { "policy": "calibrated_scripted_investigator", "task_id": "clean_claim", "seed": 42, "steps": 4, "done": true, "reward": 0.7623, "final_decision": "approve_claim", "agent_confidence": "HIGH", "calibration_score": 1.0, "decision_accuracy": 1.0, "fraud_detection_score": 1.0, "evidence_quality_score": 1.0 }, { "policy": "calibrated_scripted_investigator", "task_id": "contradictory_claim", "seed": 42, "steps": 7, "done": true, "reward": 0.5468, "final_decision": "deny_claim", "agent_confidence": "MED", "calibration_score": 0.6, "decision_accuracy": 1.0, "fraud_detection_score": 0.75, "evidence_quality_score": 0.0 }, { "policy": "calibrated_scripted_investigator", "task_id": "distribution_shift_claim", "seed": 42, "steps": 8, "done": true, "reward": 0.3522, "final_decision": "escalate_to_human", "agent_confidence": "LOW", "calibration_score": 0.1, "decision_accuracy": 1.0, "fraud_detection_score": 0.0, "evidence_quality_score": 0.0 } ] }