"""Standalone reward functions with strict [0.001, 0.999] output.""" from __future__ import annotations from app.common.normalization import clamp_reward def format_compliance_score(valid: bool) -> float: """Schema validity: valid->0.999, invalid->0.001.""" return clamp_reward(0.999 if valid else 0.001) def candidate_alignment_score(aligned: bool) -> float: """Whether selected action references legal candidate set.""" return clamp_reward(0.999 if aligned else 0.001) def legality_score(legal: bool) -> float: """Hard constraint satisfaction score.""" return clamp_reward(0.999 if legal else 0.001) def safety_delta_score(delta: float) -> float: """Risk-delta mapping where positive delta means lower safety risk.""" return clamp_reward(0.5 + delta * 0.4) def burden_improvement_score(delta: float) -> float: """Burden reduction score; positive delta indicates lower burden.""" return clamp_reward(0.5 + delta * 0.4) def disease_stability_score(stability: float) -> float: """Stability proxy in [0,1], default caller-side imputation when missing.""" return clamp_reward(stability) def dosing_quality_score(quality: float) -> float: """Dose quality proxy in [0,1], neutral caller default for non-dose scenarios.""" return clamp_reward(quality) def abstention_quality_score(good_abstain: bool) -> float: """Judges abstention quality; not merely abstaining.""" return clamp_reward(0.8 if good_abstain else 0.3) def efficiency_score(step_fraction: float) -> float: """Shorter successful trajectories receive higher score.""" return clamp_reward(1.0 - step_fraction) def process_fidelity_score(fidelity: float) -> float: """Process-supervision score for valid clinical decision sequence.""" return clamp_reward(fidelity) def explanation_grounding_score(grounded: float) -> float: """Grounded explanation support score.""" return clamp_reward(grounded) def anti_cheat_score(exploit: bool) -> float: """Exploit-like behavior gets floor score.""" return clamp_reward(0.001 if exploit else 0.999) def uncertainty_calibration_score(calibration: float) -> float: """Confidence calibration score.""" return clamp_reward(calibration)