Spaces:
Running
Running
| """Standalone reward functions with strict [0.001, 0.999] output.""" | |
| from __future__ import annotations | |
| from app.common.normalization import clamp_reward | |
def format_compliance_score(valid: bool) -> float:
    """Return a binary reward for schema validity.

    Args:
        valid: Truthy when the output passed schema validation.

    Returns:
        ``clamp_reward(0.999)`` when ``valid`` is truthy, otherwise
        ``clamp_reward(0.001)``.
    """
    if valid:
        return clamp_reward(0.999)
    return clamp_reward(0.001)
def candidate_alignment_score(aligned: bool) -> float:
    """Return a binary reward for alignment with the legal candidate set.

    Args:
        aligned: Truthy when the selected action references the legal
            candidate set.

    Returns:
        ``clamp_reward(0.999)`` when ``aligned`` is truthy, otherwise
        ``clamp_reward(0.001)``.
    """
    if aligned:
        return clamp_reward(0.999)
    return clamp_reward(0.001)
def legality_score(legal: bool) -> float:
    """Return a binary reward for hard-constraint satisfaction.

    Args:
        legal: Truthy when the hard constraints are satisfied.

    Returns:
        ``clamp_reward(0.999)`` when ``legal`` is truthy, otherwise
        ``clamp_reward(0.001)``.
    """
    if legal:
        return clamp_reward(0.999)
    return clamp_reward(0.001)
def safety_delta_score(delta: float) -> float:
    """Map a safety risk delta onto a reward centred at 0.5.

    Args:
        delta: Risk delta; a positive value means lower safety risk.

    Returns:
        ``clamp_reward(0.5 + delta * 0.4)``: a zero delta maps to the
        neutral 0.5 and each unit of delta shifts the raw value by 0.4
        before clamping.
    """
    raw_score = 0.5 + delta * 0.4
    return clamp_reward(raw_score)
def burden_improvement_score(delta: float) -> float:
    """Map a burden delta onto a reward centred at 0.5.

    Args:
        delta: Burden delta; a positive value indicates lower burden.

    Returns:
        ``clamp_reward(0.5 + delta * 0.4)``: a zero delta maps to the
        neutral 0.5 and each unit of delta shifts the raw value by 0.4
        before clamping.
    """
    raw_score = 0.5 + delta * 0.4
    return clamp_reward(raw_score)
def disease_stability_score(stability: float) -> float:
    """Turn a disease-stability proxy into a clamped reward.

    Args:
        stability: Stability proxy, expected in [0, 1]. Missing values
            must be imputed by the caller; this function does no
            imputation of its own.

    Returns:
        ``stability`` passed through ``clamp_reward`` unchanged otherwise.
    """
    clamped = clamp_reward(stability)
    return clamped
def dosing_quality_score(quality: float) -> float:
    """Turn a dose-quality proxy into a clamped reward.

    Args:
        quality: Dose-quality proxy, expected in [0, 1]. For non-dose
            scenarios the caller is expected to supply a neutral
            default; none is applied here.

    Returns:
        ``quality`` passed through ``clamp_reward`` unchanged otherwise.
    """
    clamped = clamp_reward(quality)
    return clamped
def abstention_quality_score(good_abstain: bool) -> float:
    """Score the quality of an abstention, not the mere act of abstaining.

    Args:
        good_abstain: Truthy when the abstention was judged to be good.

    Returns:
        ``clamp_reward(0.8)`` when ``good_abstain`` is truthy, otherwise
        ``clamp_reward(0.3)``.
    """
    if good_abstain:
        base = 0.8
    else:
        base = 0.3
    return clamp_reward(base)
def efficiency_score(step_fraction: float) -> float:
    """Reward shorter successful trajectories with a higher score.

    Args:
        step_fraction: Fraction of steps used; presumably in [0, 1]
            (TODO confirm against callers).

    Returns:
        ``clamp_reward(1.0 - step_fraction)``, so a smaller fraction
        yields a larger raw value before clamping.
    """
    unused_fraction = 1.0 - step_fraction
    return clamp_reward(unused_fraction)
def process_fidelity_score(fidelity: float) -> float:
    """Turn a process-supervision fidelity value into a clamped reward.

    Args:
        fidelity: Fidelity of the clinical decision sequence, as
            computed by the caller.

    Returns:
        ``fidelity`` passed through ``clamp_reward``; no other
        transformation is applied.
    """
    clamped = clamp_reward(fidelity)
    return clamped
def explanation_grounding_score(grounded: float) -> float:
    """Turn an explanation-grounding value into a clamped reward.

    Args:
        grounded: Degree to which the explanation is grounded, as
            computed by the caller.

    Returns:
        ``grounded`` passed through ``clamp_reward``; no other
        transformation is applied.
    """
    clamped = clamp_reward(grounded)
    return clamped
def anti_cheat_score(exploit: bool) -> float:
    """Give exploit-like behaviour the floor score.

    Args:
        exploit: Truthy when exploit-like behaviour was detected.

    Returns:
        ``clamp_reward(0.001)`` when ``exploit`` is truthy, otherwise
        ``clamp_reward(0.999)``.
    """
    if exploit:
        return clamp_reward(0.001)
    return clamp_reward(0.999)
def uncertainty_calibration_score(calibration: float) -> float:
    """Turn a confidence-calibration value into a clamped reward.

    Args:
        calibration: Calibration quality, as computed by the caller.

    Returns:
        ``calibration`` passed through ``clamp_reward``; no other
        transformation is applied.
    """
    clamped = clamp_reward(calibration)
    return clamped