Spaces:
Running
Running
File size: 2,256 Bytes
"""Standalone reward functions with strict [0.001, 0.999] output."""
from __future__ import annotations
from app.common.normalization import clamp_reward
def format_compliance_score(valid: bool) -> float:
    """Score schema validity as a near-binary reward.

    Args:
        valid: Truthy when the output passed schema validation.

    Returns:
        ``clamp_reward(0.999)`` when ``valid`` is truthy, otherwise
        ``clamp_reward(0.001)``.
    """
    if valid:
        return clamp_reward(0.999)
    return clamp_reward(0.001)
def candidate_alignment_score(aligned: bool) -> float:
    """Score whether the selected action references the legal candidate set.

    Args:
        aligned: Truthy when the selected action is drawn from the candidates.

    Returns:
        The clamped high value (0.999) for an aligned action, the clamped
        low value (0.001) otherwise.
    """
    if aligned:
        raw = 0.999
    else:
        raw = 0.001
    return clamp_reward(raw)
def legality_score(legal: bool) -> float:
    """Score hard-constraint satisfaction.

    Args:
        legal: Truthy when every hard constraint is satisfied.

    Returns:
        ``clamp_reward(0.999)`` for a legal outcome, ``clamp_reward(0.001)``
        for an illegal one.
    """
    if legal:
        return clamp_reward(0.999)
    else:
        return clamp_reward(0.001)
def safety_delta_score(delta: float) -> float:
    """Map a risk delta onto a reward centred at 0.5.

    A positive ``delta`` means lower safety risk. A delta of zero yields the
    midpoint 0.5, and each unit of delta shifts the raw value by 0.4 before
    it is handed to ``clamp_reward``.

    Args:
        delta: Signed change in safety risk (positive is safer).

    Returns:
        ``clamp_reward`` applied to ``0.5 + delta * 0.4``.
    """
    raw = 0.5 + delta * 0.4
    return clamp_reward(raw)
def burden_improvement_score(delta: float) -> float:
    """Map a burden delta onto a reward centred at 0.5.

    A positive ``delta`` indicates lower burden. The raw value is the
    midpoint 0.5 shifted by ``delta`` scaled by 0.4, then passed to
    ``clamp_reward``.

    Args:
        delta: Signed change in burden (positive is an improvement).

    Returns:
        ``clamp_reward`` applied to ``0.5 + delta * 0.4``.
    """
    shifted = 0.5 + delta * 0.4
    return clamp_reward(shifted)
def disease_stability_score(stability: float) -> float:
    """Turn a disease-stability proxy into a reward.

    The proxy is expected to lie in [0, 1]. Missing values are not handled
    here; the caller is expected to impute a default before calling.

    Args:
        stability: Stability proxy value supplied by the caller.

    Returns:
        ``clamp_reward(stability)``.
    """
    score = clamp_reward(stability)
    return score
def dosing_quality_score(quality: float) -> float:
    """Turn a dose-quality proxy into a reward.

    The proxy is expected to lie in [0, 1]. For scenarios that involve no
    dosing, the caller is expected to supply a neutral default.

    Args:
        quality: Dose-quality proxy value supplied by the caller.

    Returns:
        ``clamp_reward(quality)``.
    """
    score = clamp_reward(quality)
    return score
def abstention_quality_score(good_abstain: bool) -> float:
    """Score the quality of an abstention, not the mere act of abstaining.

    Args:
        good_abstain: Truthy when the abstention was judged appropriate.

    Returns:
        ``clamp_reward(0.8)`` for a good abstention, ``clamp_reward(0.3)``
        otherwise.
    """
    if good_abstain:
        return clamp_reward(0.8)
    return clamp_reward(0.3)
def efficiency_score(step_fraction: float) -> float:
    """Reward shorter successful trajectories with a higher score.

    Args:
        step_fraction: Fraction of steps used; smaller values score higher.

    Returns:
        ``clamp_reward`` applied to ``1.0 - step_fraction``.
    """
    unused_fraction = 1.0 - step_fraction
    return clamp_reward(unused_fraction)
def process_fidelity_score(fidelity: float) -> float:
    """Process-supervision reward for a valid clinical decision sequence.

    Args:
        fidelity: Fidelity value supplied by the caller.

    Returns:
        ``clamp_reward(fidelity)``.
    """
    score = clamp_reward(fidelity)
    return score
def explanation_grounding_score(grounded: float) -> float:
    """Reward for how well an explanation is grounded.

    Args:
        grounded: Grounding support value supplied by the caller.

    Returns:
        ``clamp_reward(grounded)``.
    """
    score = clamp_reward(grounded)
    return score
def anti_cheat_score(exploit: bool) -> float:
    """Give exploit-like behavior the floor score.

    Unlike the other boolean scorers in this module, a truthy argument
    here maps to the LOW value.

    Args:
        exploit: Truthy when exploit-like behavior was detected.

    Returns:
        ``clamp_reward(0.001)`` when ``exploit`` is truthy, otherwise
        ``clamp_reward(0.999)``.
    """
    if exploit:
        return clamp_reward(0.001)
    return clamp_reward(0.999)
def uncertainty_calibration_score(calibration: float) -> float:
    """Reward for confidence calibration.

    Args:
        calibration: Calibration value supplied by the caller.

    Returns:
        ``clamp_reward(calibration)``.
    """
    score = clamp_reward(calibration)
    return score
|