"""Shared constants for POLYGUARD-RL."""
from __future__ import annotations
# --- Reward range -----------------------------------------------------------
# 0.001 and 0.999 are the smallest and largest values with REWARD_PRECISION
# (3) decimal places that lie strictly between 0 and 1.
# NOTE(review): presumably used to clamp and round reward values -- confirm
# against the code that consumes these constants.
REWARD_PRECISION: int = 3
REWARD_MIN: float = 0.001
REWARD_MAX: float = 0.999

# --- Episode defaults -------------------------------------------------------
DEFAULT_SEED: int = 42
DEFAULT_MAX_STEPS: int = 10

# --- Time budgets, in seconds (per the constant names) ----------------------
DEFAULT_STEP_TIMEOUT_SECONDS: float = 2.5
DEFAULT_EPISODE_TIMEOUT_SECONDS: float = 45.0

# --- Behavioural limits -----------------------------------------------------
# NOTE(review): the ratios look like upper bounds on the fraction of steps
# spent on a given action type -- verify against the caller.
MAX_REPEATED_ACTIONS: int = 3
MAX_KEEP_REGIMEN_RATIO: float = 0.6
MAX_REVIEW_RATIO: float = 0.5
# Default weight for each named reward component. The thirteen weights add
# up to 1.0. Insertion order is significant: REQUIRED_REWARD_KEYS below is
# derived from it.
DEFAULT_REWARD_WEIGHTS: dict[str, float] = {
    "format_compliance_score": 0.08,
    "candidate_alignment_score": 0.08,
    "legality_score": 0.12,
    "safety_delta_score": 0.15,
    "burden_improvement_score": 0.08,
    "disease_stability_score": 0.10,
    "dosing_quality_score": 0.08,
    "abstention_quality_score": 0.06,
    "efficiency_score": 0.06,
    "process_fidelity_score": 0.06,
    "explanation_grounding_score": 0.03,
    "anti_cheat_score": 0.06,
    "uncertainty_calibration_score": 0.04,
}

# The component names above, as an immutable tuple in the same order.
# Iterating a dict yields its keys, so no explicit .keys() call is needed.
REQUIRED_REWARD_KEYS: tuple[str, ...] = tuple(DEFAULT_REWARD_WEIGHTS)
# Names of the four primary reward channels.
# NOTE(review): none of these names is a key of DEFAULT_REWARD_WEIGHTS;
# presumably they are aggregate channels built from the individual component
# scores -- confirm against the reward aggregation code.
PRIMARY_REWARD_KEYS: tuple[str, ...] = (
    "safety_legality",
    "clinical_improvement",
    "dosing_quality",
    "process_integrity",
)