"""Shared constants for POLYGUARD-RL.

This module only defines values; it performs no computation beyond deriving
``REQUIRED_REWARD_KEYS`` from ``DEFAULT_REWARD_WEIGHTS``.
"""

from __future__ import annotations

# --- Reward range -----------------------------------------------------------
# Lower/upper reward bounds and a precision value.
# NOTE(review): presumably rewards are clamped to [REWARD_MIN, REWARD_MAX] and
# rounded to REWARD_PRECISION decimal places by the consumer -- confirm there.
REWARD_MIN: float = 0.001
REWARD_MAX: float = 0.999
REWARD_PRECISION: int = 3

# --- Episode defaults -------------------------------------------------------
DEFAULT_SEED: int = 42
DEFAULT_MAX_STEPS: int = 10

# --- Action limits ----------------------------------------------------------
# NOTE(review): how these caps are enforced is not visible in this module;
# the ratios are presumably fractions of the steps in an episode -- confirm.
MAX_REPEATED_ACTIONS: int = 3
MAX_KEEP_REGIMEN_RATIO: float = 0.6
MAX_REVIEW_RATIO: float = 0.5

# --- Timeouts (seconds, per the constant names) -----------------------------
DEFAULT_STEP_TIMEOUT_SECONDS: float = 2.5
DEFAULT_EPISODE_TIMEOUT_SECONDS: float = 45.0

# --- Reward component weights -----------------------------------------------
# One weight per reward component; the thirteen values below sum to 1.0.
# Insertion order matters: REQUIRED_REWARD_KEYS is derived from it.
DEFAULT_REWARD_WEIGHTS: dict[str, float] = {
    "format_compliance_score": 0.08,
    "candidate_alignment_score": 0.08,
    "legality_score": 0.12,
    "safety_delta_score": 0.15,
    "burden_improvement_score": 0.08,
    "disease_stability_score": 0.10,
    "dosing_quality_score": 0.08,
    "abstention_quality_score": 0.06,
    "efficiency_score": 0.06,
    "process_fidelity_score": 0.06,
    "explanation_grounding_score": 0.03,
    "anti_cheat_score": 0.06,
    "uncertainty_calibration_score": 0.04,
}

# Keys of DEFAULT_REWARD_WEIGHTS, in the dict's insertion order.
REQUIRED_REWARD_KEYS: tuple[str, ...] = tuple(DEFAULT_REWARD_WEIGHTS)

# A separate, fixed set of four key names; none of them is a key of
# DEFAULT_REWARD_WEIGHTS ("dosing_quality" here vs "dosing_quality_score").
# NOTE(review): presumably aggregate reward channels computed elsewhere --
# confirm against the code that consumes them.
PRIMARY_REWARD_KEYS: tuple[str, ...] = (
    "safety_legality",
    "clinical_improvement",
    "dosing_quality",
    "process_integrity",
)