# Spaces: Running  (page-status text captured alongside the source; not part of the module)
"""Shared constants for POLYGUARD-RL.

This module only defines values; it performs no I/O and has no side effects
beyond binding the names below.
"""

from __future__ import annotations

# --- Reward value range -----------------------------------------------------
# NOTE(review): presumably the open-interval bounds rewards are clamped to and
# the number of decimal places they are rounded to -- confirm against the
# reward code that consumes these.
REWARD_MIN: float = 0.001
REWARD_MAX: float = 0.999
REWARD_PRECISION: int = 3

# --- Episode defaults -------------------------------------------------------
DEFAULT_SEED: int = 42
DEFAULT_MAX_STEPS: int = 10

# --- Behavioural limits -----------------------------------------------------
# NOTE(review): names suggest caps on repeated actions and on the share of
# "keep regimen" / "review" actions within an episode -- verify with callers.
MAX_REPEATED_ACTIONS: int = 3
MAX_KEEP_REGIMEN_RATIO: float = 0.6
MAX_REVIEW_RATIO: float = 0.5

# --- Timeouts (seconds, per the constant names) ------------------------------
DEFAULT_STEP_TIMEOUT_SECONDS: float = 2.5
DEFAULT_EPISODE_TIMEOUT_SECONDS: float = 45.0

# --- Reward component weights -----------------------------------------------
# Thirteen component weights; as written they sum to 1.00. Keep that invariant
# in mind when changing any single entry.
DEFAULT_REWARD_WEIGHTS: dict[str, float] = {
    "format_compliance_score": 0.08,
    "candidate_alignment_score": 0.08,
    "legality_score": 0.12,
    "safety_delta_score": 0.15,
    "burden_improvement_score": 0.08,
    "disease_stability_score": 0.10,
    "dosing_quality_score": 0.08,
    "abstention_quality_score": 0.06,
    "efficiency_score": 0.06,
    "process_fidelity_score": 0.06,
    "explanation_grounding_score": 0.03,
    "anti_cheat_score": 0.06,
    "uncertainty_calibration_score": 0.04,
}

# Snapshot of the weight keys, in the dict's insertion order, taken at import
# time. Later mutation of DEFAULT_REWARD_WEIGHTS does not update this tuple.
REQUIRED_REWARD_KEYS: tuple[str, ...] = tuple(DEFAULT_REWARD_WEIGHTS.keys())

# NOTE(review): these four names do not appear in DEFAULT_REWARD_WEIGHTS;
# presumably they label aggregated reward groups -- confirm with the code that
# builds them.
PRIMARY_REWARD_KEYS: tuple[str, ...] = (
    "safety_legality",
    "clinical_improvement",
    "dosing_quality",
    "process_integrity",
)