"""Shared constants for POLYGUARD-RL.

Centralizes the reward bounds, episode defaults, and reward-component
names/weights so each value is defined in exactly one place.
"""

from __future__ import annotations

# ---------------------------------------------------------------------------
# Reward range
# ---------------------------------------------------------------------------
# Both bounds sit strictly inside (0, 1), and REWARD_PRECISION equals the
# number of decimal places used by the two bounds.
# NOTE(review): presumably rewards are clamped to [REWARD_MIN, REWARD_MAX] and
# rounded to REWARD_PRECISION decimals -- confirm against the scoring code.
REWARD_MIN: float = 0.001
REWARD_MAX: float = 0.999
REWARD_PRECISION: int = 3

# ---------------------------------------------------------------------------
# Episode / environment defaults
# ---------------------------------------------------------------------------
# NOTE(review): how the step, repetition, and ratio caps are enforced is not
# visible in this module; their meaning is inferred from the names -- confirm
# against the environment implementation.
DEFAULT_SEED: int = 42
DEFAULT_MAX_STEPS: int = 10
MAX_REPEATED_ACTIONS: int = 3
MAX_KEEP_REGIMEN_RATIO: float = 0.6
MAX_REVIEW_RATIO: float = 0.5
# Timeouts are expressed in seconds, as the names state.
DEFAULT_STEP_TIMEOUT_SECONDS: float = 2.5
DEFAULT_EPISODE_TIMEOUT_SECONDS: float = 45.0

# ---------------------------------------------------------------------------
# Reward components
# ---------------------------------------------------------------------------
# Default weight per reward component. The thirteen weights sum to 1.0.
# Insertion order is significant: REQUIRED_REWARD_KEYS below is derived from
# it, so reordering entries reorders that tuple as well.
DEFAULT_REWARD_WEIGHTS: dict[str, float] = {
    "format_compliance_score": 0.08,
    "candidate_alignment_score": 0.08,
    "legality_score": 0.12,
    "safety_delta_score": 0.15,
    "burden_improvement_score": 0.08,
    "disease_stability_score": 0.10,
    "dosing_quality_score": 0.08,
    "abstention_quality_score": 0.06,
    "efficiency_score": 0.06,
    "process_fidelity_score": 0.06,
    "explanation_grounding_score": 0.03,
    "anti_cheat_score": 0.06,
    "uncertainty_calibration_score": 0.04,
}

# Every component name that has a default weight, in declaration order.
# Iterating a dict yields its keys, so no explicit .keys() call is needed.
REQUIRED_REWARD_KEYS: tuple[str, ...] = tuple(DEFAULT_REWARD_WEIGHTS)

# Coarser-grained reward names. None of these is a key of
# DEFAULT_REWARD_WEIGHTS.
# NOTE(review): presumably each one aggregates several of the component
# scores above -- confirm the mapping where these keys are consumed.
PRIMARY_REWARD_KEYS: tuple[str, ...] = (
    "safety_legality",
    "clinical_improvement",
    "dosing_quality",
    "process_integrity",
)