| """Normalization and reward range utilities.""" | |
| from __future__ import annotations | |
| from app.common.constants import REWARD_MAX, REWARD_MIN, REWARD_PRECISION | |
def clamp_reward(value: float) -> float:
    """Bound a reward to the configured range and quantize it.

    The input is coerced to ``float``, limited to the closed interval
    ``[REWARD_MIN, REWARD_MAX]`` and then rounded to ``REWARD_PRECISION``
    decimal places.

    NOTE(review): an earlier docstring described the range as
    [0.001, 0.999] with 3 decimals -- confirm against the constants
    module, since those values are not defined in this file.
    """
    numeric = float(value)
    # Apply the lower bound first, then the upper bound; the argument order
    # inside max()/min() is deliberate and decides which operand wins when
    # a comparison is false (e.g. for NaN).
    floored = max(REWARD_MIN, numeric)
    bounded = min(REWARD_MAX, floored)
    return round(bounded, REWARD_PRECISION)
def normalize_unit_interval(value: float, lower: float, upper: float) -> float:
    """Linearly rescale ``value`` from ``[lower, upper]`` onto ``[0.0, 1.0]``.

    Results outside the unit interval are clipped to the nearest endpoint.
    When the range is empty or inverted (``upper <= lower``) there is no
    meaningful position, so the midpoint ``0.5`` is returned.
    """
    if lower >= upper:
        return 0.5

    fraction = (value - lower) / (upper - lower)

    # Lower clip. Comparisons with NaN are always false, so a NaN fraction
    # also ends up at 0.0 here.
    if fraction > 0.0:
        clipped = fraction
    else:
        clipped = 0.0

    # Upper clip.
    if not clipped < 1.0:
        clipped = 1.0

    return float(clipped)
def to_reward(value: float, lower: float, upper: float) -> float:
    """Convert ``value`` within ``[lower, upper]`` into a quantized reward.

    The value is first normalized to the unit interval, then stretched
    linearly across ``[REWARD_MIN, REWARD_MAX]``, and finally passed through
    ``clamp_reward`` for bounding and rounding.
    """
    unit_position = normalize_unit_interval(value, lower, upper)
    reward_span = REWARD_MAX - REWARD_MIN
    return clamp_reward(REWARD_MIN + unit_position * reward_span)