"""Normalization and reward range utilities.""" from __future__ import annotations from app.common.constants import REWARD_MAX, REWARD_MIN, REWARD_PRECISION def clamp_reward(value: float) -> float: """Clamp and quantize reward to [0.001, 0.999] with 3 decimals.""" value = min(REWARD_MAX, max(REWARD_MIN, float(value))) return round(value, REWARD_PRECISION) def normalize_unit_interval(value: float, lower: float, upper: float) -> float: if upper <= lower: return 0.5 ratio = (value - lower) / (upper - lower) return float(min(1.0, max(0.0, ratio))) def to_reward(value: float, lower: float, upper: float) -> float: raw = normalize_unit_interval(value, lower, upper) scaled = REWARD_MIN + raw * (REWARD_MAX - REWARD_MIN) return clamp_reward(scaled)