File size: 801 Bytes
"""Normalization and reward range utilities."""
from __future__ import annotations
from app.common.constants import REWARD_MAX, REWARD_MIN, REWARD_PRECISION
def clamp_reward(value: float) -> float:
    """Bound a reward to the configured range and quantize it.

    The input is coerced to ``float``, limited to the closed interval
    ``[REWARD_MIN, REWARD_MAX]`` and then rounded to ``REWARD_PRECISION``
    decimal places.  The bounds and precision are imported from
    ``app.common.constants``.

    Args:
        value: Raw reward; anything accepted by ``float()``.

    Returns:
        The bounded, rounded reward.
    """
    as_float = float(value)
    # Apply the lower bound first, then the upper bound.
    floored = max(REWARD_MIN, as_float)
    bounded = min(REWARD_MAX, floored)
    return round(bounded, REWARD_PRECISION)
def normalize_unit_interval(value: float, lower: float, upper: float) -> float:
    """Map ``value`` linearly from ``[lower, upper]`` onto ``[0.0, 1.0]``.

    Results outside the unit interval are clipped to its nearest end.

    Args:
        value: The number to normalize.
        lower: Start of the source range (maps to 0.0).
        upper: End of the source range (maps to 1.0).

    Returns:
        The clipped position of ``value`` within the range, as a float.
        When the range is empty or inverted (``upper <= lower``) the
        midpoint ``0.5`` is returned instead.
    """
    # Degenerate range: no meaningful position exists, fall back to midpoint.
    if lower >= upper:
        return 0.5

    offset = value - lower
    span = upper - lower
    fraction = offset / span

    # Clip from below, then from above.
    floored = max(0.0, fraction)
    return float(min(1.0, floored))
def to_reward(value: float, lower: float, upper: float) -> float:
    """Convert ``value`` from ``[lower, upper]`` into the reward range.

    The value is first normalized to the unit interval with
    ``normalize_unit_interval``, then stretched linearly so that 0.0 maps to
    ``REWARD_MIN`` and 1.0 maps to ``REWARD_MAX``, and finally passed through
    ``clamp_reward``.

    Args:
        value: The number to convert.
        lower: Start of the source range.
        upper: End of the source range.

    Returns:
        The result of ``clamp_reward`` applied to the rescaled value.
    """
    reward_span = REWARD_MAX - REWARD_MIN
    unit = normalize_unit_interval(value, lower, upper)
    return clamp_reward(REWARD_MIN + unit * reward_span)
|