polyguard-openenv / app /common /normalization.py
TheJackBright's picture
Deploy PolyGuard OpenEnv Space
877add7 verified
"""Normalization and reward range utilities."""
from __future__ import annotations
from app.common.constants import REWARD_MAX, REWARD_MIN, REWARD_PRECISION
def clamp_reward(value: float) -> float:
    """Bound a reward to the configured range and round it.

    The input is coerced to ``float``, limited to
    ``[REWARD_MIN, REWARD_MAX]`` and then rounded to ``REWARD_PRECISION``
    decimal places. The concrete bounds live in ``app.common.constants``
    (historically documented here as 0.001..0.999 with 3 decimals).
    """
    # Apply the lower bound first, then the upper bound; the argument
    # order is kept so comparisons behave the same for every input.
    floored = max(REWARD_MIN, float(value))
    bounded = min(REWARD_MAX, floored)
    return round(bounded, REWARD_PRECISION)
def normalize_unit_interval(value: float, lower: float, upper: float) -> float:
    """Map ``value`` linearly from ``[lower, upper]`` onto ``[0.0, 1.0]``.

    Results outside the unit interval are saturated to the nearest end.
    When the range is empty or inverted (``upper <= lower``) the midpoint
    ``0.5`` is returned instead of dividing by a non-positive width.
    """
    if upper <= lower:
        return 0.5

    fraction = (value - lower) / (upper - lower)

    # ``not x > 0.0`` rather than ``x <= 0.0`` so that a NaN fraction also
    # lands on the lower end, exactly as nested min/max would resolve it.
    if not fraction > 0.0:
        return 0.0
    if not fraction < 1.0:
        return 1.0
    return float(fraction)
def to_reward(value: float, lower: float, upper: float) -> float:
    """Convert ``value`` within ``[lower, upper]`` into a clamped reward.

    The value is first normalised to the unit interval via
    ``normalize_unit_interval``, then stretched across
    ``[REWARD_MIN, REWARD_MAX]`` and finally passed through
    ``clamp_reward`` for bounding and rounding.
    """
    unit = normalize_unit_interval(value, lower, upper)
    reward_span = REWARD_MAX - REWARD_MIN
    return clamp_reward(REWARD_MIN + unit * reward_span)