"""Reward shaping logic for RL-ready code analysis scores."""

from __future__ import annotations

from schemas.response import ScoreBreakdown


class RewardService:
    """Compute reward scores from model, domain, lint, and complexity signals.

    The reward is a weighted sum of the three positive signals minus a weighted
    complexity penalty, clamped to ``[0.0, 1.0]``. The weights are class
    attributes so a subclass can retune them without overriding ``compute``.
    """

    # Weights of the reward formula; the complexity weight is subtracted.
    ML_WEIGHT = 0.4
    DOMAIN_WEIGHT = 0.2
    LINT_WEIGHT = 0.2
    COMPLEXITY_WEIGHT = 0.2

    def compute(self, *, ml_score: float, domain_score: float, lint_score: float, complexity_penalty: float) -> ScoreBreakdown:
        """Apply the weighted reward formula and clamp the result.

        Args:
            ml_score: Model signal, weighted by ``ML_WEIGHT``.
            domain_score: Domain signal, weighted by ``DOMAIN_WEIGHT``.
            lint_score: Lint signal, weighted by ``LINT_WEIGHT``.
            complexity_penalty: Penalty subtracted after weighting by
                ``COMPLEXITY_WEIGHT``.

        The inputs are not validated here; they are presumably expected to
        lie in ``[0, 1]`` (confirm against the callers).

        Returns:
            A ``ScoreBreakdown`` carrying each input and the clamped reward,
            all rounded to four decimal places. A NaN composite yields a
            reward of ``0.0``.
        """

        raw_reward = (
            (self.ML_WEIGHT * ml_score)
            + (self.DOMAIN_WEIGHT * domain_score)
            + (self.LINT_WEIGHT * lint_score)
            - (self.COMPLEXITY_WEIGHT * complexity_penalty)
        )

        # Clamp with explicit comparisons rather than max(0.0, min(1.0, raw)):
        # every comparison against NaN is False, so min(1.0, nan) returns 1.0
        # and the nested form would award the maximum reward for a NaN signal.
        # Here NaN fails both tests and falls through to 0.0.
        if raw_reward >= 1.0:
            reward = 1.0
        elif raw_reward > 0.0:
            reward = raw_reward
        else:
            reward = 0.0

        return ScoreBreakdown(
            ml_score=round(ml_score, 4),
            domain_score=round(domain_score, 4),
            lint_score=round(lint_score, 4),
            complexity_penalty=round(complexity_penalty, 4),
            reward=round(reward, 4),
        )