"""Evaluation. Reports headline metrics, sliced metrics, and (when
applicable) calibration curves. The output of this module feeds
/eval-report and /write-modelcard."""

from __future__ import annotations

import numpy as np
import pandas as pd


def headline_metrics(y_true: np.ndarray, y_pred: np.ndarray) -> dict[str, float]:
    """Return headline regression metrics (MAE and RMSE) for predictions.

    Replace with metrics that match your task. MAE/RMSE work for
    regression; swap for accuracy/F1/AUROC for classification.

    Args:
        y_true: Ground-truth targets; any numeric array-like.
        y_pred: Predictions, one per target.

    Returns:
        A dict with keys ``"mae"`` and ``"rmse"`` holding plain Python
        floats. Empty inputs yield NaN for both (NumPy's mean of an empty
        array).

    Raises:
        ValueError: Raised by NumPy when the two inputs cannot be aligned
            elementwise or by broadcasting.
    """
    # Work in float64 so unsigned-integer inputs cannot wrap around on
    # subtraction (uint8 1 - 2 == 255) and so lists/bool arrays are accepted.
    truth = np.asarray(y_true, dtype=float)
    preds = np.asarray(y_pred, dtype=float)

    # A (n,) target against a (n, 1) prediction column would otherwise
    # broadcast to an (n, n) matrix and silently produce a wrong metric.
    # Dropping singleton axes restores the elementwise pairing; inputs whose
    # shapes already agree (or a scalar baseline prediction) are untouched.
    if truth.shape != preds.shape and truth.size == preds.size:
        truth = np.squeeze(truth)
        preds = np.squeeze(preds)

    err = truth - preds
    return {
        "mae": float(np.mean(np.abs(err))),
        "rmse": float(np.sqrt(np.mean(err ** 2))),
    }


def slice_metrics(
    df: pd.DataFrame,
    y_col: str,
    pred_col: str,
    slice_cols: list[str],
) -> pd.DataFrame:
    """Compute headline metrics separately for each value of each slice column.

    A single overall number is not enough for a review: per-slice results
    (e.g., by season, by user segment) show where the model is weak.

    Args:
        df: Evaluation frame holding targets, predictions and slice columns.
        y_col: Name of the ground-truth column.
        pred_col: Name of the prediction column.
        slice_cols: Columns to slice by; each is grouped independently.

    Returns:
        A long-format DataFrame with one row per (slice column, slice value)
        pair: ``slice_col``, ``slice_value``, ``n`` (rows in the slice), plus
        one column per metric returned by ``headline_metrics``. When no
        slice is produced (no slice columns or no rows), the result is empty
        but still carries the ``slice_col``/``slice_value``/``n`` columns.

    Raises:
        KeyError: If a requested column is missing from ``df``.
    """
    rows = []
    for col in slice_cols:
        # dropna=False: rows with a missing slice value form their own slice
        # instead of vanishing, so the per-column ``n`` adds up to len(df).
        # observed=True: skip categorical levels with no rows, which would
        # otherwise produce NaN metrics on an empty group.
        for value, group in df.groupby(col, dropna=False, observed=True):
            targets = group[y_col].to_numpy()
            preds = group[pred_col].to_numpy()
            metrics = headline_metrics(targets, preds)
            rows.append(
                {"slice_col": col, "slice_value": value, "n": len(group), **metrics}
            )

    if not rows:
        # Keep a stable schema so downstream column access does not fail on
        # an empty report.
        return pd.DataFrame(columns=["slice_col", "slice_value", "n"])
    return pd.DataFrame(rows)