"""Evaluation.

Reports headline metrics, sliced metrics, and (when applicable) calibration
curves. The output of this module feeds /eval-report and /write-modelcard.
"""

from __future__ import annotations

import numpy as np
import pandas as pd


def headline_metrics(y_true: np.ndarray, y_pred: np.ndarray) -> dict[str, float]:
    """Return the overall error metrics for a set of predictions.

    Replace with metrics that match your task. MAE/RMSE work for regression;
    swap for accuracy/F1/AUROC for classification.

    Args:
        y_true: Ground-truth target values.
        y_pred: Predicted values, broadcastable against ``y_true``.

    Returns:
        A dict with ``"mae"`` (mean absolute error) and ``"rmse"`` (root mean
        squared error) as plain Python floats. Empty inputs yield NaN.
    """
    # Compute in float64: subtracting in the input dtype wraps around for
    # unsigned/narrow integer arrays (uint8 1 - 2 == 255) and raises for
    # boolean arrays.
    errors = np.asarray(y_true, dtype=np.float64) - np.asarray(y_pred, dtype=np.float64)
    return {
        "mae": float(np.mean(np.abs(errors))),
        "rmse": float(np.sqrt(np.mean(errors**2))),
    }


def slice_metrics(
    df: pd.DataFrame,
    y_col: str,
    pred_col: str,
    slice_cols: list[str],
) -> pd.DataFrame:
    """Compute per-slice metrics (e.g., by season, by user segment).

    A single overall number is not enough; reviewers look for how the model
    behaves on each slice of the data.

    Args:
        df: Frame holding targets, predictions, and the slicing columns.
        y_col: Name of the ground-truth column.
        pred_col: Name of the prediction column.
        slice_cols: Columns to slice by. Each column is sliced independently
            (no cross-products between columns).

    Returns:
        One row per (slice column, distinct value) with ``slice_col``,
        ``slice_value``, ``n`` (row count), and every key returned by
        ``headline_metrics``. Rows whose slice value is missing are reported
        as their own slice with a NaN ``slice_value``. When there is nothing
        to report (no slice columns or no rows), an empty frame carrying only
        the ``slice_col``, ``slice_value`` and ``n`` columns is returned.
    """
    rows = []
    for col in slice_cols:
        # dropna=False keeps rows with a missing slice value as their own
        # group; the default would silently drop them from the report, so the
        # per-slice counts would no longer add up to len(df).
        for value, group in df.groupby(col, dropna=False):
            metrics = headline_metrics(
                group[y_col].to_numpy(),
                group[pred_col].to_numpy(),
            )
            rows.append(
                {"slice_col": col, "slice_value": value, "n": len(group), **metrics}
            )
    if not rows:
        # Keep the bookkeeping columns so downstream column access does not
        # fail on an empty report. Metric columns are omitted because their
        # names are only known from a headline_metrics() result.
        return pd.DataFrame(columns=["slice_col", "slice_value", "n"])
    return pd.DataFrame(rows)