Spaces:
Running
Running
File size: 1,175 Bytes
93ed35a | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 | """Evaluation. Reports headline metrics, sliced metrics, and (when
applicable) calibration curves. The output of this module feeds
/eval-report and /write-modelcard."""
from __future__ import annotations
import numpy as np
import pandas as pd
def headline_metrics(y_true: np.ndarray, y_pred: np.ndarray) -> dict[str, float]:
    """Return the headline regression metrics for a set of predictions.

    Replace with metrics that match your task. MAE works for
    regression; swap for accuracy/F1/AUROC for classification.

    Args:
        y_true: Ground-truth target values.
        y_pred: Predicted values. They are subtracted from ``y_true``
            elementwise, so NumPy broadcasting rules apply.

    Returns:
        A dict with ``"mae"`` (mean absolute error) and ``"rmse"`` (root
        mean squared error), both as built-in ``float``. Empty inputs
        produce NaN for both metrics.
    """
    # Coerce to float64 before subtracting: unsigned or narrow integer
    # inputs would otherwise wrap around (uint8: 1 - 2 == 255) and silently
    # inflate the metrics, and plain sequences do not support ``-`` at all.
    errors = np.asarray(y_true, dtype=float) - np.asarray(y_pred, dtype=float)
    return {
        "mae": float(np.mean(np.abs(errors))),
        "rmse": float(np.sqrt(np.mean(errors**2))),
    }
def slice_metrics(
    df: pd.DataFrame,
    y_col: str,
    pred_col: str,
    slice_cols: list[str],
) -> pd.DataFrame:
    """Compute the headline metrics separately for each value of each slice column.

    Per-slice metrics (e.g., by season, by user segment). Hiring
    managers and reviewers both look for this — single overall number is
    not enough.

    Args:
        df: Frame holding the targets, the predictions and the slice columns.
        y_col: Name of the column with the ground-truth values.
        pred_col: Name of the column with the predicted values.
        slice_cols: Columns to slice by. Each column is grouped on its own
            (no cross-products between columns).

    Returns:
        A long-format frame with one row per (slice column, slice value)
        pair. Columns are ``slice_col``, ``slice_value``, ``n`` (group
        size) and one column per key returned by ``headline_metrics``.
        When no groups are produced (empty ``slice_cols`` or an empty
        ``df``), the result is an empty frame that still has the
        ``slice_col``, ``slice_value`` and ``n`` columns.
    """
    rows = []
    for col in slice_cols:
        # NOTE: with pandas' default groupby settings, rows whose slice
        # value is missing (NaN/None) do not form a group, so the per-slice
        # ``n`` values may sum to less than ``len(df)``.
        for value, group in df.groupby(col):
            metrics = headline_metrics(
                group[y_col].to_numpy(),
                group[pred_col].to_numpy(),
            )
            rows.append(
                {"slice_col": col, "slice_value": value, "n": len(group), **metrics}
            )
    if not rows:
        # ``pd.DataFrame([])`` has no columns at all; keep the identifying
        # columns so callers can select/filter on them without a KeyError.
        return pd.DataFrame(columns=["slice_col", "slice_value", "n"])
    return pd.DataFrame(rows)
|