Spaces:
Running
Running
| """Evaluation. Reports headline metrics, sliced metrics, and (when | |
| applicable) calibration curves. The output of this module feeds | |
| /eval-report and /write-modelcard.""" | |
| from __future__ import annotations | |
| import numpy as np | |
| import pandas as pd | |
def headline_metrics(y_true: np.ndarray, y_pred: np.ndarray) -> dict[str, float]:
    """Return headline regression error metrics for a set of predictions.

    Replace with metrics that match your task. MAE and RMSE work for
    regression; swap for accuracy/F1/AUROC for classification.

    Args:
        y_true: Ground-truth targets (any array-like of real numbers).
        y_pred: Predictions, with exactly the same shape as ``y_true``.

    Returns:
        ``{"mae": ..., "rmse": ...}`` as plain Python floats. Both values
        are NaN when the inputs are empty.

    Raises:
        ValueError: If the two inputs do not have the same shape.
    """
    # Work in float64: subtracting unsigned-integer arrays wraps around
    # (uint8 1 - 2 == 255) and boolean arrays cannot be subtracted at all.
    truth = np.asarray(y_true, dtype=float)
    preds = np.asarray(y_pred, dtype=float)

    # Refuse mismatched shapes instead of letting NumPy broadcast them:
    # (n,) against (n, 1) would silently become an (n, n) error matrix.
    if truth.shape != preds.shape:
        raise ValueError(
            f"y_true and y_pred must have the same shape, "
            f"got {truth.shape} and {preds.shape}"
        )

    residuals = truth - preds
    return {
        "mae": float(np.mean(np.abs(residuals))),
        "rmse": float(np.sqrt(np.mean(residuals**2))),
    }
def slice_metrics(
    df: pd.DataFrame,
    y_col: str,
    pred_col: str,
    slice_cols: list[str],
) -> pd.DataFrame:
    """Compute the headline metrics separately for each value of each slice column.

    Per-slice metrics (e.g., by season, by user segment). Hiring managers
    and reviewers both look for this — a single overall number is not
    enough.

    Args:
        df: Frame holding the target, prediction and slice columns.
        y_col: Name of the ground-truth column.
        pred_col: Name of the prediction column.
        slice_cols: Columns to slice by. Each column is sliced on its own
            (no cross product). A bare string is treated as one column name.

    Returns:
        A frame with one row per (slice column, value) pair, holding
        ``slice_col``, ``slice_value``, ``n`` (rows in the slice) and one
        column per key returned by ``headline_metrics``. Rows whose slice
        value is missing are reported as their own NaN slice. When there is
        nothing to report, the frame is empty but still has the
        ``slice_col``, ``slice_value`` and ``n`` columns.
    """
    # A lone string would otherwise be iterated character by character.
    if isinstance(slice_cols, str):
        slice_cols = [slice_cols]

    rows = []
    for col in slice_cols:
        # dropna=False: keep rows with a missing slice value so that the
        # per-slice counts still add up to len(df).
        # observed=True: for categorical columns, report only categories that
        # actually occur; this also pins behavior across pandas versions,
        # where the default for `observed` differs.
        grouped = df.groupby(col, dropna=False, observed=True)
        for value, group in grouped:
            m = headline_metrics(group[y_col].to_numpy(), group[pred_col].to_numpy())
            rows.append({"slice_col": col, "slice_value": value, "n": len(group), **m})

    if not rows:
        # Keep a predictable schema for downstream consumers even when there
        # are no slices (empty frame or no slice columns requested).
        return pd.DataFrame(columns=["slice_col", "slice_value", "n"])
    return pd.DataFrame(rows)