"""
models/base.py
==============
Abstract base class for all temporal fraud models.

All models MUST:
- Accept a raw DataFrame event stream (sorted by timestamp)
- Maintain internal memory (or not, for static models)
- Return node-level fraud probabilities for a specified set of eval_nodes
- Support reset_memory() for temporal ablation experiments
"""

from __future__ import annotations

from abc import ABC, abstractmethod
from typing import List

import numpy as np
import pandas as pd
|
|
|
|
class TemporalModel(ABC):
    """
    Unified interface for all temporal and static fraud detection models.

    Data contract
    -------------
    df_train / df_eval must contain at minimum:
        sender_id            int    - source node
        receiver_id          int    - destination node
        timestamp            float  - unix seconds, sorted ascending
        is_fraud             int    - edge-level binary label (0/1)
        dynamic_fraud_state  float  - hidden EMA state (available for
                                      mechanistic analysis but MUST NOT be
                                      used as a feature)

    All models receive the complete DataFrame so they can build any internal
    features they need. Models are responsible for respecting the data leakage
    constraint (no dynamic_fraud_state in features).
    """

    # ------------------------------------------------------------------
    # Abstract interface: every concrete model must implement these.
    # ------------------------------------------------------------------

    @property
    @abstractmethod
    def name(self) -> str:
        """Human-readable model identifier used in CSV/plot outputs."""

    @abstractmethod
    def fit(self, df_train: pd.DataFrame, num_epochs: int = 3) -> None:
        """
        Train on chronologically ordered event stream.

        Parameters
        ----------
        df_train : pd.DataFrame
            All events available for training (sorted by timestamp).
        num_epochs : int
            Number of passes over the training data.
        """

    @abstractmethod
    def predict(self, df_eval: pd.DataFrame, eval_nodes: List[int]) -> np.ndarray:
        """
        Return fraud probability scores for eval_nodes.

        The model may perform a warm-up memory pass over df_eval events
        (reading timestamps/IDs only - NOT fraud labels) before scoring.

        Parameters
        ----------
        df_eval : pd.DataFrame
            Events in the evaluation window.
        eval_nodes : List[int]
            Sender IDs of nodes to score, in order.

        Returns
        -------
        probs : np.ndarray, shape (len(eval_nodes),), dtype float32
            Fraud probability in [0, 1] for each node.
        """

    @abstractmethod
    def reset_memory(self) -> None:
        """
        Zero out all internal memory / hidden states.

        Used in the temporal ablation experiment to measure how much
        the model relies on accumulated temporal history vs. static structure.
        For static models (XGBoost, StaticGNN) this is a no-op.
        """

    # ------------------------------------------------------------------
    # Optional hooks with default implementations.
    # ------------------------------------------------------------------

    @property
    def is_temporal(self) -> bool:
        """
        True for models that maintain temporal memory across events.

        The base implementation always returns True; a subclass without
        temporal memory is presumably expected to override this.
        """
        return True

    # ------------------------------------------------------------------
    # Shared helpers.
    # ------------------------------------------------------------------

    @staticmethod
    def _safe_auc(y_true: np.ndarray, y_score: np.ndarray) -> float:
        """
        ROC-AUC that returns 0.5 when only one class is present.

        Parameters
        ----------
        y_true : np.ndarray
            Ground-truth labels.
        y_score : np.ndarray
            Predicted scores, aligned with ``y_true``.

        Returns
        -------
        float
            0.5 when ``y_true`` holds fewer than two distinct values (this
            includes empty input); otherwise the value computed by
            ``sklearn.metrics.roc_auc_score``.
        """
        # Fewer than two distinct labels: short-circuit to the chance-level
        # value instead of handing the input to roc_auc_score.
        if len(np.unique(y_true)) < 2:
            return 0.5

        # Imported lazily, and only once it is actually required, so the
        # degenerate path above does not depend on scikit-learn.
        from sklearn.metrics import roc_auc_score

        return float(roc_auc_score(y_true, y_score))
|
|