Spaces:

SouravNath
/

repomind-api

Running

File size: 6,997 Bytes

dc71cad

"""
uncertainty/temperature_scaling.py
────────────────────────────────────
Temperature scaling for DeBERTa classifier logits.

After fine-tuning, DeBERTa's raw logits are often overconfident.
Temperature scaling is the simplest, most effective calibration method
(Guo et al., 2017 — "On Calibration of Modern Neural Networks").

Method:
    calibrated_prob = softmax(logits / T)
    T is learned by minimising NLL on a held-out calibration set.

For our use case, T is fit on the SWE-bench validation split:
    - Positive examples: (issue, gold_file) pairs → label=1
    - Negative examples: (issue, non-gold_file) pairs → label=0
    - T is a single scalar, so there is only one parameter to fit (minimal overfitting risk)

After calibration:
    - ECE (Expected Calibration Error) < 0.05 target
    - Reliability diagram should be close to diagonal

Integration:
    DeBERTa ranker outputs raw logits → temperature_scale() → calibrated prob
    Calibrated prob replaces raw relevance_score in RankedFile
"""
from __future__ import annotations

import json
import logging
from pathlib import Path
from typing import Optional

import numpy as np

logger = logging.getLogger(__name__)


class TemperatureScaler:
    """
    Learns a single temperature parameter T by minimising NLL on validation data.

    T > 1: softer probabilities (reduces overconfidence)
    T < 1: harder probabilities (makes model more confident)
    T = 1: uncalibrated (no change)

    T must be a finite, strictly positive number; the constructor (and
    therefore ``load``) rejects anything else with ``ValueError`` so that
    ``scale`` can never divide by zero or flip the sign of the logits.
    """

    def __init__(self, T: float = 1.0):
        """
        Args:
            T: initial temperature; must be finite and > 0.
        Raises:
            ValueError: if T is not a finite positive number.
        """
        T = float(T)
        if not np.isfinite(T) or T <= 0.0:
            raise ValueError(
                f"Temperature T must be a finite positive number, got {T!r}"
            )
        self.T = T
        self._fitted = False

    def scale(self, logits: np.ndarray) -> np.ndarray:
        """
        Apply temperature scaling and return calibrated probabilities.

        Args:
            logits: shape (n, 2) — binary classification logits
        Returns:
            probs: shape (n, 2) — calibrated probabilities
        """
        # Reuse the shared numerically stable softmax rather than duplicating it.
        return self._softmax(np.asarray(logits) / self.T)

    def scale_score(self, logit_positive: float) -> float:
        """
        Scale a single logit for the positive class → calibrated probability.

        The negative-class logit is fixed at 0, so the result equals
        sigmoid(logit_positive / T).
        """
        # Convert single value to 2-class logit pair
        logits = np.array([[0.0, logit_positive]])
        probs = self.scale(logits)
        return float(probs[0, 1])

    def fit(
        self,
        logits: np.ndarray,   # shape (n, 2)
        labels: np.ndarray,   # shape (n,) — 0 or 1
        n_iter: int = 100,
        lr: float = 0.01,
        tol: float = 1e-6,
    ) -> dict:
        """
        Fit temperature by minimising NLL using gradient descent.

        Args:
            logits: shape (n, n_classes) raw classifier logits, n >= 1.
            labels: shape (n,) integer class indices in [0, n_classes).
            n_iter: maximum number of gradient steps.
            lr: step size applied to the (numerical) NLL gradient.
            tol: stop once a step would change T by less than this.

        Returns:
            stats dict: {T_before, T_after, nll_before, nll_after,
                         ece_before, ece_after, fitted}

        Raises:
            ValueError: if the calibration set is empty, the shapes do not
                match, or a label is outside [0, n_classes).
        """
        logits, labels = self._check_calibration_data(logits, labels)

        T_init = self.T
        nll_before = self._nll(logits, labels, T_init)
        ece_before = self._ece(logits, labels, T_init)

        # Simple gradient descent over scalar T
        T = float(T_init)
        converged = False
        for i in range(n_iter):
            grad = self._nll_gradient(logits, labels, T)
            if not np.isfinite(grad):
                # max(nan, 0.01) evaluates to nan, so a bad gradient would
                # otherwise poison T for every later call to scale().
                logger.warning(
                    "Temperature scaling: non-finite gradient at iteration %d; "
                    "keeping T=%.4f", i, T
                )
                break
            T_new = max(T - lr * grad, 0.01)  # T must be positive
            if abs(T_new - T) < tol:
                logger.debug("Temperature scaling converged at iteration %d", i)
                converged = True
                break
            T = T_new

        if not converged:
            logger.debug(
                "Temperature scaling stopped after at most %d iterations "
                "without meeting tol=%g", n_iter, tol
            )

        self.T = T
        self._fitted = True

        nll_after = self._nll(logits, labels, T)
        ece_after = self._ece(logits, labels, T)

        logger.info(
            "Temperature scaling: T=%.3f→%.3f | NLL: %.4f→%.4f | ECE: %.4f→%.4f",
            T_init, T, nll_before, nll_after, ece_before, ece_after
        )
        return {
            "T_before": T_init, "T_after": T,
            "nll_before": nll_before, "nll_after": nll_after,
            "ece_before": ece_before, "ece_after": ece_after,
            "fitted": True,
        }

    @staticmethod
    def _check_calibration_data(logits, labels):
        """Validate a calibration set and return it as (logits, int labels) arrays."""
        logits = np.asarray(logits)
        labels = np.asarray(labels)
        if logits.ndim != 2:
            raise ValueError(
                f"logits must be 2-D (n, n_classes), got shape {logits.shape}"
            )
        if labels.ndim != 1 or labels.shape[0] != logits.shape[0]:
            # A (n, 1) label array would broadcast inside the fancy index in
            # _nll and silently produce a wrong likelihood.
            raise ValueError(
                f"labels must have shape ({logits.shape[0]},), got {labels.shape}"
            )
        if logits.shape[0] == 0:
            raise ValueError("Cannot fit temperature on an empty calibration set")
        labels = labels.astype(int)
        if labels.min() < 0 or labels.max() >= logits.shape[1]:
            # Negative labels would otherwise wrap around to the last class.
            raise ValueError(
                f"labels must lie in [0, {logits.shape[1]}), "
                f"got range [{labels.min()}, {labels.max()}]"
            )
        return logits, labels

    def _nll(self, logits: np.ndarray, labels: np.ndarray, T: float) -> float:
        """Negative log-likelihood at temperature T."""
        probs = self._softmax(logits / T)
        eps = 1e-8  # keeps log() finite when a probability underflows to 0
        correct_probs = probs[np.arange(len(labels)), labels.astype(int)]
        return float(-np.mean(np.log(correct_probs + eps)))

    def _nll_gradient(self, logits: np.ndarray, labels: np.ndarray, T: float) -> float:
        """Numerical (central-difference) gradient of NLL w.r.t. T."""
        eps = 1e-4
        return (self._nll(logits, labels, T + eps) - self._nll(logits, labels, T - eps)) / (2 * eps)

    def _ece(self, logits: np.ndarray, labels: np.ndarray, T: float, n_bins: int = 10) -> float:
        """
        Expected Calibration Error (ECE).

        Samples are grouped into n_bins equal-width confidence bins; ECE is the
        sum over bins of (fraction of samples in the bin) × |accuracy − mean
        confidence|.
        """
        probs = self._softmax(logits / T)
        max_probs = probs.max(axis=1)
        predictions = probs.argmax(axis=1)
        correct = (predictions == labels.astype(int))

        bins = np.linspace(0, 1, n_bins + 1)
        ece = 0.0
        for i in range(n_bins):
            # Bins are (lo, hi]; the top softmax probability is always > 0,
            # so the open lower edge of the first bin never drops a sample.
            mask = (max_probs > bins[i]) & (max_probs <= bins[i + 1])
            if mask.sum() == 0:
                continue
            acc = correct[mask].mean()
            conf = max_probs[mask].mean()
            ece += mask.mean() * abs(acc - conf)
        return float(ece)

    @staticmethod
    def _softmax(logits: np.ndarray) -> np.ndarray:
        """Row-wise softmax; subtracts each row's max first to avoid overflow."""
        shifted = logits - logits.max(axis=1, keepdims=True)
        exp = np.exp(shifted)
        return exp / exp.sum(axis=1, keepdims=True)

    def save(self, path: Path) -> None:
        """Write {"T", "fitted"} as JSON to *path*, creating parent directories."""
        target = Path(path)
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_text(
            json.dumps({"T": self.T, "fitted": self._fitted}),
            encoding="utf-8",
        )
        logger.info("Temperature scaler saved: T=%.4f → %s", self.T, path)

    @classmethod
    def load(cls, path: Path) -> "TemperatureScaler":
        """
        Rebuild a scaler from a JSON file written by ``save``.

        Raises:
            KeyError: if the file has no "T" entry.
            ValueError: if the stored T is not a finite positive number.
        """
        data = json.loads(Path(path).read_text(encoding="utf-8"))
        ts = cls(T=data["T"])
        ts._fitted = data.get("fitted", False)
        logger.info("Temperature scaler loaded: T=%.4f from %s", ts.T, path)
        return ts


# ── ECE visualisation helper ──────────────────────────────────────────────────

def reliability_diagram_data(
    probs: np.ndarray,        # shape (n,) — predicted positive probabilities
    labels: np.ndarray,       # shape (n,) — true binary labels
    n_bins: int = 10,
) -> list[dict]:
    """
    Compute data for a reliability diagram.

    The interval [0, 1] is split into ``n_bins`` equal-width bins. Every bin is
    half-open ``[lo, hi)`` except the last, which is closed ``[lo, 1.0]`` so
    that predictions of exactly 1.0 are counted rather than dropped. Empty
    bins are omitted from the result.

    Args:
        probs: shape (n,) predicted probabilities of the positive class.
        labels: shape (n,) true binary labels (0 or 1).
        n_bins: number of equal-width bins.

    Returns list of bins:
        [{"confidence": 0.15, "accuracy": 0.12, "count": 45}, ...]
        where "confidence" is the bin midpoint, "accuracy" is the mean label
        in the bin and "count" is the number of samples in the bin.
    """
    probs = np.asarray(probs)
    labels = np.asarray(labels)

    edges = np.linspace(0, 1, n_bins + 1)
    last_bin = n_bins - 1
    result = []
    for i in range(n_bins):
        lower, upper = edges[i], edges[i + 1]
        if i == last_bin:
            # Include the right edge so probs == 1.0 land in the top bin.
            mask = (probs >= lower) & (probs <= upper)
        else:
            mask = (probs >= lower) & (probs < upper)
        count = int(mask.sum())
        if count == 0:
            continue
        result.append({
            "confidence": float((lower + upper) / 2),
            "accuracy": float(labels[mask].mean()),
            "count": count,
        })
    return result