| """ |
| TF-IDF + Truncated SVD embedding for failure records. |
| |
| No network downloads; no GPU required. Runs entirely with scikit-learn. |
| |
| Feature construction combines: |
| - TF-IDF on a failure-context string (input text, expected label, prediction, reasoning type, model id) |
| - TruncatedSVD to produce a dense, low-dimensional representation |
| |
| Adapted from failure-induced-benchmarks/src/failure_geometry/embedding.py |
| (engineered sparse features) but extended to dense SVD projections for |
| KMeans and scatter visualisation. |
| """ |
|
|
| from __future__ import annotations |
|
|
| import numpy as np |
| from sklearn.decomposition import TruncatedSVD |
| from sklearn.feature_extraction.text import TfidfVectorizer |
| from sklearn.preprocessing import normalize |
|
|
|
|
# Default number of components requested by embed_failures().
EMBED_DIMS: int = 32
|
|
|
|
| def _failure_text(failure: dict) -> str: |
| return ( |
| f"input: {failure['x']} " |
| f"expected: {failure['y']} " |
| f"prediction: {failure['prediction']} " |
| f"type: {failure['reasoning_type']} " |
| f"model: {failure['model_id']}" |
| ) |
|
|
|
|
def embed_failures(
    failures: list[dict],
    n_components: int = EMBED_DIMS,
) -> np.ndarray:
    """Embed failure records as row-normalised dense vectors.

    Each record is rendered with ``_failure_text``, vectorised with TF-IDF
    (unigrams and bigrams, at most 800 features, sublinear term frequency)
    and then reduced with ``TruncatedSVD`` using a fixed random state.

    Args:
        failures: Failure records accepted by ``_failure_text``.
        n_components: Upper bound on the number of output columns.

    Returns:
        A 2-D array with one row per failure. The column count is
        ``min(n_components, vocabulary_size - 1, len(failures) - 1)``.
        When that value is below 2, the SVD step is skipped and the leading
        raw TF-IDF column(s) (at least one) are returned instead. An empty
        input yields an array of shape ``(0, n_components)``.
    """
    if not failures:
        return np.empty((0, n_components))

    documents = list(map(_failure_text, failures))

    tfidf_model = TfidfVectorizer(
        sublinear_tf=True,
        ngram_range=(1, 2),
        max_features=800,
    )
    term_matrix = tfidf_model.fit_transform(documents)

    # Keep the SVD rank strictly below both the vocabulary size and the
    # number of documents.
    vocabulary_size = term_matrix.shape[1]
    width = min(n_components, vocabulary_size - 1, len(documents) - 1)

    if width < 2:
        # Too little data for a useful projection: fall back to a slice of
        # the raw TF-IDF matrix, never narrower than one column.
        keep = max(width, 1)
        raw = term_matrix.toarray()
        return normalize(raw[:, :keep])

    reducer = TruncatedSVD(n_components=width, random_state=42)
    projected = reducer.fit_transform(term_matrix)
    return normalize(projected)
|
|
|
|
def embed_for_scatter(failures: list[dict]) -> np.ndarray:
    """Return a two-column embedding of *failures* for 2-D scatter plots.

    With fewer than three failures the result is an all-zero array of shape
    ``(len(failures), 2)``; otherwise the work is delegated to
    ``embed_failures`` with ``n_components=2``.
    """
    count = len(failures)
    if count >= 3:
        return embed_failures(failures, n_components=2)
    # Too few points to project: return placeholder coordinates at the origin.
    return np.zeros((count, 2))
|
|