| from collections.abc import Sequence | |
| import numpy as np | |
| from sentence_transformers import SentenceTransformer | |
# Name of the model checkpoint that embed_failures passes to SentenceTransformer.
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
# Process-wide cache of loaded models keyed by model name, so repeated calls to
# embed_failures do not reload the weights every time.
_MODEL_CACHE: dict[str, "SentenceTransformer"] = {}

# Width of the vectors produced by EMBEDDING_MODEL; only used to shape the
# empty-input result. Must be updated if EMBEDDING_MODEL is changed to a model
# with a different output size.
_EMBEDDING_DIM = 384


def _get_model(name: str) -> "SentenceTransformer":
    """Return the SentenceTransformer for *name*, loading it on first use."""
    model = _MODEL_CACHE.get(name)
    if model is None:
        model = _MODEL_CACHE[name] = SentenceTransformer(name)
    return model


def embed_failures(failures: Sequence[dict[str, object]]) -> np.ndarray:
    """Embed each failure record as a normalized sentence vector.

    Every record is rendered to text with ``_failure_text`` and the resulting
    batch is encoded with the model named by ``EMBEDDING_MODEL``.

    Args:
        failures: Failure records; each must contain the keys read by
            ``_failure_text``.

    Returns:
        A 2-D array with one row per failure. For empty input a ``(0, 384)``
        float32 array is returned without loading the model.

    Raises:
        KeyError: If a record is missing a key required by ``_failure_text``.
    """
    texts = [_failure_text(failure) for failure in failures]
    if not texts:
        # float32 matches the dtype of encode(..., convert_to_numpy=True), so
        # callers can concatenate empty and non-empty results without an
        # accidental upcast to float64.
        return np.empty((0, _EMBEDDING_DIM), dtype=np.float32)
    model = _get_model(EMBEDDING_MODEL)
    return model.encode(texts, convert_to_numpy=True, normalize_embeddings=True)
| def _failure_text(failure: dict[str, object]) -> str: | |
| return ( | |
| f"input: {failure['x']}\n" | |
| f"expected: {failure['y']}\n" | |
| f"prediction: {failure['prediction']}\n" | |
| f"reasoning_type: {failure['reasoning_type']}\n" | |
| f"model: {failure['model_id']}" | |
| ) | |