File size: 785 Bytes
1b435f0 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 | from collections.abc import Sequence
import numpy as np
from sentence_transformers import SentenceTransformer
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
def embed_failures(failures: Sequence[dict[str, object]]) -> np.ndarray:
    """Embed failure records as normalised sentence vectors.

    Each record is rendered to text with ``_failure_text`` and the resulting
    strings are encoded with the ``EMBEDDING_MODEL`` sentence-transformers
    model, requesting NumPy output and normalised embeddings.

    Args:
        failures: Failure records. Every record must provide the keys that
            ``_failure_text`` reads, otherwise a ``KeyError`` propagates.

    Returns:
        The array produced by ``SentenceTransformer.encode`` for the rendered
        texts (expected to hold one row per failure). For empty input, an
        empty ``(0, 384)`` float32 array is returned without loading the
        model.
    """
    texts = [_failure_text(failure) for failure in failures]
    if not texts:
        # Short-circuit so an empty batch never pays for loading the model.
        # 384 is presumably the embedding width of EMBEDDING_MODEL; keep the
        # two in sync if the model changes.
        # float32 matches the default output precision of encode(), so empty
        # and non-empty results share a dtype and can be stacked without a
        # silent upcast to float64.
        return np.empty((0, 384), dtype=np.float32)
    # NOTE(review): the model is constructed on every call; callers embedding
    # many small batches may want to reuse a single instance.
    model = SentenceTransformer(EMBEDDING_MODEL)
    return model.encode(texts, convert_to_numpy=True, normalize_embeddings=True)
def _failure_text(failure: dict[str, object]) -> str:
    """Render one failure record as a newline-separated ``label: value`` block.

    The lines appear in a fixed order: input, expected, prediction,
    reasoning_type, model. A record missing any of the keys read here raises
    ``KeyError``.
    """
    # (label shown in the text, key read from the record), in output order.
    labelled_keys = (
        ("input", "x"),
        ("expected", "y"),
        ("prediction", "prediction"),
        ("reasoning_type", "reasoning_type"),
        ("model", "model_id"),
    )
    return "\n".join(
        f"{label}: {failure[key]}" for label, key in labelled_keys
    )
|