File size: 785 Bytes
1b435f0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
from collections.abc import Sequence

import numpy as np
from sentence_transformers import SentenceTransformer


EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"


# Width of the placeholder array returned for empty input. 384 is the value
# the original code hard-coded; it is expected to match the output size of
# EMBEDDING_MODEL -- TODO(review): update if EMBEDDING_MODEL changes.
_EMBEDDING_DIM = 384

# Per-process cache of loaded models, keyed by model name, so repeated calls
# do not pay the model construction cost again.
_MODEL_CACHE = {}


def _get_model(name: str) -> "SentenceTransformer":
    """Return the SentenceTransformer for *name*, loading it on first use."""
    model = _MODEL_CACHE.get(name)
    if model is None:
        model = SentenceTransformer(name)
        _MODEL_CACHE[name] = model
    return model


def embed_failures(failures: Sequence[dict[str, object]]) -> np.ndarray:
    """Embed each failure record as a normalized sentence vector.

    Every record is rendered to text with ``_failure_text`` and the texts are
    encoded in one batch with the ``EMBEDDING_MODEL`` sentence transformer.

    Args:
        failures: Failure records; each must provide the keys that
            ``_failure_text`` reads.

    Returns:
        A 2-D array with one row per failure. Rows are normalized by the
        encoder (``normalize_embeddings=True``). For empty input a
        ``(0, 384)`` float32 array is returned without loading the model.
    """
    texts = [_failure_text(failure) for failure in failures]
    if not texts:
        # float32 keeps the empty result's dtype consistent with what
        # ``encode`` produces by default, so callers can stack/concatenate
        # results without an accidental upcast to float64.
        return np.empty((0, _EMBEDDING_DIM), dtype=np.float32)

    model = _get_model(EMBEDDING_MODEL)
    return model.encode(texts, convert_to_numpy=True, normalize_embeddings=True)


def _failure_text(failure: dict[str, object]) -> str:
    return (
        f"input: {failure['x']}\n"
        f"expected: {failure['y']}\n"
        f"prediction: {failure['prediction']}\n"
        f"reasoning_type: {failure['reasoning_type']}\n"
        f"model: {failure['model_id']}"
    )