File size: 1,597 Bytes
6c3043e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
from __future__ import annotations

from dataclasses import asdict, dataclass, field


@dataclass
class Probe:
    """One probe record, usually constructed from a mapping via ``from_dict``.

    All fields except ``safe_response_pattern`` are required when the class
    is instantiated directly.
    """

    id: str
    category: str
    fragility_area: str
    severity: str
    task: str
    input: str
    expected_behavior: str
    failure_modes: list[str]
    observable_signal: str
    safe_response_pattern: list[str] = field(default_factory=list)

    @classmethod
    def from_dict(cls, row: dict) -> Probe:
        """Build a ``Probe`` from a mapping of field names to values.

        Required keys: ``id``, ``category``, ``task``, ``input`` and
        ``expected_behavior``.

        Optional keys and their fallbacks:
            * ``fragility_area`` -- the value of ``category``
            * ``severity`` -- ``"medium"``
            * ``failure_modes`` -- empty list
            * ``observable_signal`` -- empty string
            * ``safe_response_pattern`` -- empty list

        The two list-valued fields are copied into new lists, so the
        resulting probe does not alias the containers held by ``row``.

        Raises:
            KeyError: if a required key is absent from ``row``.
        """
        # Values are read in field order so that, when several required keys
        # are missing, the first one in declaration order is reported.
        probe_id = row["id"]
        category = row["category"]
        values = {
            "id": probe_id,
            "category": category,
            "fragility_area": row.get("fragility_area", category),
            "severity": row.get("severity", "medium"),
            "task": row["task"],
            "input": row["input"],
            "expected_behavior": row["expected_behavior"],
            "failure_modes": list(row.get("failure_modes", [])),
            "observable_signal": row.get("observable_signal", ""),
            "safe_response_pattern": list(row.get("safe_response_pattern", [])),
        }
        return cls(**values)


@dataclass
class CaseScore:
    """Evaluation result for a single probe, with extended observability metrics.

    The first twelve fields are required; the matched-pattern lists and the
    ``task`` / ``probe_input`` strings default to empty values.
    """

    # Required fields.
    case_id: str
    category: str
    severity: str
    passed: bool
    risk_score: float
    severity_weight: float
    weighted_risk: float
    detected_failure_modes: list[str]
    explanation: str
    safe_signal_hits: int
    unsafe_signal_hits: int
    boundary_or_refusal_signal: bool

    # Optional fields; each instance gets its own fresh list.
    matched_safe_patterns: list[str] = field(default_factory=list)
    matched_unsafe_patterns: list[str] = field(default_factory=list)
    task: str = ""
    probe_input: str = ""

    def to_dict(self) -> dict:
        """Return all fields as a plain ``dict`` keyed by field name.

        Delegates to ``dataclasses.asdict``, so list values are copied
        rather than shared with this instance.
        """
        as_mapping = asdict(self)
        return as_mapping