Spaces:

obversarystudios
/

agent-threat-map

Running

Threat-map metrics + observable geometry (embed/cluster/MI)

6c3043e verified 1 day ago

1.6 kB

	from __future__ import annotations

	from dataclasses import asdict, dataclass, field


	@dataclass
	class Probe:
	    """A single probe record, constructible directly or from a plain dict.

	    All fields except ``safe_response_pattern`` are required when the class
	    is instantiated directly; :meth:`from_dict` additionally supplies
	    defaults for several optional keys.
	    """

	    id: str
	    category: str
	    fragility_area: str
	    severity: str
	    task: str
	    input: str
	    expected_behavior: str
	    failure_modes: list[str]
	    observable_signal: str
	    safe_response_pattern: list[str] = field(default_factory=list)

	    @staticmethod
	    def _as_list(value) -> list:
	        """Normalise an optional list-valued entry to a fresh list.

	        ``None`` becomes ``[]`` and a lone string becomes a one-item list,
	        so a null entry no longer raises ``TypeError`` and a bare string is
	        not split into individual characters. Any other iterable is copied.
	        """
	        if value is None:
	            return []
	        if isinstance(value, str):
	            return [value]
	        return list(value)

	    @classmethod
	    def from_dict(cls, row: dict) -> Probe:
	        """Build a :class:`Probe` from a mapping.

	        Required keys: ``id``, ``category``, ``task``, ``input`` and
	        ``expected_behavior``.

	        Optional keys and their fallbacks:
	          * ``fragility_area`` -- falls back to the value of ``category``.
	          * ``severity`` -- falls back to ``"medium"``.
	          * ``failure_modes`` / ``safe_response_pattern`` -- fall back to an
	            empty list; a ``None`` value is treated as empty and a single
	            string is wrapped in a list.
	          * ``observable_signal`` -- falls back to ``""``.

	        Raises:
	            KeyError: if any required key is missing from ``row``.
	        """
	        return cls(
	            id=row["id"],
	            category=row["category"],
	            fragility_area=row.get("fragility_area", row["category"]),
	            severity=row.get("severity", "medium"),
	            task=row["task"],
	            input=row["input"],
	            expected_behavior=row["expected_behavior"],
	            # Copy list-valued entries so the probe never aliases the
	            # caller's lists.
	            failure_modes=cls._as_list(row.get("failure_modes")),
	            observable_signal=row.get("observable_signal", ""),
	            safe_response_pattern=cls._as_list(row.get("safe_response_pattern")),
	        )


	@dataclass
	class CaseScore:
	    """Evaluation record for one probe, including extended observability metrics.

	    The first twelve fields are required; the matched-pattern lists and the
	    ``task`` / ``probe_input`` strings default to empty values.
	    """

	    # Required fields.
	    case_id: str
	    category: str
	    severity: str
	    passed: bool
	    risk_score: float
	    severity_weight: float
	    weighted_risk: float
	    detected_failure_modes: list[str]
	    explanation: str
	    safe_signal_hits: int
	    unsafe_signal_hits: int
	    boundary_or_refusal_signal: bool

	    # Optional fields; each instance gets its own fresh list.
	    matched_safe_patterns: list[str] = field(default_factory=list)
	    matched_unsafe_patterns: list[str] = field(default_factory=list)
	    task: str = ""
	    probe_input: str = ""

	    def to_dict(self) -> dict:
	        """Return the record as a plain ``dict`` keyed by field name."""
	        payload = asdict(self)
	        return payload