| """ |
| fingerprint.py — Capability fingerprinting from execution traces. |
| |
| Extracts a structured profile of what an agent CAN do based on what it HAS done: |
| - Domains used (coding, research, data, etc.) |
| - Tool motifs (common tool sequences) |
| - Reasoning patterns (plan→execute, trial→error→fix) |
| - Output schemas observed |
| - Failure modes (what breaks, how often) |
| - Latency/cost profile |
| """ |
| from __future__ import annotations |
| from collections import Counter, defaultdict |
| from dataclasses import dataclass, field |
| from typing import Any |
| from purpose_agent.trace import Trace |
|
|
|
|
@dataclass
class CapabilityFingerprint:
    """Aggregate capability profile built from a batch of execution traces.

    Every field has a default, so an empty fingerprint can be constructed
    with no arguments and populated afterwards.
    """

    # Domain label -> count.
    domains: dict[str, int] = field(default_factory=dict)
    # Tool-name sequences; each motif is a tuple of names.
    tool_motifs: list[tuple[str, ...]] = field(default_factory=list)
    # Action name -> count.
    action_patterns: dict[str, int] = field(default_factory=dict)
    # Failure label -> count.
    failure_modes: dict[str, int] = field(default_factory=dict)
    avg_steps_per_task: float = 0.0
    avg_duration_s: float = 0.0
    total_traces: int = 0
    success_rate: float = 0.0
    # Tool name -> count.
    tool_usage: dict[str, int] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Return the fingerprint as a plain dictionary.

        At most 10 motifs (converted to lists) and the first 20 action
        patterns are exported.  Averages are rounded to one decimal place and
        the success rate to three.  The step average is exported under the
        key ``avg_steps``.
        """
        exported_motifs = [list(motif) for motif in self.tool_motifs[:10]]
        leading_patterns = list(self.action_patterns.items())[:20]

        payload: dict[str, Any] = {}
        payload["domains"] = self.domains
        payload["tool_motifs"] = exported_motifs
        payload["action_patterns"] = dict(leading_patterns)
        payload["failure_modes"] = self.failure_modes
        payload["avg_steps"] = round(self.avg_steps_per_task, 1)
        payload["avg_duration_s"] = round(self.avg_duration_s, 1)
        payload["total_traces"] = self.total_traces
        payload["success_rate"] = round(self.success_rate, 3)
        payload["tool_usage"] = self.tool_usage
        return payload

    def summary(self) -> str:
        """Return a multi-line, human-readable digest of the fingerprint.

        Shows the five highest-count domains and tools, the averages, and
        the first three failure modes in stored order.
        """
        # A stable sort with reverse=True keeps equal counts in stored order.
        ranked_domains = sorted(
            self.domains.items(), key=lambda pair: pair[1], reverse=True
        )[:5]
        ranked_tools = sorted(
            self.tool_usage.items(), key=lambda pair: pair[1], reverse=True
        )[:5]

        domain_text = ", ".join(f"{name}({count})" for name, count in ranked_domains)
        tool_text = ", ".join(f"{name}({count})" for name, count in ranked_tools)
        failure_preview = dict(list(self.failure_modes.items())[:3])

        report_lines = [
            f"Fingerprint: {self.total_traces} traces, {self.success_rate:.0%} success",
            f" Domains: {domain_text}",
            f" Tools: {tool_text}",
            f" Avg steps: {self.avg_steps_per_task:.1f}, Avg time: {self.avg_duration_s:.1f}s",
            f" Failures: {failure_preview}",
        ]
        return "\n".join(report_lines)
|
|
|
|
| |
# Domain label -> lowercase keywords that tag a trace's purpose text with that
# domain.  A purpose may match several domains; matching is by whole
# whitespace-separated word.
_DOMAIN_KEYWORDS: dict[str, set[str]] = {
    "coding": {
        "code", "function", "python", "debug",
        "script", "class", "def", "import",
    },
    "research": {
        "research", "paper", "find",
        "search", "study", "analyze",
    },
    "data": {
        "data", "csv", "database",
        "sql", "statistics", "chart",
    },
    "writing": {
        "write", "blog", "article",
        "essay", "content", "draft",
    },
    "operations": {
        "deploy", "monitor", "server",
        "docker", "pipeline",
    },
    "security": {
        "security", "vulnerability", "cve",
        "audit", "threat",
    },
}
|
|
|
|
def fingerprint_traces(traces: list[Trace]) -> CapabilityFingerprint:
    """
    Analyze a collection of traces and produce a capability fingerprint.

    For every trace this reads ``duration_s``, ``step_count``, ``purpose`` and
    ``events``; for every event it reads ``kind`` and the ``data`` mapping.

    Args:
        traces: Traces to aggregate.  An empty list yields a fingerprint with
            all defaults and ``total_traces == 0``.

    Returns:
        A ``CapabilityFingerprint`` holding the top 10 domains, top 20 tools,
        top 20 actions, top 10 failure labels, the 10 most common tool
        2-grams/3-grams, and per-trace averages.

    Notes:
        - A trace counts as successful at most once, however many finish
          events it contains, so ``success_rate`` never exceeds 1.0.
        - Error payloads are converted with ``str()`` before truncation, so
          non-string values (``None``, dicts, exception objects) are counted
          instead of raising ``TypeError``.
        - A missing or non-numeric ``phi`` score is treated as "no score".

    Usage:
        traces = Trace.load_many("./traces/")
        fp = fingerprint_traces(traces)
        print(fp.summary())
    """
    fp = CapabilityFingerprint(total_traces=len(traces))
    if not traces:
        return fp

    total_steps = 0
    total_duration = 0.0
    successes = 0
    tool_sequences: list[list[str]] = []
    action_counter: Counter = Counter()
    failure_counter: Counter = Counter()
    tool_counter: Counter = Counter()
    domain_counter: Counter = Counter()

    for trace in traces:
        total_duration += trace.duration_s
        total_steps += trace.step_count

        # Domain tagging: whole-word match of the purpose text against each
        # keyword set.  `or ""` tolerates a trace whose purpose is None.
        purpose_words = set((trace.purpose or "").lower().split())
        for domain, keywords in _DOMAIN_KEYWORDS.items():
            if purpose_words & keywords:
                domain_counter[domain] += 1

        current_tool_seq: list[str] = []
        trace_succeeded = False

        for event in trace.events:
            kind = event.kind
            data = event.data

            # Tool invocations (both naming conventions are accepted).
            if kind in ("tool_started", "tool.started"):
                tool_name = data.get("name", data.get("tool", "unknown"))
                tool_counter[tool_name] += 1
                current_tool_seq.append(tool_name)

            # Agent actions; events without a usable name are ignored.
            if kind in ("action", "agent.progress"):
                action_name = data.get("name", data.get("action", ""))
                if action_name:
                    action_counter[action_name] += 1

            # Any event kind containing "error" is recorded as a failure.
            if "error" in kind:
                raw_error = data.get("error_type", data.get("error", "unknown"))
                # str() first: the payload is not guaranteed to be a string,
                # and slicing None or a dict would raise TypeError.
                failure_counter[str(raw_error)[:50]] += 1

            # Run completion: success flag, or a phi score of at least 7.
            if kind in ("run.finished", "run_finished"):
                phi = data.get("phi")
                high_phi = isinstance(phi, (int, float)) and phi >= 7
                if data.get("success") or high_phi:
                    trace_succeeded = True

        # Count each trace once so the rate stays within [0, 1].
        if trace_succeeded:
            successes += 1

        if current_tool_seq:
            tool_sequences.append(current_tool_seq)

    trace_count = len(traces)  # non-zero: the empty case returned above
    fp.domains = dict(domain_counter.most_common(10))
    fp.tool_usage = dict(tool_counter.most_common(20))
    fp.action_patterns = dict(action_counter.most_common(20))
    fp.failure_modes = dict(failure_counter.most_common(10))
    fp.avg_steps_per_task = total_steps / trace_count
    fp.avg_duration_s = total_duration / trace_count
    fp.success_rate = successes / trace_count

    # Tool motifs: consecutive 2-grams, then 3-grams, of each trace's tool
    # sequence.  zip() stops at the shortest slice, so short sequences simply
    # contribute nothing.
    motif_counter: Counter = Counter()
    for seq in tool_sequences:
        motif_counter.update(zip(seq, seq[1:]))
        motif_counter.update(zip(seq, seq[1:], seq[2:]))
    fp.tool_motifs = [motif for motif, _ in motif_counter.most_common(10)]

    return fp
|
|