Rohan03 committed on
Commit
130c63c
·
verified ·
1 Parent(s): dbeec64

Sprint 9A: fingerprint.py — capability fingerprint from execution traces

Browse files
purpose_agent/optimization/fingerprint.py ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""
fingerprint.py — Capability fingerprinting from execution traces.

Extracts a structured profile of what an agent CAN do based on what it HAS done:
- Domains used (coding, research, data, etc.)
- Tool motifs (common tool sequences)
- Reasoning patterns (plan→execute, trial→error→fix)
- Output schemas observed
- Failure modes (what breaks, how often)
- Latency/cost profile
"""
12
from __future__ import annotations

import re
from collections import Counter, defaultdict
from dataclasses import dataclass, field
from typing import Any

from purpose_agent.trace import Trace
17
+
18
+
19
@dataclass
class CapabilityFingerprint:
    """Structured capability profile derived from traces.

    Every field has an empty/zero default, so an instance can be created
    bare and filled in afterwards.
    """

    domains: dict[str, int] = field(default_factory=dict)  # domain → count
    tool_motifs: list[tuple[str, ...]] = field(default_factory=list)  # common tool sequences
    action_patterns: dict[str, int] = field(default_factory=dict)  # pattern → count
    failure_modes: dict[str, int] = field(default_factory=dict)  # error_type → count
    avg_steps_per_task: float = 0.0
    avg_duration_s: float = 0.0
    total_traces: int = 0
    success_rate: float = 0.0
    tool_usage: dict[str, int] = field(default_factory=dict)  # tool → usage count

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict snapshot of the fingerprint.

        ``tool_motifs`` is capped at the first 10 entries (each converted to a
        list) and ``action_patterns`` at the first 20 entries in insertion
        order. Averages are rounded to one decimal place and the success rate
        to three. Note the step average is exported under the key
        ``"avg_steps"``.

        The count mappings are copied so that mutating the returned dict does
        not alter this fingerprint.
        """
        return {
            "domains": dict(self.domains),
            "tool_motifs": [list(motif) for motif in self.tool_motifs[:10]],
            "action_patterns": dict(list(self.action_patterns.items())[:20]),
            "failure_modes": dict(self.failure_modes),
            "avg_steps": round(self.avg_steps_per_task, 1),
            "avg_duration_s": round(self.avg_duration_s, 1),
            "total_traces": self.total_traces,
            "success_rate": round(self.success_rate, 3),
            "tool_usage": dict(self.tool_usage),
        }

    @staticmethod
    def _top_counts(counts: dict[str, int], limit: int) -> list[tuple[str, int]]:
        """Return the *limit* highest-count items, ties kept in insertion order."""
        return sorted(counts.items(), key=lambda item: -item[1])[:limit]

    def summary(self) -> str:
        """Return a five-line human-readable summary.

        Shows the top 5 domains, top 5 tools and top 3 failure modes, each
        ranked by count (highest first).
        """
        domains_text = ", ".join(
            f"{name}({count})" for name, count in self._top_counts(self.domains, 5)
        )
        tools_text = ", ".join(
            f"{name}({count})" for name, count in self._top_counts(self.tool_usage, 5)
        )
        # Rank failures by count like domains/tools instead of relying on the
        # dict's insertion order.
        top_failures = dict(self._top_counts(self.failure_modes, 3))
        return (
            f"Fingerprint: {self.total_traces} traces, {self.success_rate:.0%} success\n"
            f" Domains: {domains_text}\n"
            f" Tools: {tools_text}\n"
            f" Avg steps: {self.avg_steps_per_task:.1f}, Avg time: {self.avg_duration_s:.1f}s\n"
            f" Failures: {top_failures}"
        )
55
+
56
+
57
# Domain classification keywords: domain name → lowercase words that, when
# present in a trace's purpose text, count the trace toward that domain.
# A purpose may match several domains at once.
_DOMAIN_KEYWORDS: dict[str, set[str]] = {
    "coding": {"code", "function", "python", "debug", "script", "class", "def", "import"},
    "research": {"research", "paper", "find", "search", "study", "analyze"},
    "data": {"data", "csv", "database", "sql", "statistics", "chart"},
    "writing": {"write", "blog", "article", "essay", "content", "draft"},
    "operations": {"deploy", "monitor", "server", "docker", "pipeline"},
    "security": {"security", "vulnerability", "cve", "audit", "threat"},
}
66
+
67
+
68
def fingerprint_traces(traces: list[Trace]) -> CapabilityFingerprint:
    """
    Analyze a collection of traces and produce a capability fingerprint.

    For every trace this reads ``duration_s``, ``step_count``, ``purpose`` and
    ``events``; for every event it reads ``kind`` and ``data``.

    Aggregation rules:
    - Domains: a trace counts once toward each domain whose keyword set
      shares a word with the trace's purpose.
    - Tools: ``tool_started`` / ``tool.started`` events, named by
      ``data["name"]``, else ``data["tool"]``, else ``"unknown"``.
    - Actions: ``action`` / ``agent.progress`` events with a non-empty
      ``data["name"]`` or ``data["action"]``.
    - Failures: any event whose kind contains ``"error"``, keyed by the
      first 50 characters of ``error_type`` / ``error`` (or ``"unknown"``).
    - Success: a trace is successful when any ``run.finished`` /
      ``run_finished`` event reports success; each trace counts at most once,
      so ``success_rate`` stays within [0, 1].

    Args:
        traces: Traces to analyze. An empty list yields an all-default
            fingerprint with ``total_traces == 0``.

    Returns:
        A populated ``CapabilityFingerprint`` (top 10 domains, 20 tools,
        20 actions, 10 failure modes and 10 tool motifs).

    Usage:
        traces = Trace.load_many("./traces/")
        fp = fingerprint_traces(traces)
        print(fp.summary())
    """
    fp = CapabilityFingerprint(total_traces=len(traces))
    if not traces:
        return fp

    total_steps = 0
    total_duration = 0.0
    successes = 0
    tool_sequences: list[list[str]] = []
    action_counter: Counter = Counter()
    failure_counter: Counter = Counter()
    tool_counter: Counter = Counter()
    domain_counter: Counter = Counter()

    for trace in traces:
        total_duration += trace.duration_s
        total_steps += trace.step_count
        domain_counter.update(_purpose_domains(trace.purpose))

        current_tool_seq: list[str] = []
        trace_succeeded = False
        for event in trace.events:
            kind = event.kind
            data = event.data or {}

            # Tool usage
            if kind in ("tool_started", "tool.started"):
                tool_name = data.get("name", data.get("tool", "unknown"))
                tool_counter[tool_name] += 1
                current_tool_seq.append(tool_name)

            # Actions
            if kind in ("action", "agent.progress"):
                action_name = data.get("name", data.get("action", ""))
                if action_name:
                    action_counter[action_name] += 1

            # Failures: the payload may be an exception object or other
            # non-string, so stringify before truncating.
            if "error" in kind:
                raw_error = data.get("error_type") or data.get("error") or "unknown"
                failure_counter[str(raw_error)[:50]] += 1

            # Success detection
            if kind in ("run.finished", "run_finished") and _run_succeeded(data):
                trace_succeeded = True

        # Count once per trace even if several finish events were recorded.
        if trace_succeeded:
            successes += 1
        if current_tool_seq:
            tool_sequences.append(current_tool_seq)

    # Compile fingerprint (traces is non-empty here, so division is safe).
    trace_count = len(traces)
    fp.domains = dict(domain_counter.most_common(10))
    fp.tool_usage = dict(tool_counter.most_common(20))
    fp.action_patterns = dict(action_counter.most_common(20))
    fp.failure_modes = dict(failure_counter.most_common(10))
    fp.avg_steps_per_task = total_steps / trace_count
    fp.avg_duration_s = total_duration / trace_count
    fp.success_rate = successes / trace_count
    fp.tool_motifs = _top_tool_motifs(tool_sequences, limit=10)

    return fp


# Minimum numeric "phi" value on a finish event that is treated as success.
# NOTE(review): the meaning/scale of phi is defined elsewhere — confirm.
_SUCCESS_PHI_THRESHOLD = 7


def _purpose_domains(purpose: str | None) -> list[str]:
    """Return each domain whose keywords share a word with *purpose*."""
    # Tokenize on word characters so trailing punctuation ("code,") does not
    # hide a keyword; a missing purpose matches nothing.
    words = set(re.findall(r"[a-z0-9_]+", (purpose or "").lower()))
    return [
        domain
        for domain, keywords in _DOMAIN_KEYWORDS.items()
        if words & keywords
    ]


def _run_succeeded(data: dict[str, Any]) -> bool:
    """Return True if a finish event's data has a truthy success flag or a high enough phi."""
    if data.get("success"):
        return True
    phi = data.get("phi")
    # Guard against phi being None or non-numeric before comparing.
    return isinstance(phi, (int, float)) and phi >= _SUCCESS_PHI_THRESHOLD


def _top_tool_motifs(
    tool_sequences: list[list[str]], limit: int = 10
) -> list[tuple[str, ...]]:
    """Return the *limit* most frequent contiguous 2- and 3-tool windows."""
    motif_counter: Counter = Counter()
    for seq in tool_sequences:
        for width in (2, 3):
            for start in range(len(seq) - width + 1):
                motif_counter[tuple(seq[start:start + width])] += 1
    return [motif for motif, _ in motif_counter.most_common(limit)]