Rohan03 committed
Commit be96ac2 · verified · 1 Parent(s): 44f8122

V2 merge: purpose_agent/retroformer.py

Files changed (1)
1. purpose_agent/retroformer.py +248 -0
purpose_agent/retroformer.py ADDED
@@ -0,0 +1,248 @@
"""
retroformer.py — Structured reflection with gradient-free policy improvement.

From Retroformer (arXiv:2308.02151):
    A retrospective model Γ that takes the full trajectory (states, actions,
    rewards) and generates an improved prompt for the next attempt. The key
    insight: the LLM agent is frozen, but the retrospective model learns
    to write better prompts for it.

Adaptation for Purpose Agent (no weight updates):
    Instead of training the retrospective model with policy gradients,
    we use the same LLM to reflect on trajectories and extract structured
    lessons. These lessons become skill_card and failure_pattern memories
    that improve future prompts via the PromptCompiler.

The reflection is structured (not free-form):
    1. What went well? (→ skill_card memories)
    2. What went wrong? (→ failure_pattern memories)
    3. What should change next time? (→ user_preference or tool_policy memories)
    4. What specific state patterns should I watch for? (→ episodic_case memories)

This replaces V1's raw heuristic distillation with a more rigorous,
typed memory extraction process.
"""
from __future__ import annotations

import json
import logging
from typing import Any

from purpose_agent.llm_backend import LLMBackend, ChatMessage
from purpose_agent.trace import Trace
from purpose_agent.memory import MemoryCard, MemoryKind, MemoryStatus
from purpose_agent.v2_types import MemoryScope
from purpose_agent.memory_ci import MemoryCI

logger = logging.getLogger(__name__)

REFLECTION_PROMPT = """\
You are a RETROSPECTIVE ANALYST. Given a complete task trajectory, extract
structured lessons that will help an agent perform better next time.

Analyze the trajectory and produce EXACTLY these categories of lessons:

1. SKILLS (what worked well — reusable procedures):
   - Pattern: when does this apply?
   - Strategy: what to do?
   - Steps: concrete action sequence?

2. FAILURES (what went wrong — patterns to avoid):
   - What happened?
   - Why was it wrong?
   - What to do instead?

3. POLICIES (new rules or constraints discovered):
   - What tool/action needs a new constraint?
   - What's the constraint?

4. OBSERVATIONS (specific state patterns worth remembering):
   - What state pattern was significant?
   - What did it mean?

Be concrete. Use {variable} placeholders for generalizable parts.
Respond with JSON:
{
  "skills": [{"pattern": "...", "strategy": "...", "steps": ["..."]}],
  "failures": [{"pattern": "...", "what_happened": "...", "instead": "..."}],
  "policies": [{"tool": "...", "constraint": "..."}],
  "observations": [{"state_pattern": "...", "meaning": "..."}]
}
"""

REFLECTION_SCHEMA = {
    "type": "object",
    "properties": {
        "skills": {"type": "array", "items": {"type": "object"}},
        "failures": {"type": "array", "items": {"type": "object"}},
        "policies": {"type": "array", "items": {"type": "object"}},
        "observations": {"type": "array", "items": {"type": "object"}},
    },
    "required": ["skills", "failures"],
}
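
# Illustrative model response that satisfies REFLECTION_SCHEMA, mirroring the
# JSON template in REFLECTION_PROMPT (all values invented for illustration):
#
#   {"skills": [{"pattern": "user asks to summarize {file}",
#                "strategy": "read the file before summarizing",
#                "steps": ["read the file", "list key points", "draft summary"]}],
#    "failures": [{"pattern": "retrying a failing tool call unchanged",
#                  "what_happened": "the same error came back three times",
#                  "instead": "change the parameters or switch tools"}],
#    "policies": [{"tool": "search", "constraint": "limit to 3 queries per step"}],
#    "observations": [{"state_pattern": "empty search results",
#                      "meaning": "the query was probably too narrow"}]}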


class Retroformer:
    """
    Structured retrospective analysis of trajectories.

    Replaces V1's raw heuristic distillation with typed memory extraction.
    Every extracted lesson goes through the Memory CI pipeline (immune scan,
    quarantine, replay test, promote/reject).

    Usage:
        retro = Retroformer(llm=model, memory_ci=ci)

        # After each task:
        memories = retro.reflect(trace)
        # → Extracts skills, failures, policies, observations
        # → Submits each to Memory CI for quarantine + scanning
        # → Only safe, useful memories get promoted

        # Over time: the agent accumulates vetted knowledge
    """

    def __init__(
        self,
        llm: LLMBackend,
        memory_ci: MemoryCI,
        agent_role: str = "",
    ):
        self.llm = llm
        self.memory_ci = memory_ci
        self.agent_role = agent_role
        self._reflections: list[dict] = []

    def reflect(self, trace: Trace) -> list[MemoryCard]:
        """
        Analyze a trace and extract structured lessons as typed memories.

        Returns list of submitted MemoryCards (status will be CANDIDATE or
        QUARANTINED).
        """
        # Build trajectory summary for the LLM
        summary = self._build_trajectory_summary(trace)

        messages = [
            ChatMessage(role="system", content=REFLECTION_PROMPT),
            ChatMessage(role="user", content=summary),
        ]

        try:
            result = self.llm.generate_structured(messages, schema=REFLECTION_SCHEMA)
        except Exception as e:
            logger.warning(f"Retroformer: Reflection failed ({e}), attempting text parse")
            raw = self.llm.generate(messages, temperature=0.5)
            try:
                result = json.loads(raw)
            except Exception:
                result = {"skills": [], "failures": []}

        cards = []

        # Extract skills → skill_card memories
        for skill in result.get("skills", []):
            card = MemoryCard(
                kind=MemoryKind.SKILL_CARD,
                pattern=skill.get("pattern", ""),
                strategy=skill.get("strategy", ""),
                steps=skill.get("steps", []),
                source_trace_id=trace.trace_id,
                created_by="retroformer",
                scope=MemoryScope(
                    agent_roles=[self.agent_role] if self.agent_role else [],
                ),
            )
            self.memory_ci.submit(card)
            cards.append(card)

        # Extract failures → failure_pattern memories
        for failure in result.get("failures", []):
            card = MemoryCard(
                kind=MemoryKind.FAILURE_PATTERN,
                pattern=failure.get("pattern", failure.get("what_happened", "")),
                strategy=f"AVOID: {failure.get('what_happened', '')}. INSTEAD: {failure.get('instead', '')}",
                source_trace_id=trace.trace_id,
                created_by="retroformer",
            )
            self.memory_ci.submit(card)
            cards.append(card)

        # Extract policies → tool_policy memories
        for policy in result.get("policies", []):
            tool_name = policy.get("tool", "")
            card = MemoryCard(
                kind=MemoryKind.TOOL_POLICY,
                content=policy.get("constraint", ""),
                strategy=policy.get("constraint", ""),
                source_trace_id=trace.trace_id,
                created_by="retroformer",
                scope=MemoryScope(tool_names=[tool_name] if tool_name else []),
            )
            self.memory_ci.submit(card)
            cards.append(card)

        # Extract observations → episodic_case memories
        for obs in result.get("observations", []):
            card = MemoryCard(
                kind=MemoryKind.EPISODIC_CASE,
                pattern=obs.get("state_pattern", ""),
                content=obs.get("meaning", ""),
                source_trace_id=trace.trace_id,
                created_by="retroformer",
            )
            self.memory_ci.submit(card)
            cards.append(card)

        self._reflections.append({
            "trace_id": trace.trace_id,
            "skills": len(result.get("skills", [])),
            "failures": len(result.get("failures", [])),
            "policies": len(result.get("policies", [])),
            "observations": len(result.get("observations", [])),
            "total_cards": len(cards),
        })

        logger.info(
            f"Retroformer: Reflected on trace {trace.trace_id} → "
            f"{len(cards)} memory candidates "
            f"(skills={len(result.get('skills', []))}, "
            f"failures={len(result.get('failures', []))})"
        )
        return cards

    def _build_trajectory_summary(self, trace: Trace) -> str:
        """Build a concise trajectory summary for the reflection prompt."""
        lines = [f"## Task: {trace.purpose}", f"Run mode: {trace.run_mode}", ""]

        step_events = sorted(
            [e for e in trace.events if e.kind in ("action", "score")],
            key=lambda e: (e.step, e.kind),
        )

        current_step = None  # None, not 0, so the first step always gets a header
        for event in step_events:
            if event.step != current_step:
                current_step = event.step
                lines.append(f"\n### Step {current_step}")

            if event.kind == "action":
                lines.append(f"  Action: {event.data.get('name', '?')}({json.dumps(event.data.get('params', {}), default=str)})")
                if event.data.get("thought"):
                    lines.append(f"  Thought: {event.data['thought'][:150]}")

            elif event.kind == "score":
                phi_b = event.data.get("phi_before", 0)
                phi_a = event.data.get("phi_after", 0)
                lines.append(f"  Score: Φ {phi_b:.1f} → {phi_a:.1f} (Δ={phi_a - phi_b:+.2f})")
                if event.data.get("evidence"):
                    lines.append(f"  Evidence: {event.data['evidence'][:150]}")

        lines.append("\n## Summary")
        lines.append(f"Total steps: {trace.step_count}")
        lines.append(f"Duration: {trace.duration_s:.1f}s")

        return "\n".join(lines)

    @property
    def reflections(self) -> list[dict]:
        """Per-trace reflection statistics (counts of extracted lessons)."""
        return self._reflections
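

# Minimal usage sketch (illustrative, not part of this commit). `model` and
# `ci` stand in for a concrete LLMBackend and a configured MemoryCI, whose
# construction is project-specific:
#
#   retro = Retroformer(llm=model, memory_ci=ci, agent_role="researcher")
#   for trace in finished_traces:     # Trace objects from completed tasks
#       cards = retro.reflect(trace)  # extract lessons, submit to Memory CI
#   print(retro.reflections)          # per-trace extraction counts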