Rohan03 committed
Commit be96ac2 · verified · 1 Parent(s): 44f8122

V2 merge: purpose_agent/retroformer.py

Files changed (1)
1. purpose_agent/retroformer.py +248 -0
purpose_agent/retroformer.py ADDED
@@ -0,0 +1,248 @@
"""
retroformer.py — Structured reflection with gradient-free policy improvement.

From Retroformer (arXiv:2308.02151):
    A retrospective model Γ that takes the full trajectory (states, actions,
    rewards) and generates an improved prompt for the next attempt. The key
    insight: the LLM agent is frozen, but the retrospective model learns
    to write better prompts for it.

Adaptation for Purpose Agent (no weight updates):
    Instead of training the retrospective model with policy gradients,
    we use the same LLM to reflect on trajectories and extract structured
    lessons. These lessons become skill_card and failure_pattern memories
    that improve future prompts via the PromptCompiler.

The reflection is structured (not free-form):
    1. What went well? (→ skill_card memories)
    2. What went wrong? (→ failure_pattern memories)
    3. What should change next time? (→ user_preference or tool_policy memories)
    4. What specific state patterns should I watch for? (→ episodic_case memories)

This replaces V1's raw heuristic distillation with a more rigorous,
typed memory extraction process.
"""
from __future__ import annotations

import json
import logging
from typing import Any

from purpose_agent.llm_backend import LLMBackend, ChatMessage
from purpose_agent.trace import Trace
from purpose_agent.memory import MemoryCard, MemoryKind, MemoryStatus
from purpose_agent.v2_types import MemoryScope
from purpose_agent.memory_ci import MemoryCI

logger = logging.getLogger(__name__)

REFLECTION_PROMPT = """\
You are a RETROSPECTIVE ANALYST. Given a complete task trajectory, extract
structured lessons that will help an agent perform better next time.

Analyze the trajectory and produce EXACTLY these categories of lessons:

1. SKILLS (what worked well — reusable procedures):
   - Pattern: when does this apply?
   - Strategy: what to do?
   - Steps: concrete action sequence?

2. FAILURES (what went wrong — patterns to avoid):
   - What happened?
   - Why was it wrong?
   - What to do instead?

3. POLICIES (new rules or constraints discovered):
   - What tool/action needs a new constraint?
   - What's the constraint?

4. OBSERVATIONS (specific state patterns worth remembering):
   - What state pattern was significant?
   - What did it mean?

Be concrete. Use {variable} placeholders for generalizable parts.
Respond with JSON:
{
  "skills": [{"pattern": "...", "strategy": "...", "steps": ["..."]}],
  "failures": [{"pattern": "...", "what_happened": "...", "instead": "..."}],
  "policies": [{"tool": "...", "constraint": "..."}],
  "observations": [{"state_pattern": "...", "meaning": "..."}]
}
"""

REFLECTION_SCHEMA = {
    "type": "object",
    "properties": {
        "skills": {"type": "array", "items": {"type": "object"}},
        "failures": {"type": "array", "items": {"type": "object"}},
        "policies": {"type": "array", "items": {"type": "object"}},
        "observations": {"type": "array", "items": {"type": "object"}},
    },
    "required": ["skills", "failures"],
}
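
# Illustrative model response that satisfies REFLECTION_SCHEMA, mirroring the
# JSON template in REFLECTION_PROMPT (all values invented for illustration):
#
#   {"skills": [{"pattern": "user asks to summarize {file}",
#                "strategy": "read the file before summarizing",
#                "steps": ["read the file", "list key points", "draft summary"]}],
#    "failures": [{"pattern": "retrying a failing tool call unchanged",
#                  "what_happened": "the same error came back three times",
#                  "instead": "change the parameters or switch tools"}],
#    "policies": [{"tool": "search", "constraint": "limit to 3 queries per step"}],
#    "observations": [{"state_pattern": "empty search results",
#                      "meaning": "the query was probably too narrow"}]}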


class Retroformer:
    """
    Structured retrospective analysis of trajectories.

    Replaces V1's raw heuristic distillation with typed memory extraction.
    Every extracted lesson goes through the Memory CI pipeline (immune scan,
    quarantine, replay test, promote/reject).

    Usage:
        retro = Retroformer(llm=model, memory_ci=ci)

        # After each task:
        memories = retro.reflect(trace)
        # → Extracts skills, failures, policies, observations
        # → Submits each to Memory CI for quarantine + scanning
        # → Only safe, useful memories get promoted

        # Over time: the agent accumulates vetted knowledge
    """

    def __init__(
        self,
        llm: LLMBackend,
        memory_ci: MemoryCI,
        agent_role: str = "",
    ):
        self.llm = llm
        self.memory_ci = memory_ci
        self.agent_role = agent_role
        self._reflections: list[dict] = []

    def reflect(self, trace: Trace) -> list[MemoryCard]:
        """
        Analyze a trace and extract structured lessons as typed memories.

        Returns list of submitted MemoryCards (status will be CANDIDATE or
        QUARANTINED).
        """
        # Build trajectory summary for the LLM
        summary = self._build_trajectory_summary(trace)

        messages = [
            ChatMessage(role="system", content=REFLECTION_PROMPT),
            ChatMessage(role="user", content=summary),
        ]

        try:
            result = self.llm.generate_structured(messages, schema=REFLECTION_SCHEMA)
        except Exception as e:
            logger.warning(f"Retroformer: Reflection failed ({e}), attempting text parse")
            raw = self.llm.generate(messages, temperature=0.5)
            try:
                result = json.loads(raw)
            except Exception:
                result = {"skills": [], "failures": []}

        cards = []

        # Extract skills → skill_card memories
        for skill in result.get("skills", []):
            card = MemoryCard(
                kind=MemoryKind.SKILL_CARD,
                pattern=skill.get("pattern", ""),
                strategy=skill.get("strategy", ""),
                steps=skill.get("steps", []),
                source_trace_id=trace.trace_id,
                created_by="retroformer",
                scope=MemoryScope(
                    agent_roles=[self.agent_role] if self.agent_role else [],
                ),
            )
            self.memory_ci.submit(card)
            cards.append(card)

        # Extract failures → failure_pattern memories
        for failure in result.get("failures", []):
            card = MemoryCard(
                kind=MemoryKind.FAILURE_PATTERN,
                pattern=failure.get("pattern", failure.get("what_happened", "")),
                strategy=f"AVOID: {failure.get('what_happened', '')}. INSTEAD: {failure.get('instead', '')}",
                source_trace_id=trace.trace_id,
                created_by="retroformer",
            )
            self.memory_ci.submit(card)
            cards.append(card)

        # Extract policies → tool_policy memories
        for policy in result.get("policies", []):
            tool_name = policy.get("tool", "")
            card = MemoryCard(
                kind=MemoryKind.TOOL_POLICY,
                content=policy.get("constraint", ""),
                strategy=policy.get("constraint", ""),
                source_trace_id=trace.trace_id,
                created_by="retroformer",
                scope=MemoryScope(tool_names=[tool_name] if tool_name else []),
            )
            self.memory_ci.submit(card)
            cards.append(card)

        # Extract observations → episodic_case memories
        for obs in result.get("observations", []):
            card = MemoryCard(
                kind=MemoryKind.EPISODIC_CASE,
                pattern=obs.get("state_pattern", ""),
                content=obs.get("meaning", ""),
                source_trace_id=trace.trace_id,
                created_by="retroformer",
            )
            self.memory_ci.submit(card)
            cards.append(card)

        self._reflections.append({
            "trace_id": trace.trace_id,
            "skills": len(result.get("skills", [])),
            "failures": len(result.get("failures", [])),
            "policies": len(result.get("policies", [])),
            "observations": len(result.get("observations", [])),
            "total_cards": len(cards),
        })

        logger.info(
            f"Retroformer: Reflected on trace {trace.trace_id} → "
            f"{len(cards)} memory candidates "
            f"(skills={len(result.get('skills', []))}, "
            f"failures={len(result.get('failures', []))})"
        )
        return cards

    def _build_trajectory_summary(self, trace: Trace) -> str:
        """Build a concise trajectory summary for the reflection prompt."""
        lines = [f"## Task: {trace.purpose}", f"Run mode: {trace.run_mode}", ""]

        step_events = sorted(
            [e for e in trace.events if e.kind in ("action", "score")],
            key=lambda e: (e.step, e.kind),
        )

        current_step = None  # None, not 0, so the first step always gets a header
        for event in step_events:
            if event.step != current_step:
                current_step = event.step
                lines.append(f"\n### Step {current_step}")

            if event.kind == "action":
                lines.append(f"  Action: {event.data.get('name', '?')}({json.dumps(event.data.get('params', {}), default=str)})")
                if event.data.get("thought"):
                    lines.append(f"  Thought: {event.data['thought'][:150]}")

            elif event.kind == "score":
                phi_b = event.data.get("phi_before", 0)
                phi_a = event.data.get("phi_after", 0)
                lines.append(f"  Score: Φ {phi_b:.1f} → {phi_a:.1f} (Δ={phi_a - phi_b:+.2f})")
                if event.data.get("evidence"):
                    lines.append(f"  Evidence: {event.data['evidence'][:150]}")

        lines.append("\n## Summary")
        lines.append(f"Total steps: {trace.step_count}")
        lines.append(f"Duration: {trace.duration_s:.1f}s")

        return "\n".join(lines)

    @property
    def reflections(self) -> list[dict]:
        """Per-trace reflection statistics (counts of extracted lessons)."""
        return self._reflections
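

# Minimal usage sketch (illustrative, not part of this commit). `model` and
# `ci` stand in for a concrete LLMBackend and a configured MemoryCI, whose
# construction is project-specific:
#
#   retro = Retroformer(llm=model, memory_ci=ci, agent_role="researcher")
#   for trace in finished_traces:     # Trace objects from completed tasks
#       cards = retro.reflect(trace)  # extract lessons, submit to Memory CI
#   print(retro.reflections)          # per-trace extraction counts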