# Rohan03 / purpose-agent

"""
prompt_optimizer.py — DSPy-style automatic prompt optimization.

From DSPy (arXiv:2310.03714):
  Instead of hand-crafting prompts, define signatures (input → output)
  and let the optimizer bootstrap effective demonstrations automatically.

Adaptation for Purpose Agent:
  1. Define a Signature, e.g. "state, action, purpose → phi_score, reasoning".
  2. Collect demonstration traces from successful runs.
  3. The optimizer selects the best N demonstrations by trial scoring.
  4. These demonstrations are injected into the prompt as few-shot examples.
  5. Periodically re-optimize as more traces become available.

No weight updates — improvement comes from better few-shot examples
in the prompt, selected via a metric (accuracy on held-out examples).
"""
from __future__ import annotations

import json
import logging
import random
from collections.abc import Mapping
from dataclasses import dataclass, field
from typing import Any, Callable

from purpose_agent.llm_backend import LLMBackend, ChatMessage
from purpose_agent.trace import Trace
# Module-level logger named after this module, so the application's logging
# configuration decides where PromptOptimizer messages go.
logger = logging.getLogger(__name__)
@dataclass
class Signature:
    """
    DSPy-style signature: declares what a prompt should do.

    A signature names the fields a prompt is given and the fields it must
    produce, plus an optional natural-language instruction.

    Example:
        sig = Signature(
            name="state_evaluator",
            inputs=["state_before", "action", "state_after", "purpose"],
            outputs=["phi_score", "reasoning", "evidence"],
            instruction="Evaluate the state transition and score progress toward the purpose.",
        )
    """

    # Identifier for this signature (used in log messages and handed to
    # the metric function during optimization).
    name: str
    # Names of the fields the prompt receives.
    inputs: list[str]
    # Names of the fields the prompt must produce.
    outputs: list[str]
    # Optional task description; omitted from the compiled prompt when empty.
    instruction: str = ""
@dataclass
class Demonstration:
    """A single input→output example for few-shot prompting."""

    # Input field name -> value shown to the model in the example.
    inputs: dict[str, str]
    # Output field name -> value the model is expected to produce.
    outputs: dict[str, str]
    # How good this demo is at improving task performance; 0.0 until an
    # optimization run assigns a score.
    score: float = 0.0
class PromptOptimizer:
    """
    Automatically optimizes prompts by bootstrapping demonstrations.

    The DSPy approach adapted for Purpose Agent:
    1. Collect candidate demonstrations from traces
    2. Score candidate subsets by compiling them into a prompt and asking a
       caller-supplied metric how good that prompt is
    3. Select the top-K demonstrations
    4. Return an optimized prompt with the best demonstrations

    Usage:
        optimizer = PromptOptimizer(llm=model)
        # Define what the prompt should do
        sig = Signature(
            name="actor",
            inputs=["state", "purpose"],
            outputs=["thought", "action"],
            instruction="Decide the best next action.",
        )
        # Collect demonstrations from traces
        demos = optimizer.extract_demonstrations(traces, sig)
        # Optimize: find the best subset
        best = optimizer.optimize(sig, demos, metric_fn=my_metric, k=3)
        # Get the optimized prompt
        prompt = optimizer.compile_prompt(sig, best)
    """

    def __init__(self, llm: LLMBackend | None = None):
        """Store the optional LLM backend; no method of this class calls it."""
        self.llm = llm

    def extract_demonstrations(
        self,
        traces: list[Trace],
        signature: Signature,
        max_demos: int = 50,
    ) -> list[Demonstration]:
        """
        Extract candidate demonstrations from traces.

        An event becomes a candidate when its ``data`` mapping contains at
        least one of the signature's output fields. Input and output fields
        missing from the event are filled with the empty string; present
        values are converted with ``str``.

        Args:
            traces: Traces whose ``events`` are scanned in order.
            signature: Declares which input/output fields to pull out.
            max_demos: Hard upper bound on the number of demonstrations
                returned; a value <= 0 yields an empty list.

        Returns:
            At most ``max_demos`` demonstrations, in trace/event order.
        """
        demos: list[Demonstration] = []
        for trace in traces:
            # Checked per trace as well as per event so the cap holds across
            # traces, not only within the one that reached it.
            if len(demos) >= max_demos:
                break
            for event in trace.events:
                if len(demos) >= max_demos:
                    break
                data = event.data
                # Events without a mapping payload cannot match any field.
                if not isinstance(data, Mapping):
                    continue
                if not any(name in data for name in signature.outputs):
                    continue
                demos.append(
                    Demonstration(
                        inputs={name: str(data.get(name, "")) for name in signature.inputs},
                        outputs={name: str(data.get(name, "")) for name in signature.outputs},
                    )
                )
        logger.info(
            "PromptOptimizer: Extracted %d candidate demonstrations for '%s'",
            len(demos),
            signature.name,
        )
        return demos

    def optimize(
        self,
        signature: Signature,
        candidates: list[Demonstration],
        metric_fn: Callable[[str, dict], float] | None = None,
        k: int = 3,
        trials: int = 10,
        *,
        rng: random.Random | None = None,
    ) -> list[Demonstration]:
        """
        Select the best K demonstrations by trial-and-error.

        If metric_fn is provided, random K-subsets are compiled into prompts
        and the subset with the highest metric wins. Otherwise a diversity
        heuristic is used (varied examples > similar ones).

        Args:
            signature: Signature the demonstrations belong to.
            candidates: Pool of demonstrations to choose from.
            metric_fn: Called as ``metric_fn(prompt, {"signature": name})``
                and expected to return a score where higher is better.
            k: Number of demonstrations to select. ``k <= 0`` selects none.
                Note that ``compile_prompt`` shows at most 5 examples by
                default, so that is what the metric sees for larger ``k``.
            trials: Number of random subsets to score.
            rng: Optional ``random.Random`` used for sampling, for
                reproducible runs; the module-level generator is used when
                omitted.

        Returns:
            The selected demonstrations. When the pool has at most ``k``
            entries the ``candidates`` list itself is returned. When trials
            were scored, each selected demo's ``score`` is set to the winning
            subset's score; if no trial could be scored the first ``k``
            candidates are returned with their scores untouched.
        """
        if k <= 0:
            return []
        if len(candidates) <= k:
            return candidates
        if metric_fn is None:
            # Diversity-based selection: pick demos with different output patterns
            return self._diverse_select(candidates, k)

        sampler = rng if rng is not None else random
        best_subset = candidates[:k]
        best_score = -float("inf")
        any_scored = False
        for trial in range(trials):
            subset = sampler.sample(candidates, k)
            prompt = self.compile_prompt(signature, subset)
            try:
                score = metric_fn(prompt, {"signature": signature.name})
            except Exception:
                # Best effort: a failing metric must not abort optimization,
                # but it also must not be scored (a made-up 0.0 would beat
                # every legitimately negative score).
                logger.warning(
                    "PromptOptimizer: Trial %d metric failed; skipping",
                    trial + 1,
                    exc_info=True,
                )
                continue
            any_scored = True
            if score > best_score:
                best_score = score
                best_subset = subset
                logger.debug(
                    "PromptOptimizer: Trial %d new best score=%.3f", trial + 1, score
                )

        # Record scores on selected demos, but only if a real score exists.
        if any_scored:
            for demo in best_subset:
                demo.score = best_score
        logger.info(
            "PromptOptimizer: Selected %d demos (best_score=%.3f)",
            len(best_subset),
            best_score,
        )
        return best_subset

    def compile_prompt(
        self,
        signature: Signature,
        demonstrations: list[Demonstration],
        *,
        max_examples: int = 5,
        max_chars: int = 150,
    ) -> str:
        """
        Compile a signature + demonstrations into a ready-to-use prompt.

        The prompt consists of an optional "## Task" section (the signature's
        instruction), a "## Format" section listing input and output fields,
        and an "## Examples" section with one entry per demonstration.

        Args:
            signature: Supplies the instruction and field names.
            demonstrations: Few-shot examples, rendered in the given order.
            max_examples: Maximum number of demonstrations rendered.
            max_chars: Each field value is cut to this many characters.

        Returns:
            The optimized system prompt string; sections are separated by a
            blank line.
        """
        sections: list[str] = []
        if signature.instruction:
            sections.append(f"## Task\n{signature.instruction}")

        input_desc = ", ".join(signature.inputs)
        output_desc = ", ".join(signature.outputs)
        sections.append(f"## Format\nGiven: {input_desc}\nProduce: {output_desc}")

        shown = demonstrations[: max(max_examples, 0)]
        if shown:
            limit = max(max_chars, 0)
            sections.append("## Examples")
            for number, demo in enumerate(shown, 1):
                lines = [f"### Example {number}"]
                lines.extend(self._render_fields(demo.inputs, limit))
                lines.append("  →")
                lines.extend(self._render_fields(demo.outputs, limit))
                sections.append("\n".join(lines))
        return "\n\n".join(sections)

    @staticmethod
    def _render_fields(fields: dict[str, str], limit: int) -> list[str]:
        """Render non-empty fields as indented ``name: value`` lines, values cut to ``limit``."""
        return [
            f"  {name}: {str(value)[:limit]}"
            for name, value in fields.items()
            if value
        ]

    def _diverse_select(
        self, candidates: list[Demonstration], k: int
    ) -> list[Demonstration]:
        """
        Select diverse demonstrations by output variety.

        The first pass keeps the first candidate of each distinct output
        pattern (the sorted output values, compared on the first 50
        characters of their repr). If that yields fewer than ``k``, the
        remaining slots are filled with not-yet-chosen candidates in their
        original order.

        Returns:
            Up to ``k`` demonstrations: the diverse picks first, then fillers.
        """
        if k <= 0:
            return []

        seen_patterns: set[str] = set()
        chosen: list[int] = []
        for index, demo in enumerate(candidates):
            pattern = str(sorted(demo.outputs.values()))[:50]
            if pattern in seen_patterns:
                continue
            seen_patterns.add(pattern)
            chosen.append(index)
            if len(chosen) >= k:
                break

        # Fill remaining with any unused candidates. Positions, not equality,
        # decide what is "unused": equal-valued demos are distinct candidates.
        if len(chosen) < k:
            taken = set(chosen)
            for index in range(len(candidates)):
                if index in taken:
                    continue
                chosen.append(index)
                if len(chosen) >= k:
                    break
        return [candidates[index] for index in chosen]