| """ |
| prompt_optimizer.py — DSPy-style automatic prompt optimization. |
| |
| From DSPy (arxiv:2310.03714): |
| Instead of hand-crafting prompts, define signatures (input → output) |
| and let the optimizer bootstrap effective demonstrations automatically. |
| |
| Adaptation for Purpose Agent: |
| 1. Define a Signature: e.g., "state, action, purpose → phi_score, reasoning" |
| 2. Collect demonstration traces from successful runs |
| 3. The optimizer selects the best N demonstrations by trial scoring |
| 4. These demonstrations are injected into the prompt as few-shot examples |
| 5. Periodically re-optimize as more traces become available |
| |
| No weight updates — improvement comes from better few-shot examples |
| in the prompt, selected via a metric (accuracy on held-out examples). |
| """ |
| from __future__ import annotations |
|
|
| import json |
| import logging |
| import random |
| from dataclasses import dataclass, field |
| from typing import Any, Callable |
|
|
| from purpose_agent.llm_backend import LLMBackend, ChatMessage |
| from purpose_agent.trace import Trace |
|
|
| logger = logging.getLogger(__name__) |
|
|
|
|
@dataclass
class Signature:
    """
    DSPy-style signature: declares what a prompt should do.

    A signature is purely declarative: it names the fields a prompt consumes
    and produces, plus an optional task instruction. PromptOptimizer uses it
    to mine demonstrations from traces and to compile the final prompt text.

    Example:
        sig = Signature(
            name="state_evaluator",
            inputs=["state_before", "action", "state_after", "purpose"],
            outputs=["phi_score", "reasoning", "evidence"],
            instruction="Evaluate the state transition and score progress toward the purpose.",
        )
    """
    # Identifier used in log messages and passed to metric_fn as context.
    name: str
    # Field names the prompt consumes (rendered on the "Given:" line).
    inputs: list[str]
    # Field names the prompt must produce (rendered on the "Produce:" line).
    outputs: list[str]
    # Optional natural-language task description ("## Task" section, omitted if empty).
    instruction: str = ""
|
|
|
|
@dataclass
class Demonstration:
    """A single input→output example for few-shot prompting."""
    # Maps each signature input field to its stringified value.
    inputs: dict[str, str]
    # Maps each signature output field to its stringified value.
    outputs: dict[str, str]
    # Score of the subset this demo was selected in; written by
    # PromptOptimizer.optimize() after trial scoring (0.0 until then).
    score: float = 0.0
|
|
|
|
class PromptOptimizer:
    """
    Automatically optimizes prompts by bootstrapping demonstrations.

    The DSPy approach adapted for Purpose Agent:
    1. Collect candidate demonstrations from traces
    2. Score each candidate by running it as a few-shot example and measuring output quality
    3. Select the top-K demonstrations
    4. Return an optimized prompt with the best demonstrations

    Usage:
        optimizer = PromptOptimizer(llm=model)

        # Define what the prompt should do
        sig = Signature(
            name="actor",
            inputs=["state", "purpose"],
            outputs=["thought", "action"],
            instruction="Decide the best next action.",
        )

        # Collect demonstrations from traces
        demos = optimizer.extract_demonstrations(traces, sig)

        # Optimize: find the best subset
        best = optimizer.optimize(sig, demos, metric_fn=my_metric, k=3)

        # Get the optimized prompt
        prompt = optimizer.compile_prompt(sig, best)
    """

    def __init__(self, llm: LLMBackend | None = None):
        # NOTE(review): the backend is stored but not used by any method in
        # this class; presumably kept so callers can wire in model-based
        # scoring later — confirm before removing.
        self.llm = llm

    def extract_demonstrations(
        self,
        traces: list[Trace],
        signature: Signature,
        max_demos: int = 50,
    ) -> list[Demonstration]:
        """
        Extract candidate demonstrations from traces.

        An event qualifies when every signature input field is present
        (either as a key of ``event.data`` or as the event's kind) AND at
        least one signature output field is present in ``event.data``.

        Args:
            traces: Traces to mine for input→output examples.
            signature: Declares which event fields count as inputs/outputs.
            max_demos: Hard cap on the number of demonstrations returned.

        Returns:
            Up to ``max_demos`` unscored candidate demonstrations.
        """
        demos: list[Demonstration] = []
        for trace in traces:
            for event in trace.events:
                data = event.data

                # An input field may live in the event payload or be encoded
                # as the event kind itself.
                has_inputs = all(f in data or f == event.kind for f in signature.inputs)
                has_outputs = any(f in data for f in signature.outputs)

                # Fix: `has_inputs` was previously computed but never used,
                # so events missing input fields were collected with
                # empty-string inputs; it now gates collection.
                if has_inputs and has_outputs:
                    inputs = {f: str(data.get(f, "")) for f in signature.inputs}
                    outputs = {f: str(data.get(f, "")) for f in signature.outputs}
                    demos.append(Demonstration(inputs=inputs, outputs=outputs))

                if len(demos) >= max_demos:
                    # Fix: the old `break` only exited the inner event loop,
                    # so later traces could push the total past the cap.
                    logger.info(
                        "PromptOptimizer: Extracted %d candidate demonstrations for '%s'",
                        len(demos), signature.name,
                    )
                    return demos

        logger.info(
            "PromptOptimizer: Extracted %d candidate demonstrations for '%s'",
            len(demos), signature.name,
        )
        return demos

    def optimize(
        self,
        signature: Signature,
        candidates: list[Demonstration],
        metric_fn: Callable[[str, dict], float] | None = None,
        k: int = 3,
        trials: int = 10,
    ) -> list[Demonstration]:
        """
        Select the best K demonstrations by trial-and-error.

        If metric_fn is provided, scores ``trials`` random K-subsets and
        keeps the best one. Otherwise, uses a diversity heuristic
        (varied examples > similar ones).

        Args:
            signature: Signature the trial prompts are compiled against.
            candidates: Candidate demonstrations (e.g. from extract_demonstrations).
            metric_fn: Callable(prompt, context) -> score; higher is better.
            k: Number of demonstrations to select.
            trials: Number of random subsets to evaluate when metric_fn is set.

        Returns:
            The winning subset (length <= k); each member's ``score`` is set
            to the subset's best trial score.
        """
        # Everything fits in the budget — nothing to choose between.
        if len(candidates) <= k:
            return candidates

        if metric_fn is None:
            # No way to measure quality, so prefer varied examples.
            return self._diverse_select(candidates, k)

        best_subset = candidates[:k]
        best_score = -float("inf")

        for trial in range(trials):
            # len(candidates) > k is guaranteed by the early return above.
            subset = random.sample(candidates, k)
            prompt = self.compile_prompt(signature, subset)

            try:
                score = metric_fn(prompt, {"signature": signature.name})
            except Exception:
                # Fix: failures were swallowed with no diagnostics. Keep the
                # best-effort zero score, but leave a trail for debugging.
                logger.debug(
                    "PromptOptimizer: metric_fn raised on trial %d",
                    trial + 1,
                    exc_info=True,
                )
                score = 0.0

            if score > best_score:
                best_score = score
                best_subset = subset
                logger.debug(f"PromptOptimizer: Trial {trial+1} new best score={score:.3f}")

        # Record the winning score on each selected demo so later passes
        # (e.g. periodic re-optimization) can see how well it performed.
        for demo in best_subset:
            demo.score = best_score

        logger.info(f"PromptOptimizer: Selected {len(best_subset)} demos (best_score={best_score:.3f})")
        return best_subset

    def compile_prompt(
        self,
        signature: Signature,
        demonstrations: list[Demonstration],
    ) -> str:
        """
        Compile a signature + demonstrations into a ready-to-use prompt.

        At most the first 5 demonstrations are rendered, and each field value
        is truncated to 150 characters to keep the prompt compact. Empty
        field values are skipped entirely.

        Returns the optimized system prompt string.
        """
        sections = []

        # Task instruction (optional section).
        if signature.instruction:
            sections.append(f"## Task\n{signature.instruction}")

        # Input/output contract.
        input_desc = ", ".join(signature.inputs)
        output_desc = ", ".join(signature.outputs)
        sections.append(f"## Format\nGiven: {input_desc}\nProduce: {output_desc}")

        # Few-shot examples (capped at 5).
        if demonstrations:
            sections.append("## Examples")
            for i, demo in enumerate(demonstrations[:5], 1):
                lines = [f"### Example {i}"]
                for k, v in demo.inputs.items():
                    if v:
                        lines.append(f"  {k}: {v[:150]}")
                lines.append("  →")
                for k, v in demo.outputs.items():
                    if v:
                        lines.append(f"  {k}: {v[:150]}")
                sections.append("\n".join(lines))

        return "\n\n".join(sections)

    def _diverse_select(
        self, candidates: list[Demonstration], k: int
    ) -> list[Demonstration]:
        """
        Select diverse demonstrations by output variety.

        First pass keeps one demo per distinct output fingerprint; if that
        yields fewer than k, a second pass tops up with the remaining
        candidates in their original order.
        """
        seen_outputs: set[str] = set()
        selected: list[Demonstration] = []

        for demo in candidates:
            # Cheap fingerprint of the outputs; truncated so pathological
            # values don't bloat the seen-set.
            key = str(sorted(demo.outputs.values()))[:50]
            if key not in seen_outputs:
                seen_outputs.add(key)
                selected.append(demo)
            if len(selected) >= k:
                break

        # Diversity alone wasn't enough — fill up to k with leftovers.
        if len(selected) < k:
            for demo in candidates:
                if demo not in selected:
                    selected.append(demo)
                    if len(selected) >= k:
                        break

        return selected
|
|