Rohan03 committed on
Commit
6dd7984
·
verified ·
1 Parent(s): 28b87a7

V2 merge: purpose_agent/prompt_optimizer.py

Browse files
Files changed (1) hide show
  1. purpose_agent/prompt_optimizer.py +230 -0
purpose_agent/prompt_optimizer.py ADDED
@@ -0,0 +1,230 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ prompt_optimizer.py — DSPy-style automatic prompt optimization.
3
+
4
+ From DSPy (arxiv:2310.03714):
5
+ Instead of hand-crafting prompts, define signatures (input → output)
6
+ and let the optimizer bootstrap effective demonstrations automatically.
7
+
8
+ Adaptation for Purpose Agent:
9
+ 1. Define a Signature: e.g., "state, action, purpose → phi_score, reasoning"
10
+ 2. Collect demonstration traces from successful runs
11
+ 3. The optimizer selects the best N demonstrations by trial scoring
12
+ 4. These demonstrations are injected into the prompt as few-shot examples
13
+ 5. Periodically re-optimize as more traces become available
14
+
15
+ No weight updates — improvement comes from better few-shot examples
16
+ in the prompt, selected via a metric (accuracy on held-out examples).
17
+ """
18
+ from __future__ import annotations
19
+
20
+ import json
21
+ import logging
22
+ import random
23
+ from dataclasses import dataclass, field
24
+ from typing import Any, Callable
25
+
26
+ from purpose_agent.llm_backend import LLMBackend, ChatMessage
27
+ from purpose_agent.trace import Trace
28
+
29
+ logger = logging.getLogger(__name__)
30
+
31
+
32
@dataclass
class Signature:
    """
    Declarative description of a prompt, in the style of a DSPy signature.

    A signature names the fields a prompt consumes and the fields it is
    expected to produce, plus an optional natural-language instruction.

    Example:
        sig = Signature(
            name="state_evaluator",
            inputs=["state_before", "action", "state_after", "purpose"],
            outputs=["phi_score", "reasoning", "evidence"],
            instruction="Evaluate the state transition and score progress toward the purpose.",
        )
    """

    # Identifier for this signature.
    name: str
    # Names of the fields the prompt is given.
    inputs: list[str]
    # Names of the fields the prompt should produce.
    outputs: list[str]
    # Optional task description; empty by default.
    instruction: str = field(default="")
49
+
50
+
51
@dataclass
class Demonstration:
    """One worked input→output pair that can be shown as a few-shot example."""

    # Field name → value for the example's inputs.
    inputs: dict[str, str]
    # Field name → value for the example's outputs.
    outputs: dict[str, str]
    # How good this demo is at improving task performance; starts at 0.0.
    score: float = field(default=0.0)
57
+
58
+
59
class PromptOptimizer:
    """
    Automatically optimizes prompts by bootstrapping demonstrations.

    The DSPy approach adapted for Purpose Agent:
    1. Collect candidate demonstrations from traces
    2. Score candidate subsets by compiling them into a prompt and calling a metric
    3. Select the best K demonstrations
    4. Return an optimized prompt with the best demonstrations

    Usage:
        optimizer = PromptOptimizer(llm=model)

        # Define what the prompt should do
        sig = Signature(
            name="actor",
            inputs=["state", "purpose"],
            outputs=["thought", "action"],
            instruction="Decide the best next action.",
        )

        # Collect demonstrations from traces
        demos = optimizer.extract_demonstrations(traces, sig)

        # Optimize: find the best subset
        best = optimizer.optimize(sig, demos, metric_fn=my_metric, k=3)

        # Get the optimized prompt
        prompt = optimizer.compile_prompt(sig, best)
    """

    def __init__(self, llm: LLMBackend | None = None):
        """Store the optional LLM backend; no method of this class calls it."""
        self.llm = llm

    def extract_demonstrations(
        self,
        traces: list[Trace],
        signature: Signature,
        max_demos: int = 50,
    ) -> list[Demonstration]:
        """
        Extract candidate demonstrations from traces.

        An event becomes a candidate when its ``data`` contains at least one
        of the signature's output fields. Fields missing from the event are
        recorded as empty strings; present values are converted with ``str``.

        Args:
            traces: Traces whose ``events`` are scanned in order.
            signature: Declares which input/output fields to pull from each event.
            max_demos: Hard cap on the number of demonstrations returned,
                counted across all traces. Values <= 0 yield an empty list.

        Returns:
            At most ``max_demos`` demonstrations, in trace/event order.
        """
        demos: list[Demonstration] = []
        for demo in self._iter_demonstrations(traces, signature):
            # Check the cap before appending so it holds across traces, not
            # just within a single trace's event loop.
            if len(demos) >= max_demos:
                break
            demos.append(demo)

        logger.info(f"PromptOptimizer: Extracted {len(demos)} candidate demonstrations for '{signature.name}'")
        return demos

    def _iter_demonstrations(self, traces: list[Trace], signature: Signature):
        """Yield a Demonstration for every event carrying at least one output field."""
        for trace in traces:
            for event in trace.events:
                data = event.data
                if not any(f in data for f in signature.outputs):
                    continue
                yield Demonstration(
                    inputs={f: str(data.get(f, "")) for f in signature.inputs},
                    outputs={f: str(data.get(f, "")) for f in signature.outputs},
                )

    def optimize(
        self,
        signature: Signature,
        candidates: list[Demonstration],
        metric_fn: Callable[[str, dict], float] | None = None,
        k: int = 3,
        trials: int = 10,
    ) -> list[Demonstration]:
        """
        Select the best K demonstrations by trial-and-error.

        If metric_fn is provided, random K-subsets are compiled into prompts
        and scored with it; the highest-scoring subset wins and its score is
        written to each selected demo's ``score``. Otherwise a diversity
        heuristic is used (varied examples > similar ones).

        Args:
            signature: Signature the demonstrations belong to.
            candidates: Pool of demonstrations to choose from.
            metric_fn: Called as ``metric_fn(prompt, {"signature": name})``;
                higher is better. A trial whose call raises is logged and skipped.
            k: Number of demonstrations to select. Values <= 0 select nothing.
            trials: Number of random subsets to score.

        Returns:
            The selected demonstrations. When there are no more than ``k``
            candidates, the ``candidates`` list itself is returned unchanged.
        """
        if k <= 0:
            return []

        if len(candidates) <= k:
            return candidates

        if metric_fn is None:
            # Diversity-based selection: pick demos with different output patterns
            return self._diverse_select(candidates, k)

        # Trial-based optimization: sample subsets and score them
        best_subset = candidates[:k]
        best_score = -float("inf")
        any_scored = False

        for trial in range(trials):
            subset = random.sample(candidates, k)
            prompt = self.compile_prompt(signature, subset)

            try:
                score = metric_fn(prompt, {"signature": signature.name})
            except Exception:
                # Best-effort: a failing metric must not abort optimization,
                # but it also must not be scored as 0.0, which could beat
                # legitimately negative scores. Log it and skip the trial.
                logger.warning(
                    f"PromptOptimizer: Trial {trial+1} metric_fn failed; skipping",
                    exc_info=True,
                )
                continue

            any_scored = True
            if score > best_score:
                best_score = score
                best_subset = subset
                logger.debug(f"PromptOptimizer: Trial {trial+1} new best score={score:.3f}")

        if not any_scored:
            # Nothing was scored (no trials, or every trial failed): fall back
            # to the first k candidates and leave their scores untouched.
            logger.warning("PromptOptimizer: No trial produced a score; returning first candidates")
            return best_subset

        # Record scores on selected demos
        for demo in best_subset:
            demo.score = best_score

        logger.info(f"PromptOptimizer: Selected {len(best_subset)} demos (best_score={best_score:.3f})")
        return best_subset

    def compile_prompt(
        self,
        signature: Signature,
        demonstrations: list[Demonstration],
        *,
        max_demos: int = 5,
        max_field_chars: int = 150,
    ) -> str:
        """
        Compile a signature + demonstrations into a ready-to-use prompt.

        Args:
            signature: Supplies the task instruction and the field names.
            demonstrations: Few-shot examples to render.
            max_demos: At most this many demonstrations are rendered.
            max_field_chars: Each field value is truncated to this many characters.

        Returns:
            The prompt string: an optional "## Task" section, a "## Format"
            section, and an "## Examples" section when there is anything to show.
        """
        sections = []

        # Instruction
        if signature.instruction:
            sections.append(f"## Task\n{signature.instruction}")

        # Input/output format
        input_desc = ", ".join(signature.inputs)
        output_desc = ", ".join(signature.outputs)
        sections.append(f"## Format\nGiven: {input_desc}\nProduce: {output_desc}")

        # Demonstrations. Clamp limits at zero: a negative slice bound would
        # silently drop items from the end instead of limiting the count.
        shown = demonstrations[: max(max_demos, 0)]
        width = max(max_field_chars, 0)
        if shown:
            sections.append("## Examples")
            for i, demo in enumerate(shown, 1):
                lines = [f"### Example {i}"]
                # Empty values are omitted so missing fields don't clutter the prompt.
                lines.extend(
                    f" {name}: {value[:width]}"
                    for name, value in demo.inputs.items()
                    if value
                )
                lines.append(" →")
                lines.extend(
                    f" {name}: {value[:width]}"
                    for name, value in demo.outputs.items()
                    if value
                )
                sections.append("\n".join(lines))

        return "\n\n".join(sections)

    def _diverse_select(
        self, candidates: list[Demonstration], k: int
    ) -> list[Demonstration]:
        """
        Select up to ``k`` demonstrations, preferring distinct output patterns.

        The first pass keeps the first demo seen for each output key (the
        sorted output values, stringified and truncated to 50 characters).
        If that yields fewer than ``k``, remaining slots are filled with the
        not-yet-selected candidates in their original order.
        """
        if k <= 0:
            return []

        seen_outputs: set[str] = set()
        # Track positions rather than objects: dataclass equality would treat
        # two distinct demos with equal content as "already selected".
        picked: list[int] = []

        for idx, demo in enumerate(candidates):
            key = str(sorted(demo.outputs.values()))[:50]
            if key in seen_outputs:
                continue
            seen_outputs.add(key)
            picked.append(idx)
            if len(picked) >= k:
                break

        # Fill remaining with any unused candidates
        if len(picked) < k:
            used = set(picked)
            for idx in range(len(candidates)):
                if idx in used:
                    continue
                picked.append(idx)
                if len(picked) >= k:
                    break

        return [candidates[idx] for idx in picked]