Rohan03 committed
Commit b199fa3 · verified · 1 Parent(s): 12c2cae

Add purpose_agent/types.py

Files changed (1): purpose_agent/types.py (+215 -0)
purpose_agent/types.py ADDED

"""
Core data types for the Purpose Agent framework.

All modules exchange these types — this keeps the architecture modular
and lets you swap out any component without touching the others.
"""

from __future__ import annotations

import json
import time
import uuid
from dataclasses import dataclass, field
from enum import Enum
from typing import Any


# ---------------------------------------------------------------------------
# State & Action
# ---------------------------------------------------------------------------

@dataclass
class State:
    """
    A snapshot of the environment at a point in time.

    `data` is intentionally a free-form dict — the framework doesn't prescribe
    what "state" looks like. Your environment defines it.

    `summary` is an optional human-readable string (used in LLM prompts).
    If not set, the Purpose Function will serialize `data` to JSON.
    """
    data: dict[str, Any]
    summary: str | None = None
    timestamp: float = field(default_factory=time.time)
    id: str = field(default_factory=lambda: uuid.uuid4().hex[:12])

    def describe(self) -> str:
        """Return a prompt-friendly description of this state."""
        if self.summary:
            return self.summary
        return json.dumps(self.data, indent=2, default=str)


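# Illustrative usage (a sketch added for this writeup, not part of the
# original commit; the example data is invented):
#
#     s = State(data={"tests_passing": 8, "tests_failing": 2})
#     s.describe()   # no summary set -> returns the JSON dump of `data`
#     s = State(data={}, summary="Repo checked out; 2 tests failing")
#     s.describe()   # -> "Repo checked out; 2 tests failing"

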
@dataclass
class Action:
    """
    An action the Actor decided to take.

    Attributes:
        name: Action identifier (e.g. "search", "move_north", "edit_file")
        params: Parameters for the action
        thought: The Actor's chain-of-thought reasoning for this action
        expected_delta: What the Actor *expects* to change in the state
    """
    name: str
    params: dict[str, Any] = field(default_factory=dict)
    thought: str = ""
    expected_delta: str = ""

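# Illustrative usage (invented example, added for this writeup):
#
#     a = Action(
#         name="edit_file",
#         params={"path": "src/app.py"},
#         thought="The failing test points at a typo in src/app.py.",
#         expected_delta="tests_failing drops from 2 to 1",
#     )
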

# ---------------------------------------------------------------------------
# Trajectory
# ---------------------------------------------------------------------------

@dataclass
class PurposeScore:
    """
    Output of the Purpose Function for a single state transition.

    Inspired by the LATS (arxiv:2310.04406) V(s) formulation:
        score = λ·LM_score + (1-λ)·consistency_score

    Anti-reward-hacking: `evidence` must cite specific observable state
    changes (per SPC, arxiv:2504.19162). The orchestrator can reject scores
    with vague evidence.
    """
    phi_before: float  # Φ(s_current) — purpose-alignment of the old state (higher = closer)
    phi_after: float   # Φ(s_new) — purpose-alignment of the new state
    delta: float       # Φ(s_new) - Φ(s_current) — positive means progress
    reasoning: str     # Step-by-step justification from the LLM
    evidence: str      # Specific observable changes cited as evidence
    confidence: float  # 0.0–1.0 — how confident the critic is

    @property
    def improved(self) -> bool:
        """Did this transition move closer to the purpose?"""
        return self.delta > 0.0


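# Worked example of the V(s)-style formula above (numbers invented;
# `lam`, the LM score, and the consistency score are hypothetical critic
# outputs, not part of this module's API):
#
#     lam = 0.7
#     phi_after = lam * 0.8 + (1 - lam) * 0.6   # = 0.74
#     score = PurposeScore(
#         phi_before=0.50,
#         phi_after=phi_after,
#         delta=phi_after - 0.50,               # +0.24
#         reasoning="Two more tests pass after the edit.",
#         evidence="pytest summary went from 8/10 to 10/10 passing",
#         confidence=0.9,
#     )
#     score.improved   # True, since delta > 0

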
@dataclass
class TrajectoryStep:
    """A single step in a trajectory: state → action → new_state → score."""
    state_before: State
    action: Action
    state_after: State
    score: PurposeScore | None = None
    step_index: int = 0
    wall_time_s: float = 0.0


@dataclass
class Trajectory:
    """
    A complete sequence of steps for one task attempt.

    The Experience Replay module stores these, and the Optimizer extracts
    heuristics from high-reward trajectories.
    """
    task_description: str
    purpose: str
    steps: list[TrajectoryStep] = field(default_factory=list)
    id: str = field(default_factory=lambda: uuid.uuid4().hex[:12])
    created_at: float = field(default_factory=time.time)

    @property
    def cumulative_reward(self) -> float:
        """Sum of all positive deltas in the trajectory."""
        return sum(
            s.score.delta for s in self.steps
            if s.score is not None and s.score.delta > 0
        )

    @property
    def total_delta(self) -> float:
        """Net change in Φ across the entire trajectory (negatives included)."""
        return sum(
            s.score.delta for s in self.steps if s.score is not None
        )

    @property
    def success_rate(self) -> float:
        """Fraction of scored steps that improved the state."""
        scored = [s for s in self.steps if s.score is not None]
        if not scored:
            return 0.0
        return sum(1 for s in scored if s.score.improved) / len(scored)

    @property
    def final_phi(self) -> float | None:
        """Final Φ (purpose-alignment) at the end of the trajectory."""
        scored = [s for s in self.steps if s.score is not None]
        if not scored:
            return None
        return scored[-1].score.phi_after


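# Illustrative aggregation (invented numbers): for a trajectory `t` whose
# two scored steps carry delta=+0.3 and delta=-0.1,
#
#     t.cumulative_reward   # 0.3  (positive deltas only)
#     t.total_delta         # 0.2  (net change, negatives included)
#     t.success_rate        # 0.5  (1 improving step out of 2 scored)

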
# ---------------------------------------------------------------------------
# Memory & Heuristics
# ---------------------------------------------------------------------------

class MemoryTier(Enum):
    """
    3-tier memory hierarchy from MUSE (arxiv:2510.08002):

    STRATEGIC: High-level <Dilemma, Strategy> pairs loaded into the system
        prompt. Updated after each completed task via global distillation.

    PROCEDURAL: SOPs (Standard Operating Procedures) indexed by task pattern.
        Loaded lazily — only the index is in context; full SOPs are fetched
        on demand.

    TOOL: Per-action "muscle memory" — dynamic instructions returned with
        each observation. Updated when new action patterns prove effective.
    """
    STRATEGIC = "strategic"
    PROCEDURAL = "procedural"
    TOOL = "tool"


@dataclass
class Heuristic:
    """
    A learned heuristic extracted from a high-reward trajectory.

    Inspired by the CER (arxiv:2506.06698) distillation format:
    - `pattern`: The abstract situation (with {variable} placeholders)
    - `strategy`: What to do in that situation
    - `steps`: Concrete action sequence (a procedural SOP)
    """
    pattern: str           # When to apply (e.g. "When facing {obstacle_type} near {location}")
    strategy: str          # What to do (e.g. "Decompose into sub-tasks, handle {x} first")
    steps: list[str]       # Concrete action sequence
    tier: MemoryTier       # Which memory tier this belongs to
    source_trajectory_id: str = ""
    q_value: float = 0.5   # Learned quality estimate (REMEMBERER-style Q-value)
    times_used: int = 0
    times_succeeded: int = 0
    id: str = field(default_factory=lambda: uuid.uuid4().hex[:12])

    @property
    def empirical_success_rate(self) -> float:
        if self.times_used == 0:
            return 0.5  # Prior before any usage data
        return self.times_succeeded / self.times_used

    def update_q_value(self, reward: float, alpha: float = 0.1) -> None:
        """
        Monte Carlo Q-value update (from REMEMBERER, arxiv:2306.07929):
            Q_new = Q_old + α * (reward - Q_old)
        """
        self.q_value = self.q_value + alpha * (reward - self.q_value)


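# Worked update (invented numbers): from the default q_value of 0.5, two
# consecutive rewards of 1.0 with the default alpha=0.1 give
#
#     h.update_q_value(1.0)   # 0.5  + 0.1 * (1.0 - 0.5)  -> q_value == 0.55
#     h.update_q_value(1.0)   # 0.55 + 0.1 * (1.0 - 0.55) -> q_value == 0.595
#
# i.e. the estimate moves toward the observed reward at rate alpha.

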
@dataclass
class MemoryRecord:
    """
    A single record in the experience replay buffer.

    Combines the trajectory with its distilled heuristics, plus metadata
    for retrieval ranking (semantic similarity + Q-value, per MemRL
    arxiv:2601.03192).
    """
    trajectory: Trajectory
    heuristics: list[Heuristic] = field(default_factory=list)
    task_embedding: list[float] | None = None  # For semantic retrieval
    retrieval_q_value: float = 0.5             # Learned utility for retrieval ranking
    id: str = field(default_factory=lambda: uuid.uuid4().hex[:12])
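

# How a retriever might combine the two ranking signals above (a sketch under
# assumptions: this file only defines the stored fields, the ranking function
# itself is not part of this commit, and `beta` / `cosine_similarity` are
# hypothetical):
#
#     def retrieval_score(rec: MemoryRecord, query_emb: list[float],
#                         beta: float = 0.5) -> float:
#         sim = cosine_similarity(rec.task_embedding, query_emb)  # hypothetical helper
#         return beta * sim + (1 - beta) * rec.retrieval_q_value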
+ id: str = field(default_factory=lambda: uuid.uuid4().hex[:12])