Rohan03 committed on
Commit
2bec614
·
verified ·
1 Parent(s): f7a7853

SRE fixes: 5 critical vulnerability patches (dict snapshot, UNKNOWN reject, token cap, fine-grained lock, None guard)

Browse files
Files changed (1) hide show
  1. purpose_agent/sre_patches.py +239 -0
purpose_agent/sre_patches.py ADDED
@@ -0,0 +1,239 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ sre_patches.py — Surgical fixes for the 5 critical vulnerabilities found in SRE audit.
3
+
4
+ These patches are applied at import time via purpose_agent.__init__.
5
+ They fix the actual runtime behavior without rewriting entire modules.
6
+
7
+ Fixes:
8
+ 1. MemoryStore.retrieve() — snapshot dict before iteration (prevents RuntimeError)
9
+ 2. Actor.decide() — reject UNKNOWN/empty actions (prevents garbage propagation)
10
+ 3. Actor._format_strategic_memory() / _format_procedural_memory() — hard cap of 5 + 5 = 10 heuristics in the prompt (prevents context overflow)
11
+ 4. ExperienceReplay — shared thread lock on add() / update_q_value() (prevents data corruption in swarm)
12
+ 5. Trajectory.cumulative_reward, total_delta, success_rate, final_phi — guard against None scores (prevents TypeError crash)
13
+
14
+ Import this module to apply all patches:
15
+ import purpose_agent.sre_patches # auto-applied
16
+ """
17
+ from __future__ import annotations
18
+
19
+ import logging
20
+ import threading
21
+ from typing import Any
22
+
23
+ logger = logging.getLogger("purpose_agent.sre")
24
+
25
+ _applied = False
26
+
27
+
28
def apply_all():
    """Install all five SRE patches; repeated calls do nothing.

    The module-level ``_applied`` flag is raised before the first patch
    runs, so a call that raises part-way through is not retried by later
    calls.
    """
    global _applied
    if not _applied:
        _applied = True
        patches = (
            _patch_memory_store_snapshot,
            _patch_actor_unknown_reject,
            _patch_actor_heuristic_cap,
            _patch_experience_replay_lock,
            _patch_trajectory_none_guard,
        )
        # Order matches the numbering used in the module docstring.
        for install_patch in patches:
            install_patch()
        logger.debug("SRE patches applied (5/5)")
41
+
42
+
43
+ # ═══════════════════════════════════════════════════════════════
44
+ # Fix 1: MemoryStore.retrieve() — snapshot before iteration
45
+ # ═══════════════════════════════════════════════════════════════
46
+
47
def _patch_memory_store_snapshot():
    """Replace ``MemoryStore.retrieve`` with a version that iterates a snapshot.

    Prevents ``RuntimeError: dictionary changed size during iteration`` when
    ``self._cards`` is modified while a retrieval is in progress.
    """
    from purpose_agent.memory import MemoryStore

    original_retrieve = MemoryStore.retrieve

    def safe_retrieve(self, query_text="", scope=None, kinds=None, statuses=None, top_k=10):
        """Return up to ``top_k`` cards ranked by a blended score.

        Args:
            query_text: Text to embed for relevance ranking. When empty, every
                card gets the neutral relevance 0.5.
            scope: Optional scope; cards whose ``scope.matches(scope)`` is
                falsy are skipped.
            kinds: Optional collection of accepted ``card.kind`` values.
            statuses: Accepted ``card.status`` values; defaults to
                ``[MemoryStatus.PROMOTED]`` when empty or ``None``.
            top_k: Maximum number of cards returned.

        Returns:
            The highest-scoring cards, best first.
        """
        from purpose_agent.memory import MemoryStatus

        statuses = statuses or [MemoryStatus.PROMOTED]
        candidates = []
        query_emb = self._embed(query_text) if query_text else None

        # FIX: copy the values BEFORE iterating so that concurrent inserts or
        # deletes on self._cards cannot invalidate the iterator.
        cards_snapshot = list(self._cards.values())

        for card in cards_snapshot:
            if card.status not in statuses:
                continue
            if kinds and card.kind not in kinds:
                continue
            if scope and not card.scope.matches(scope):
                continue

            relevance = 0.5
            if query_emb and card.embedding:
                relevance = self._cosine(query_emb, card.embedding)
            elif query_emb:
                # Card has no embedding yet: compute it once and store it on
                # the card so later retrievals can reuse it.
                card.embedding = self._embed(card.content or card.pattern)
                relevance = self._cosine(query_emb, card.embedding)

            # Blend: 40% query relevance, 30% trust, 30% utility.
            score = 0.4 * relevance + 0.3 * card.trust_score + 0.3 * card.utility_score
            candidates.append((score, card))

        # Sorting on the negated score keeps insertion order among ties.
        candidates.sort(key=lambda x: -x[0])
        return [c for _, c in candidates[:top_k]]

    # Keep the replaced implementation reachable for debugging or rollback.
    safe_retrieve._sre_original = original_retrieve
    MemoryStore.retrieve = safe_retrieve
87
+
88
+
89
+ # ═══════════════════════════════════════════════════════════════
90
+ # Fix 2: Actor.decide() — reject UNKNOWN/empty actions
91
+ # ═══════════════════════════════════════════════════════════════
92
+
93
def _patch_actor_unknown_reject():
    """Wrap ``Actor.decide`` so unusable actions never reach the environment.

    The wrapper calls the original ``decide`` and substitutes a ``DONE``
    action when the result is missing, has an empty name, or is named
    ``"UNKNOWN"``. It also coerces a non-dict ``params`` to ``{}``.
    """
    from purpose_agent.actor import Actor
    from purpose_agent.types import Action

    original_decide = Actor.decide

    def safe_decide(self, purpose, current_state, history=None):
        """Return the original decision, or a safe ``DONE`` fallback."""
        action = original_decide(self, purpose, current_state, history)

        # getattr() also covers a decide() that returned None, which would
        # otherwise raise AttributeError before the guard could trigger.
        name = getattr(action, "name", None)

        # Reject UNKNOWN/empty: safe fallback to DONE
        if not name or name == "UNKNOWN":
            logger.warning("Actor produced UNKNOWN action — falling back to DONE")
            return Action(
                name="DONE",
                params={},
                thought="[SRE] Failed to parse a valid action. Stopping safely.",
                expected_delta="",
            )

        # Ensure params is always a dict (never None)
        if not isinstance(action.params, dict):
            action.params = {}

        return action

    Actor.decide = safe_decide
120
+
121
+
122
+ # ═══════════════════════════════════════════════════════════════
123
+ # Fix 3: Actor heuristic cap — max K=10 in prompt
124
+ # ═══════════════════════════════════════════════════════════════
125
+
126
def _patch_actor_heuristic_cap():
    """Cap how many heuristics the Actor formats into its prompt.

    Replaces ``Actor._format_strategic_memory`` and
    ``Actor._format_procedural_memory`` with versions that keep only the
    entries with the highest ``q_value`` (5 strategic + 5 procedural), to
    prevent context window overflow from unbounded heuristic injection.
    """
    from purpose_agent.actor import Actor

    MAX_STRATEGIC = 5  # Max strategic heuristics in prompt
    MAX_PROCEDURAL = 5  # Max procedural SOPs in prompt

    original_format_strategic = Actor._format_strategic_memory

    def capped_format_strategic(self):
        """Format the top strategic heuristics, noting how many were left out."""
        if not self.strategic_memory:
            return "None yet — this is your first task."
        # Cap: only top K by Q-value
        top = sorted(self.strategic_memory, key=lambda x: -x.q_value)[:MAX_STRATEGIC]
        lines = [f"- When: {h.pattern}\n Do: {h.strategy}" for h in top]
        if len(self.strategic_memory) > MAX_STRATEGIC:
            lines.append(f" ({len(self.strategic_memory) - MAX_STRATEGIC} more available)")
        return "\n".join(lines)

    original_format_procedural = Actor._format_procedural_memory

    def capped_format_procedural(self):
        """Format the top procedures, truncating each strategy to 80 characters."""
        if not self.procedural_memory:
            return "No procedures available."
        top = sorted(self.procedural_memory, key=lambda x: -x.q_value)[:MAX_PROCEDURAL]
        lines = ["Available procedures:"]
        lines.extend(f"- {h.pattern}: {h.strategy[:80]}" for h in top)
        return "\n".join(lines)

    # Keep the replaced formatters reachable for debugging or rollback.
    capped_format_strategic._sre_original = original_format_strategic
    capped_format_procedural._sre_original = original_format_procedural

    Actor._format_strategic_memory = capped_format_strategic
    Actor._format_procedural_memory = capped_format_procedural
160
+
161
+
162
+ # ═══════════════════════════════════════════════════════════════
163
+ # Fix 4: ExperienceReplay — fine-grained threading lock
164
+ # ═══════════════════════════════════════════════════════════════
165
+
166
def _patch_experience_replay_lock():
    """Serialize ``ExperienceReplay.add`` and ``update_q_value`` across threads.

    Both methods are wrapped so they run while holding one lock that is
    shared by every ``ExperienceReplay`` instance (it lives in this
    function's closure, not on the instances).
    """
    from purpose_agent.experience_replay import ExperienceReplay

    # A single re-entrant lock for all instances. RLock rather than Lock:
    # both wrappers take the same lock, so if either original method calls
    # the other through ``self`` a plain Lock would deadlock the calling
    # thread on its own acquisition.
    _lock = threading.RLock()

    original_add = ExperienceReplay.add

    def locked_add(self, trajectory):
        """Call the original ``add`` while holding the shared lock."""
        with _lock:
            return original_add(self, trajectory)

    original_update_q = ExperienceReplay.update_q_value

    def locked_update_q(self, record_id, reward, alpha=0.1):
        """Call the original ``update_q_value`` while holding the shared lock."""
        with _lock:
            return original_update_q(self, record_id, reward, alpha)

    ExperienceReplay.add = locked_add
    ExperienceReplay.update_q_value = locked_update_q
187
+
188
+
189
+ # ═══════════════════════════════════════════════════════════════
190
+ # Fix 5: Trajectory — guard against None scores
191
+ # ═══════════════════════════════════════════════════════════════
192
+
193
def _patch_trajectory_none_guard():
    """Swap four ``Trajectory`` properties for versions that tolerate None.

    Steps whose ``score`` is ``None`` (and, for the delta-based properties,
    whose ``score.delta`` is ``None``) are ignored instead of raising
    ``TypeError``.
    """
    from purpose_agent.types import Trajectory

    def _steps_with_delta(trajectory):
        """Collect the steps that carry both a score and a delta."""
        return [
            step
            for step in trajectory.steps
            if not (step.score is None or step.score.delta is None)
        ]

    def _cumulative_reward(self) -> float:
        """Add up the strictly positive deltas of all usable steps."""
        reward = 0.0
        for step in _steps_with_delta(self):
            gain = step.score.delta
            if gain > 0:
                reward += gain
        return reward

    def _total_delta(self) -> float:
        """Add up every delta, positive or negative, of all usable steps."""
        net = 0.0
        for step in _steps_with_delta(self):
            net += step.score.delta
        return net

    def _success_rate(self) -> float:
        """Share of usable steps flagged ``improved``; 0.0 when there are none."""
        usable = _steps_with_delta(self)
        if len(usable) == 0:
            return 0.0
        wins = [step for step in usable if step.score.improved]
        return len(wins) / len(usable)

    def _final_phi(self) -> float | None:
        """``phi_after`` of the last step that has a score, else ``None``."""
        last_scored = None
        for step in self.steps:
            if step.score is not None:
                last_scored = step
        if last_scored is None:
            return None
        return last_scored.score.phi_after

    # Install each guarded getter as a read-only property on the class.
    replacements = {
        "cumulative_reward": _cumulative_reward,
        "total_delta": _total_delta,
        "success_rate": _success_rate,
        "final_phi": _final_phi,
    }
    for attr_name, getter in replacements.items():
        setattr(Trajectory, attr_name, property(getter))
236
+
237
+
238
+ # Auto-apply on import
239
+ apply_all()