File size: 9,837 Bytes
2bec614
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
"""
sre_patches.py — Surgical fixes for the 5 critical vulnerabilities found in the SRE audit.

These patches are applied at import time via purpose_agent.__init__.
They fix the actual runtime behavior without rewriting entire modules.

Fixes:
  1. MemoryStore.retrieve() — snapshot dict before iteration (prevents RuntimeError)
  2. Actor.decide() — reject UNKNOWN/empty actions (prevents garbage propagation)
  3. Actor._build_system_prompt() — hard cap K=10 heuristics, 5 strategic + 5 procedural (prevents context overflow)
  4. ExperienceReplay — threading lock on mutations (prevents data corruption in swarm)
  5. Trajectory.cumulative_reward (plus total_delta, success_rate, final_phi) — guard against None scores (prevents TypeError crash)

Import this module to apply all patches:
    import purpose_agent.sre_patches  # auto-applied
"""
from __future__ import annotations

import logging
import threading
from typing import Any

# Dedicated logger for messages emitted by the SRE patches in this module.
logger = logging.getLogger("purpose_agent.sre")

# Flipped to True by apply_all() on its first call; later calls see it and return immediately.
_applied = False


def apply_all():
    """Install every SRE patch, at most once per process.

    The first call flips the module-level ``_applied`` flag and then runs
    each patch installer in order; any later call sees the flag and returns
    without doing anything.  The flag is set *before* the installers run, so
    a call that fails part-way through is not retried by a subsequent call.
    """
    global _applied
    if not _applied:
        _applied = True
        installers = (
            _patch_memory_store_snapshot,
            _patch_actor_unknown_reject,
            _patch_actor_heuristic_cap,
            _patch_experience_replay_lock,
            _patch_trajectory_none_guard,
        )
        for install in installers:
            install()
        logger.debug("SRE patches applied (5/5)")


# ═══════════════════════════════════════════════════════════════
# Fix 1: MemoryStore.retrieve() — snapshot before iteration
# ═══════════════════════════════════════════════════════════════

def _patch_memory_store_snapshot():
    """Replace ``MemoryStore.retrieve`` with a version that iterates a snapshot.

    Prevents ``RuntimeError: dictionary changed size during iteration`` by
    copying the values of ``self._cards`` into a list before looping, so the
    loop never walks the live dict.
    """
    from purpose_agent.memory import MemoryStatus, MemoryStore

    def safe_retrieve(self, query_text="", scope=None, kinds=None, statuses=None, top_k=10):
        """Patched: iterates over snapshot of _cards, not live dict.

        Args:
            query_text: Text embedded with ``self._embed`` for relevance
                scoring; when empty every card gets a neutral relevance of 0.5.
            scope: When truthy, cards whose ``scope.matches(scope)`` is false
                are skipped.
            kinds: When truthy, only cards whose ``kind`` is in it are kept.
            statuses: Allowed card statuses; defaults to
                ``[MemoryStatus.PROMOTED]`` when falsy.
            top_k: Maximum number of cards returned.

        Returns:
            Up to ``top_k`` cards ordered by descending combined score
            (0.4 * relevance + 0.3 * trust_score + 0.3 * utility_score).
        """
        statuses = statuses or [MemoryStatus.PROMOTED]
        query_emb = self._embed(query_text) if query_text else None

        # FIX: snapshot the values BEFORE iteration
        cards_snapshot = list(self._cards.values())

        candidates = []
        for card in cards_snapshot:
            if card.status not in statuses:
                continue
            if kinds and card.kind not in kinds:
                continue
            if scope and not card.scope.matches(scope):
                continue

            relevance = 0.5
            if query_emb and card.embedding:
                relevance = self._cosine(query_emb, card.embedding)
            elif query_emb:
                # No usable embedding on the card yet: compute one and store
                # it on the card so later retrievals can reuse it.
                card.embedding = self._embed(card.content or card.pattern)
                relevance = self._cosine(query_emb, card.embedding)

            score = 0.4 * relevance + 0.3 * card.trust_score + 0.3 * card.utility_score
            candidates.append((score, card))

        # Stable sort: cards with equal scores keep their snapshot order.
        candidates.sort(key=lambda pair: -pair[0])
        return [card for _, card in candidates[:top_k]]

    MemoryStore.retrieve = safe_retrieve


# ═══════════════════════════════════════════════════════════════
# Fix 2: Actor.decide() — reject UNKNOWN/empty actions
# ═══════════════════════════════════════════════════════════════

def _patch_actor_unknown_reject():
    """Wrap ``Actor.decide`` so unusable actions never reach the environment.

    The wrapper calls the original ``decide`` and then:

    * replaces a missing (``None``) action, or one whose ``name`` is empty or
      ``"UNKNOWN"``, with a ``DONE`` action and logs a warning;
    * replaces a non-dict ``params`` on an otherwise valid action with ``{}``.
    """
    from purpose_agent.actor import Actor
    from purpose_agent.types import Action

    original_decide = Actor.decide

    def safe_decide(self, purpose, current_state, history=None):
        """Delegate to the original ``decide`` and sanitise its result."""
        action = original_decide(self, purpose, current_state, history)

        # Reject missing/UNKNOWN/empty — safe fallback to DONE.
        # ``action is None`` is checked first so a missing result takes the
        # same fallback instead of raising AttributeError on ``.name``.
        if action is None or not action.name or action.name == "UNKNOWN":
            logger.warning("Actor produced UNKNOWN action — falling back to DONE")
            return Action(
                name="DONE",
                params={},
                thought="[SRE] Failed to parse a valid action. Stopping safely.",
                expected_delta="",
            )

        # Ensure params is always a dict (never None)
        if not isinstance(action.params, dict):
            action.params = {}

        return action

    Actor.decide = safe_decide


# ═══════════════════════════════════════════════════════════════
# Fix 3: Actor heuristic cap — max K=10 in prompt
# ═══════════════════════════════════════════════════════════════

def _patch_actor_heuristic_cap():
    """Cap how many heuristics the Actor's memory formatters emit.

    Replaces ``Actor._format_strategic_memory`` and
    ``Actor._format_procedural_memory`` with versions that keep only the
    highest-``q_value`` entries (5 each), so the formatted text stays bounded
    however many heuristics have accumulated.
    """
    from purpose_agent.actor import Actor

    MAX_STRATEGIC = 5   # Max strategic heuristics in prompt
    MAX_PROCEDURAL = 5  # Max procedural SOPs in prompt

    def capped_format_strategic(self):
        """Format the top strategic heuristics, noting how many were left out."""
        if not self.strategic_memory:
            return "None yet — this is your first task."
        # Cap: only top K by Q-value
        top = sorted(self.strategic_memory, key=lambda x: -x.q_value)[:MAX_STRATEGIC]
        lines = [f"- When: {h.pattern}\n  Do: {h.strategy}" for h in top]
        hidden = len(self.strategic_memory) - MAX_STRATEGIC
        if hidden > 0:
            lines.append(f"  ({hidden} more available)")
        return "\n".join(lines)

    def capped_format_procedural(self):
        """Format the top procedures, truncating each strategy to 80 characters."""
        if not self.procedural_memory:
            return "No procedures available."
        top = sorted(self.procedural_memory, key=lambda x: -x.q_value)[:MAX_PROCEDURAL]
        lines = ["Available procedures:"]
        lines.extend(f"- {h.pattern}: {h.strategy[:80]}" for h in top)
        return "\n".join(lines)

    Actor._format_strategic_memory = capped_format_strategic
    Actor._format_procedural_memory = capped_format_procedural


# ═══════════════════════════════════════════════════════════════
# Fix 4: ExperienceReplay — threading lock around mutating methods
# ═══════════════════════════════════════════════════════════════

def _patch_experience_replay_lock():
    """Serialise ``ExperienceReplay`` mutations behind one lock for swarm() safety.

    ``ExperienceReplay.add`` and ``ExperienceReplay.update_q_value`` are
    replaced by wrappers that hold a lock while delegating to the original
    methods and return whatever those methods return.
    """
    from purpose_agent.experience_replay import ExperienceReplay

    # One lock shared by every ExperienceReplay instance and by both patched
    # methods.  It is re-entrant (RLock) so that a patched method which calls
    # the other patched method on the same thread cannot deadlock on itself;
    # other threads are still excluded exactly as with a plain Lock.
    _lock = threading.RLock()

    original_add = ExperienceReplay.add

    def locked_add(self, trajectory):
        """Run the original ``add`` while holding the shared lock."""
        with _lock:
            return original_add(self, trajectory)

    original_update_q = ExperienceReplay.update_q_value

    def locked_update_q(self, record_id, reward, alpha=0.1):
        """Run the original ``update_q_value`` while holding the shared lock."""
        with _lock:
            return original_update_q(self, record_id, reward, alpha)

    ExperienceReplay.add = locked_add
    ExperienceReplay.update_q_value = locked_update_q


# ═══════════════════════════════════════════════════════════════
# Fix 5: Trajectory — guard against None scores
# ═══════════════════════════════════════════════════════════════

def _patch_trajectory_none_guard():
    """Prevent TypeError when score is None in trajectory calculations."""
    from purpose_agent.types import Trajectory

    @property
    def safe_cumulative_reward(self) -> float:
        """Sum of positive deltas, guarding against None scores."""
        total = 0.0
        for s in self.steps:
            if s.score is not None and s.score.delta is not None and s.score.delta > 0:
                total += s.score.delta
        return total

    @property
    def safe_total_delta(self) -> float:
        """Net improvement, guarding against None scores."""
        total = 0.0
        for s in self.steps:
            if s.score is not None and s.score.delta is not None:
                total += s.score.delta
        return total

    @property
    def safe_success_rate(self) -> float:
        """Fraction of steps that improved, guarding against None."""
        scored = [s for s in self.steps if s.score is not None and s.score.delta is not None]
        if not scored:
            return 0.0
        return sum(1 for s in scored if s.score.improved) / len(scored)

    @property
    def safe_final_phi(self) -> float | None:
        """Final Ξ¦, guarding against None."""
        scored = [s for s in self.steps if s.score is not None]
        if not scored:
            return None
        return scored[-1].score.phi_after

    # Replace the properties
    Trajectory.cumulative_reward = safe_cumulative_reward
    Trajectory.total_delta = safe_total_delta
    Trajectory.success_rate = safe_success_rate
    Trajectory.final_phi = safe_final_phi


# Auto-apply on import: executing this module calls apply_all(), so importing
# it is enough to install the patches; apply_all() itself guards against
# running twice.
apply_all()