File size: 12,203 Bytes
3110b12 590e9f6 3110b12 4853f45 3110b12 4853f45 3110b12 2d933e9 3110b12 2d933e9 3110b12 0b10145 3110b12 0b10145 3110b12 0b10145 3110b12 0b10145 3110b12 0b10145 3110b12 0b10145 3110b12 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 | """
Actor Module β The agent that executes actions in the environment.
Implements a ReAct-style (Reason + Act) loop where each step produces:
1. Thought: Chain-of-thought reasoning about the current state
2. Action: What to do next (name + params)
3. Expected Delta: What the actor predicts will change
The Actor's system prompt is dynamically composed from:
- Base instructions (static)
- Strategic memory heuristics (updated after each task β from MUSE)
- Retrieved procedural SOPs (fetched on demand β from MUSE)
- Tool-level "muscle memory" (returned with each observation β from MUSE)
This module is intentionally stateless between tasks β all learning happens
via the memory system that feeds into the prompt.
"""
from __future__ import annotations
import json
import logging
from typing import Any
from purpose_agent.types import Action, Heuristic, MemoryTier, State
from purpose_agent.llm_backend import ChatMessage, LLMBackend
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# System Prompt Templates
# ---------------------------------------------------------------------------

# Static base of the system prompt. The three {placeholders} are filled by
# Actor._build_system_prompt from the strategic and procedural memory tiers;
# tool memory is injected per step via ACTOR_STEP_PROMPT instead.
# Fix: repaired mojibake ("β" -> "—") in the rule text sent to the LLM.
ACTOR_SYSTEM_PROMPT = """\
You are a goal-directed agent. Your purpose is to achieve the stated goal
by taking incremental actions that each move the state closer to the goal.
## Your Decision Process
For each step, you MUST:
1. THINK: Analyze the current state. What has been achieved? What remains?
2. ACT: Choose the single best next action from available actions.
3. PREDICT: State specifically what you expect to change after this action.
## Rules
- Take ONE action per step. Never skip ahead or combine actions.
- Be specific in your predictions — name exact state fields you expect to change.
- If a previous action didn't produce the expected result, adapt your strategy.
- If you believe the goal is achieved, use action "DONE" with no parameters.
## Available Actions
{available_actions}
## Learned Strategies (from past experience)
{strategic_memory}
## Relevant Procedures
{procedural_memory}
"""

# Per-step user prompt, filled by Actor._build_step_prompt. The model is asked
# to reply in TOML; parsing of the reply happens in robust_parser / the
# structured-output path, not here.
ACTOR_STEP_PROMPT = """\
## Current Goal
{purpose}
## Current State
{state}
## Action History (last {history_window} steps)
{history}
## Tool Tips
{tool_memory}
Based on the current state and your goal, decide your next action.
Respond in this format:
```toml
thought = "Your reasoning about the current state and what to do next"
expected_delta = "What you expect to change"
[action]
name = "action_name"
[action.params]
param1 = "value1"
```
"""
class Actor:
    """
    The Actor agent — executes actions in an environment.

    The Actor does NOT evaluate its own performance. That's the Purpose
    Function's job. The Actor just reasons, acts, and predicts.

    Architecture notes (from MUSE arxiv:2510.08002):
    - System prompt is composed dynamically from 3-tier memory
    - Strategic memory is always present (global dilemmas → strategies)
    - Procedural memory is lazy-loaded (index in prompt, details on demand)
    - Tool memory is returned per-step (dynamic instructions with observations)

    Args:
        llm: The LLM backend to use for reasoning
        available_actions: Dict of {action_name: description} the agent can take
        history_window: How many past steps to include in the prompt
        strategic_memory: List of strategic heuristics (loaded at task start)
        procedural_memory: List of procedural SOPs (indexed, fetched on demand)
        tool_memory: Dict of {action_name: dynamic_tip} (updated per-step)
    """

    def __init__(
        self,
        llm: LLMBackend,
        available_actions: dict[str, str] | None = None,
        history_window: int = 5,
        strategic_memory: list[Heuristic] | None = None,
        procedural_memory: list[Heuristic] | None = None,
        tool_memory: dict[str, str] | None = None,
    ):
        self.llm = llm
        # Even with no configured actions the agent can always signal completion.
        self.available_actions = available_actions or {"DONE": "Signal that the goal is achieved"}
        self.history_window = history_window
        self.strategic_memory = strategic_memory or []
        self.procedural_memory = procedural_memory or []
        self.tool_memory = tool_memory or {}

    # ------------------------------------------------------------------
    # Prompt Composition
    # ------------------------------------------------------------------
    def _format_actions(self) -> str:
        """Render the available-actions catalog as a markdown bullet list."""
        if not self.available_actions:
            return "No specific action constraints. You may take any action."
        lines = []
        for name, desc in self.available_actions.items():
            lines.append(f"- **{name}**: {desc}")
        return "\n".join(lines)

    def _format_strategic_memory(self) -> str:
        """Render strategic heuristics, highest confidence (q_value) first."""
        if not self.strategic_memory:
            return "None yet — this is your first task."
        lines = []
        for h in sorted(self.strategic_memory, key=lambda x: -x.q_value):
            lines.append(f"- When: {h.pattern}\n Do: {h.strategy} (confidence: {h.q_value:.2f})")
        return "\n".join(lines)

    def _format_procedural_memory(self) -> str:
        """Render the SOP index; full SOP details are fetched on demand."""
        if not self.procedural_memory:
            return "No standard operating procedures available."
        lines = ["Available SOPs (ask for details if relevant):"]
        for h in self.procedural_memory:
            lines.append(f"- [{h.id}] {h.pattern}: {h.strategy}")
        return "\n".join(lines)

    def _format_tool_memory(self) -> str:
        """Render per-action tool tips collected from past observations."""
        if not self.tool_memory:
            return "No tool-specific tips available."
        lines = []
        for action_name, tip in self.tool_memory.items():
            lines.append(f"- **{action_name}**: {tip}")
        return "\n".join(lines)

    def _format_history(self, history: list[dict[str, Any]]) -> str:
        """Render the last `history_window` steps with absolute step numbers."""
        if not history:
            return "No actions taken yet."
        recent = history[-self.history_window:]
        lines = []
        for i, entry in enumerate(recent):
            # Step numbers are positions in the FULL history, not the window.
            step_num = len(history) - len(recent) + i + 1
            # Fix: coerce to str before slicing — results may be dicts or other
            # non-string observations; [:200] on a dict raises TypeError.
            # The 200-char cap bounds prompt size.
            result = str(entry.get('result', 'N/A'))[:200]
            lines.append(
                f"Step {step_num}: Action={entry.get('action', 'N/A')}, "
                f"Result={result}"
            )
        return "\n".join(lines)

    def _build_system_prompt(self) -> str:
        """Compose the system prompt from base instructions + memory tiers."""
        return ACTOR_SYSTEM_PROMPT.format(
            available_actions=self._format_actions(),
            strategic_memory=self._format_strategic_memory(),
            procedural_memory=self._format_procedural_memory(),
        )

    def _build_step_prompt(
        self, purpose: str, state: State, history: list[dict[str, Any]]
    ) -> str:
        """Compose the per-step user prompt (goal, state, history, tool tips)."""
        return ACTOR_STEP_PROMPT.format(
            purpose=purpose,
            state=state.describe(),
            history=self._format_history(history),
            tool_memory=self._format_tool_memory(),
            history_window=self.history_window,
        )

    # ------------------------------------------------------------------
    # Core Action Generation
    # ------------------------------------------------------------------
    def decide(
        self,
        purpose: str,
        current_state: State,
        history: list[dict[str, Any]] | None = None,
    ) -> Action:
        """
        Given the current state and purpose, decide the next action.

        Returns an Action with thought, name, params, and expected_delta.
        """
        history = history or []
        messages = [
            ChatMessage(role="system", content=self._build_system_prompt()),
            ChatMessage(role="user", content=self._build_step_prompt(
                purpose=purpose,
                state=current_state,
                history=history,
            )),
        ]
        # Universal parsing: try structured output, fall back to robust text parser.
        # NOTE(review): local import — presumably avoids a circular import at
        # module load time; confirm before hoisting to the top of the file.
        from purpose_agent.robust_parser import parse_actor_response
        try:
            result = self.llm.generate_structured(messages, schema={
                "type": "object",
                "properties": {
                    "thought": {"type": "string"},
                    "action": {
                        "type": "object",
                        "properties": {
                            "name": {"type": "string"},
                            "params": {"type": "object"},
                        },
                        "required": ["name"],
                    },
                    "expected_delta": {"type": "string"},
                },
                "required": ["thought", "action", "expected_delta"],
            })
        except Exception:
            # Structured output not available — use universal text parser
            raw = self.llm.generate(messages, temperature=0.7, max_tokens=2000)
            result = parse_actor_response(raw)
        action_data = result.get("action", {})
        if isinstance(action_data, str):
            # Some models return the action as a bare name string.
            action_data = {"name": action_data, "params": {}}
        return Action(
            name=action_data.get("name", "UNKNOWN") if isinstance(action_data, dict) else str(action_data),
            params=action_data.get("params", {}) if isinstance(action_data, dict) else {},
            thought=result.get("thought", ""),
            expected_delta=result.get("expected_delta", ""),
        )

    # ------------------------------------------------------------------
    # Memory Updates (called by Orchestrator between tasks)
    # ------------------------------------------------------------------
    def update_strategic_memory(self, heuristics: list[Heuristic]) -> None:
        """Replace strategic memory with updated heuristics."""
        self.strategic_memory = [
            h for h in heuristics if h.tier == MemoryTier.STRATEGIC
        ]
        # Lazy %-style args: no string formatting unless the record is emitted.
        logger.info("Actor strategic memory updated: %d heuristics", len(self.strategic_memory))

    def update_procedural_memory(self, heuristics: list[Heuristic]) -> None:
        """Update the procedural SOP index."""
        self.procedural_memory = [
            h for h in heuristics if h.tier == MemoryTier.PROCEDURAL
        ]
        logger.info("Actor procedural memory updated: %d SOPs", len(self.procedural_memory))

    def update_tool_memory(self, tips: dict[str, str]) -> None:
        """Merge per-action tool tips into existing tool memory (in place)."""
        self.tool_memory.update(tips)
        logger.info("Actor tool memory updated: %s", list(tips.keys()))

    # ------------------------------------------------------------------
    # Fallback Text Parser
    # ------------------------------------------------------------------
    @staticmethod
    def _parse_action_text(raw: str) -> dict[str, Any]:
        """Best-effort extraction of action JSON from free-form text.

        Tries progressively weaker strategies — whole-text JSON, fenced code
        block, brace matching, then field-level regex — and always returns a
        dict with 'thought', 'action', and 'expected_delta' keys.
        """
        import re
        # Strategy 1: the entire response is pure JSON.
        text = raw.strip()
        try:
            return json.loads(text)
        except (json.JSONDecodeError, ValueError):
            pass
        # Strategy 2: JSON inside a markdown ```json fence.
        code_match = re.search(r'```(?:json)?\s*(\{.*\})\s*```', text, re.DOTALL)
        if code_match:
            try:
                return json.loads(code_match.group(1))
            except (json.JSONDecodeError, ValueError):
                pass
        # Strategy 3: outermost { ... } found by brace matching. Heuristic:
        # braces inside JSON string values are not skipped.
        start = text.find('{')
        if start >= 0:
            depth = 0
            for i in range(start, len(text)):
                if text[i] == '{':
                    depth += 1
                elif text[i] == '}':
                    depth -= 1
                if depth == 0:
                    try:
                        return json.loads(text[start:i + 1])
                    except (json.JSONDecodeError, ValueError):
                        break
        # Strategy 4: pull individual fields out with regexes.
        thought = ""
        thought_match = re.search(r'"thought"\s*:\s*"((?:[^"\\]|\\.)*)"', text)
        if thought_match:
            thought = thought_match.group(1)
        action_name = "UNKNOWN"
        name_match = re.search(r'"name"\s*:\s*"([^"]*)"', text)
        if name_match:
            action_name = name_match.group(1)
        return {
            "thought": thought or raw[:200],
            "action": {"name": action_name, "params": {}},
            "expected_delta": "Unable to parse prediction",
        }