"""
Actor Module — The agent that executes actions in the environment.

Implements a ReAct-style (Reason + Act) loop where each step produces:
  1. Thought: Chain-of-thought reasoning about the current state
  2. Action: What to do next (name + params)
  3. Expected Delta: What the actor predicts will change

The Actor's system prompt is dynamically composed from:
  - Base instructions (static)
  - Strategic memory heuristics (updated after each task — from MUSE)
  - Retrieved procedural SOPs (fetched on demand — from MUSE)
  - Tool-level "muscle memory" (returned with each observation — from MUSE)

This module is intentionally stateless between tasks — all learning happens
via the memory system that feeds into the prompt.
"""

from __future__ import annotations

import json
import logging
import re
from typing import Any

from purpose_agent.types import Action, Heuristic, MemoryTier, State
from purpose_agent.llm_backend import ChatMessage, LLMBackend

logger = logging.getLogger(__name__)


# ---------------------------------------------------------------------------
# System Prompt Templates
# ---------------------------------------------------------------------------

ACTOR_SYSTEM_PROMPT = """\
You are a goal-directed agent. Your purpose is to achieve the stated goal by
taking incremental actions that each move the state closer to the goal.

## Your Decision Process
For each step, you MUST:
1. THINK: Analyze the current state. What has been achieved? What remains?
2. ACT: Choose the single best next action from available actions.
3. PREDICT: State specifically what you expect to change after this action.

## Rules
- Take ONE action per step. Never skip ahead or combine actions.
- Be specific in your predictions — name exact state fields you expect to change.
- If a previous action didn't produce the expected result, adapt your strategy.
- If you believe the goal is achieved, use action "DONE" with no parameters.

## Available Actions
{available_actions}

## Learned Strategies (from past experience)
{strategic_memory}

## Relevant Procedures
{procedural_memory}
"""

ACTOR_STEP_PROMPT = """\
## Current Goal
{purpose}

## Current State
{state}

## Action History (last {history_window} steps)
{history}

## Tool Tips
{tool_memory}

Based on the current state and your goal, decide your next action.
Respond in this format:
```toml
thought = "Your reasoning about the current state and what to do next"
expected_delta = "What you expect to change"

[action]
name = "action_name"

[action.params]
param1 = "value1"
```
"""

# JSON schema handed to ``LLMBackend.generate_structured`` by ``Actor.decide``.
_ACTION_SCHEMA: dict[str, Any] = {
    "type": "object",
    "properties": {
        "thought": {"type": "string"},
        "action": {
            "type": "object",
            "properties": {
                "name": {"type": "string"},
                "params": {"type": "object"},
            },
            "required": ["name"],
        },
        "expected_delta": {"type": "string"},
    },
    "required": ["thought", "action", "expected_delta"],
}

# Patterns used by ``Actor._parse_action_text``; compiled once at import time.
_CODE_BLOCK_RE = re.compile(r'```(?:json)?\s*(\{.*\})\s*```', re.DOTALL)
_THOUGHT_RE = re.compile(r'"thought"\s*:\s*"((?:[^"\\]|\\.)*)"')
_NAME_RE = re.compile(r'"name"\s*:\s*"([^"]*)"')


def _load_json_object(candidate: str) -> dict[str, Any] | None:
    """Return *candidate* parsed as a JSON object, or ``None`` if it is not one."""
    try:
        parsed = json.loads(candidate)
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        return None
    return parsed if isinstance(parsed, dict) else None


class Actor:
    """
    The Actor agent — executes actions in an environment.

    The Actor does NOT evaluate its own performance. That's the Purpose
    Function's job. The Actor just reasons, acts, and predicts.

    Architecture notes (from MUSE arxiv:2510.08002):
    - System prompt is composed dynamically from 3-tier memory
    - Strategic memory is always present (global dilemmas → strategies)
    - Procedural memory is lazy-loaded (index in prompt, details on demand)
    - Tool memory is returned per-step (dynamic instructions with observations)

    Args:
        llm: The LLM backend to use for reasoning
        available_actions: Dict of {action_name: description} the agent can take
        history_window: How many past steps to include in the prompt
        strategic_memory: List of strategic heuristics (loaded at task start)
        procedural_memory: List of procedural SOPs (indexed, fetched on demand)
        tool_memory: Dict of {action_name: dynamic_tip} (updated per-step)
    """

    def __init__(
        self,
        llm: LLMBackend,
        available_actions: dict[str, str] | None = None,
        history_window: int = 5,
        strategic_memory: list[Heuristic] | None = None,
        procedural_memory: list[Heuristic] | None = None,
        tool_memory: dict[str, str] | None = None,
    ):
        self.llm = llm
        self.available_actions = available_actions or {
            "DONE": "Signal that the goal is achieved"
        }
        self.history_window = history_window
        self.strategic_memory = strategic_memory or []
        self.procedural_memory = procedural_memory or []
        # Copy: update_tool_memory() mutates this dict in place and must not
        # leak those writes back into the mapping the caller passed in.
        self.tool_memory = dict(tool_memory) if tool_memory else {}

    # ------------------------------------------------------------------
    # Prompt Composition
    # ------------------------------------------------------------------

    def _format_actions(self) -> str:
        """Render the available actions as a markdown bullet list."""
        if not self.available_actions:
            return "No specific action constraints. You may take any action."
        return "\n".join(
            f"- **{name}**: {desc}" for name, desc in self.available_actions.items()
        )

    def _format_strategic_memory(self) -> str:
        """Render strategic heuristics, highest ``q_value`` first."""
        if not self.strategic_memory:
            return "None yet — this is your first task."
        ranked = sorted(self.strategic_memory, key=lambda h: -h.q_value)
        return "\n".join(
            f"- When: {h.pattern}\n Do: {h.strategy} (confidence: {h.q_value:.2f})"
            for h in ranked
        )

    def _format_procedural_memory(self) -> str:
        """Render the SOP index (id, pattern and strategy of each entry)."""
        if not self.procedural_memory:
            return "No standard operating procedures available."
        lines = ["Available SOPs (ask for details if relevant):"]
        lines.extend(
            f"- [{h.id}] {h.pattern}: {h.strategy}" for h in self.procedural_memory
        )
        return "\n".join(lines)

    def _format_tool_memory(self) -> str:
        """Render the per-action tool tips as a markdown bullet list."""
        if not self.tool_memory:
            return "No tool-specific tips available."
        return "\n".join(
            f"- **{action_name}**: {tip}"
            for action_name, tip in self.tool_memory.items()
        )

    def _format_history(self, history: list[dict[str, Any]]) -> str:
        """
        Render the most recent ``history_window`` entries of *history*.

        Each entry is read through ``.get('action')`` / ``.get('result')``;
        the result is stringified and truncated to 200 characters. Steps are
        numbered by their position in the full history, not in the window.
        """
        if not history:
            return "No actions taken yet."

        window = self.history_window
        if window <= 0:
            # history[-0:] is the WHOLE list and a negative window would slice
            # from the front, so a non-positive window is handled explicitly.
            return f"Action history omitted (history_window={window})."

        recent = history[-window:]
        first_step = len(history) - len(recent) + 1
        return "\n".join(
            f"Step {step_num}: Action={entry.get('action', 'N/A')}, "
            # str(): the stored result is not guaranteed to be sliceable text.
            f"Result={str(entry.get('result', 'N/A'))[:200]}"
            for step_num, entry in enumerate(recent, start=first_step)
        )

    def _build_system_prompt(self) -> str:
        """Fill ``ACTOR_SYSTEM_PROMPT`` with the actions and both memory tiers."""
        return ACTOR_SYSTEM_PROMPT.format(
            available_actions=self._format_actions(),
            strategic_memory=self._format_strategic_memory(),
            procedural_memory=self._format_procedural_memory(),
        )

    def _build_step_prompt(
        self, purpose: str, state: State, history: list[dict[str, Any]]
    ) -> str:
        """Fill ``ACTOR_STEP_PROMPT`` with goal, state, history and tool tips."""
        return ACTOR_STEP_PROMPT.format(
            purpose=purpose,
            state=state.describe(),
            history=self._format_history(history),
            tool_memory=self._format_tool_memory(),
            history_window=self.history_window,
        )

    # ------------------------------------------------------------------
    # Core Action Generation
    # ------------------------------------------------------------------

    def decide(
        self,
        purpose: str,
        current_state: State,
        history: list[dict[str, Any]] | None = None,
    ) -> Action:
        """
        Given the current state and purpose, decide the next action.

        Structured generation is tried first; if it raises, the same messages
        are sent through plain generation and the text is handed to
        ``parse_actor_response``.

        Returns an Action with thought, name, params, and expected_delta.
        """
        history = history or []
        messages = [
            ChatMessage(role="system", content=self._build_system_prompt()),
            ChatMessage(
                role="user",
                content=self._build_step_prompt(
                    purpose=purpose,
                    state=current_state,
                    history=history,
                ),
            ),
        ]

        # Universal parsing: try structured output, fall back to robust text parser
        from purpose_agent.robust_parser import parse_actor_response

        try:
            result = self.llm.generate_structured(messages, schema=_ACTION_SCHEMA)
        except Exception:
            # Deliberate best-effort fallback, but keep the cause discoverable
            # instead of discarding it silently.
            logger.debug(
                "Structured generation failed; falling back to text parsing",
                exc_info=True,
            )
            raw = self.llm.generate(messages, temperature=0.7, max_tokens=2000)
            result = parse_actor_response(raw)

        return self._action_from_result(result)

    @staticmethod
    def _action_from_result(result: Any) -> Action:
        """Build an ``Action`` from a parsed response, tolerating malformed shapes."""
        if not isinstance(result, dict):
            logger.warning(
                "Actor response was %s, not a mapping; treating it as empty",
                type(result).__name__,
            )
            result = {}

        action_data = result.get("action")
        if action_data is None:
            action_data = {}

        if isinstance(action_data, dict):
            name = str(action_data.get("name") or "UNKNOWN")
            params = action_data.get("params")
            if not isinstance(params, dict):
                # Covers a missing key as well as an explicit null / wrong type.
                params = {}
        else:
            # A bare value (typically a string) is taken as the action name.
            name = str(action_data)
            params = {}

        return Action(
            name=name,
            params=params,
            thought=result.get("thought") or "",
            expected_delta=result.get("expected_delta") or "",
        )

    # ------------------------------------------------------------------
    # Memory Updates (called by Orchestrator between tasks)
    # ------------------------------------------------------------------

    def update_strategic_memory(self, heuristics: list[Heuristic]) -> None:
        """Replace strategic memory with the STRATEGIC-tier entries of *heuristics*."""
        self.strategic_memory = [
            h for h in heuristics if h.tier == MemoryTier.STRATEGIC
        ]
        logger.info(
            "Actor strategic memory updated: %d heuristics",
            len(self.strategic_memory),
        )

    def update_procedural_memory(self, heuristics: list[Heuristic]) -> None:
        """Replace the SOP index with the PROCEDURAL-tier entries of *heuristics*."""
        self.procedural_memory = [
            h for h in heuristics if h.tier == MemoryTier.PROCEDURAL
        ]
        logger.info(
            "Actor procedural memory updated: %d SOPs",
            len(self.procedural_memory),
        )

    def update_tool_memory(self, tips: dict[str, str]) -> None:
        """Merge *tips* into the per-action tool tips (existing keys are overwritten)."""
        self.tool_memory.update(tips)
        logger.info("Actor tool memory updated: %s", list(tips))

    # ------------------------------------------------------------------
    # Fallback Text Parser
    # ------------------------------------------------------------------

    @staticmethod
    def _parse_action_text(raw: str) -> dict[str, Any]:
        """
        Best-effort extraction of action JSON from free-form text.

        Always returns a dict. Strategies, in order: the whole text as JSON,
        a JSON object inside a markdown code fence, the JSON object starting
        at the first ``{``, and finally regex extraction of the ``"thought"``
        and ``"name"`` fields.
        """
        text = raw.strip()

        # Strategy 1: the entire response is a JSON object.
        parsed = _load_json_object(text)
        if parsed is not None:
            return parsed

        # Strategy 2: a JSON object inside a markdown code block.
        fenced = _CODE_BLOCK_RE.search(text)
        if fenced:
            parsed = _load_json_object(fenced.group(1))
            if parsed is not None:
                return parsed

        # Strategy 3: decode the object that starts at the first '{'.
        # raw_decode stops at the end of that object and, unlike counting
        # braces by hand, is not confused by '{' or '}' inside string values.
        start = text.find("{")
        if start >= 0:
            try:
                candidate, _end = json.JSONDecoder().raw_decode(text, start)
            except ValueError:
                candidate = None
            if isinstance(candidate, dict):
                return candidate

        # Strategy 4: extract key fields by regex.
        thought = ""
        thought_match = _THOUGHT_RE.search(text)
        if thought_match:
            thought = thought_match.group(1)
            try:
                # Decode JSON escapes (\" \n \uXXXX ...) in the captured text.
                thought = json.loads(f'"{thought}"', strict=False)
            except ValueError:
                pass  # not a valid JSON string body; keep the raw capture

        name_match = _NAME_RE.search(text)
        action_name = name_match.group(1) if name_match else "UNKNOWN"

        return {
            "thought": thought or raw[:200],
            "action": {"name": action_name, "params": {}},
            "expected_delta": "Unable to parse prediction",
        }