"""
Actor Module — The agent that executes actions in the environment.

Implements a ReAct-style (Reason + Act) loop where each step produces:
1. Thought: Chain-of-thought reasoning about the current state
2. Action: What to do next (name + params)
3. Expected Delta: What the actor predicts will change

The Actor's system prompt is dynamically composed from:
- Base instructions (static)
- Strategic memory heuristics (updated after each task — from MUSE)
- Retrieved procedural SOPs (fetched on demand — from MUSE)
- Tool-level "muscle memory" (returned with each observation — from MUSE)

This module is intentionally stateless between tasks — all learning happens
via the memory system that feeds into the prompt.
"""
|
|
| from __future__ import annotations |
|
|
| import json |
| import logging |
| from typing import Any |
|
|
| from purpose_agent.types import Action, Heuristic, MemoryTier, State |
| from purpose_agent.llm_backend import ChatMessage, LLMBackend |
|
|
| logger = logging.getLogger(__name__) |
|
|
|
|
| |
| |
| |
|
|
# System-prompt template for the Actor, filled once per task by
# Actor._build_system_prompt() with:
#   {available_actions}  - bullet list of action names and descriptions
#   {strategic_memory}   - ranked heuristics learned from past tasks
#   {procedural_memory}  - index of SOPs (details fetched on demand)
# NOTE(review): the stray "β" below looks like a mis-encoded em dash; left
# untouched here because this text is sent verbatim to the LLM — confirm
# and fix the encoding at the source if so.
ACTOR_SYSTEM_PROMPT = """\
You are a goal-directed agent. Your purpose is to achieve the stated goal
by taking incremental actions that each move the state closer to the goal.

## Your Decision Process
For each step, you MUST:
1. THINK: Analyze the current state. What has been achieved? What remains?
2. ACT: Choose the single best next action from available actions.
3. PREDICT: State specifically what you expect to change after this action.

## Rules
- Take ONE action per step. Never skip ahead or combine actions.
- Be specific in your predictions β name exact state fields you expect to change.
- If a previous action didn't produce the expected result, adapt your strategy.
- If you believe the goal is achieved, use action "DONE" with no parameters.

## Available Actions
{available_actions}

## Learned Strategies (from past experience)
{strategic_memory}

## Relevant Procedures
{procedural_memory}
"""


# Per-step user prompt, filled on every Actor.decide() call with the goal,
# a description of the current state, a window of recent action history, and
# per-tool tips. Instructs the model to answer in TOML with thought /
# expected_delta / [action] sections (parsed by purpose_agent.robust_parser).
ACTOR_STEP_PROMPT = """\
## Current Goal
{purpose}

## Current State
{state}

## Action History (last {history_window} steps)
{history}

## Tool Tips
{tool_memory}

Based on the current state and your goal, decide your next action.

Respond in this format:
```toml
thought = "Your reasoning about the current state and what to do next"
expected_delta = "What you expect to change"

[action]
name = "action_name"

[action.params]
param1 = "value1"
```
"""
|
|
|
|
class Actor:
    """
    The Actor agent — executes actions in an environment.

    The Actor does NOT evaluate its own performance. That's the Purpose
    Function's job. The Actor just reasons, acts, and predicts.

    Architecture notes (from MUSE arxiv:2510.08002):
    - System prompt is composed dynamically from 3-tier memory
    - Strategic memory is always present (global dilemmas -> strategies)
    - Procedural memory is lazy-loaded (index in prompt, details on demand)
    - Tool memory is returned per-step (dynamic instructions with observations)

    Args:
        llm: The LLM backend to use for reasoning
        available_actions: Dict of {action_name: description} the agent can take
        history_window: How many past steps to include in the prompt
        strategic_memory: List of strategic heuristics (loaded at task start)
        procedural_memory: List of procedural SOPs (indexed, fetched on demand)
        tool_memory: Dict of {action_name: dynamic_tip} (updated per-step)
    """

    def __init__(
        self,
        llm: LLMBackend,
        available_actions: dict[str, str] | None = None,
        history_window: int = 5,
        strategic_memory: list[Heuristic] | None = None,
        procedural_memory: list[Heuristic] | None = None,
        tool_memory: dict[str, str] | None = None,
    ):
        self.llm = llm
        self.available_actions = available_actions or {"DONE": "Signal that the goal is achieved"}
        self.history_window = history_window
        # Copy caller-supplied containers: update_tool_memory() mutates
        # self.tool_memory in place, which would otherwise silently modify
        # the caller's dict (and likewise for the heuristic lists).
        self.strategic_memory = list(strategic_memory) if strategic_memory else []
        self.procedural_memory = list(procedural_memory) if procedural_memory else []
        self.tool_memory = dict(tool_memory) if tool_memory else {}

    # ------------------------------------------------------------------
    # Prompt-section formatters
    # ------------------------------------------------------------------

    def _format_actions(self) -> str:
        """Render the available actions as a markdown bullet list."""
        if not self.available_actions:
            return "No specific action constraints. You may take any action."
        return "\n".join(
            f"- **{name}**: {desc}"
            for name, desc in self.available_actions.items()
        )

    def _format_strategic_memory(self) -> str:
        """Render strategic heuristics, highest q_value (confidence) first."""
        if not self.strategic_memory:
            return "None yet — this is your first task."
        ranked = sorted(self.strategic_memory, key=lambda h: h.q_value, reverse=True)
        return "\n".join(
            f"- When: {h.pattern}\n Do: {h.strategy} (confidence: {h.q_value:.2f})"
            for h in ranked
        )

    def _format_procedural_memory(self) -> str:
        """Render the SOP index (details are fetched on demand, not inlined)."""
        if not self.procedural_memory:
            return "No standard operating procedures available."
        lines = ["Available SOPs (ask for details if relevant):"]
        lines.extend(
            f"- [{h.id}] {h.pattern}: {h.strategy}" for h in self.procedural_memory
        )
        return "\n".join(lines)

    def _format_tool_memory(self) -> str:
        """Render per-action tool tips as a markdown bullet list."""
        if not self.tool_memory:
            return "No tool-specific tips available."
        return "\n".join(
            f"- **{action_name}**: {tip}"
            for action_name, tip in self.tool_memory.items()
        )

    def _format_history(self, history: list[dict[str, Any]]) -> str:
        """Render the last `history_window` steps with absolute step numbers."""
        if not history:
            return "No actions taken yet."
        recent = history[-self.history_window:]
        first_step = len(history) - len(recent) + 1
        lines = []
        for step_num, entry in enumerate(recent, start=first_step):
            # str() guards against non-string results (dicts, None, ...),
            # which would otherwise raise TypeError on the [:200] slice —
            # .get() only substitutes 'N/A' when the key is entirely absent.
            result = str(entry.get("result", "N/A"))[:200]
            lines.append(
                f"Step {step_num}: Action={entry.get('action', 'N/A')}, Result={result}"
            )
        return "\n".join(lines)

    def _build_system_prompt(self) -> str:
        """Compose the system prompt: static rules + strategic/procedural memory."""
        return ACTOR_SYSTEM_PROMPT.format(
            available_actions=self._format_actions(),
            strategic_memory=self._format_strategic_memory(),
            procedural_memory=self._format_procedural_memory(),
        )

    def _build_step_prompt(
        self, purpose: str, state: State, history: list[dict[str, Any]]
    ) -> str:
        """Compose the per-step user prompt (goal, state, history, tool tips)."""
        return ACTOR_STEP_PROMPT.format(
            purpose=purpose,
            state=state.describe(),
            history=self._format_history(history),
            tool_memory=self._format_tool_memory(),
            history_window=self.history_window,
        )

    def decide(
        self,
        purpose: str,
        current_state: State,
        history: list[dict[str, Any]] | None = None,
    ) -> Action:
        """
        Given the current state and purpose, decide the next action.

        Tries the backend's structured-output path first; on ANY failure it
        degrades to free-form generation plus best-effort parsing, so the
        loop always gets an Action back.

        Returns an Action with thought, name, params, and expected_delta.
        """
        history = history or []

        messages = [
            ChatMessage(role="system", content=self._build_system_prompt()),
            ChatMessage(role="user", content=self._build_step_prompt(
                purpose=purpose,
                state=current_state,
                history=history,
            )),
        ]

        try:
            result = self.llm.generate_structured(messages, schema={
                "type": "object",
                "properties": {
                    "thought": {"type": "string"},
                    "action": {
                        "type": "object",
                        "properties": {
                            "name": {"type": "string"},
                            "params": {"type": "object"},
                        },
                        "required": ["name"],
                    },
                    "expected_delta": {"type": "string"},
                },
                "required": ["thought", "action", "expected_delta"],
            })
        except Exception:
            # Deliberately broad: any structured-output failure (unsupported
            # backend, schema rejection, malformed output) degrades to the
            # free-form path. Logged so the fallback is never invisible.
            logger.warning(
                "Structured generation failed; using free-form fallback",
                exc_info=True,
            )
            # Imported lazily — only this fallback path needs the parser.
            from purpose_agent.robust_parser import parse_actor_response

            raw = self.llm.generate(messages, temperature=0.7, max_tokens=2000)
            result = parse_actor_response(raw)

        # Normalize the action payload: models sometimes emit a bare string
        # ("DONE") instead of an object with name/params.
        action_data = result.get("action", {})
        if isinstance(action_data, str):
            action_data = {"name": action_data, "params": {}}
        elif not isinstance(action_data, dict):
            action_data = {"name": str(action_data), "params": {}}
        return Action(
            name=action_data.get("name", "UNKNOWN"),
            params=action_data.get("params", {}),
            thought=result.get("thought", ""),
            expected_delta=result.get("expected_delta", ""),
        )

    # ------------------------------------------------------------------
    # Memory updates (called by the orchestrator / MUSE between tasks/steps)
    # ------------------------------------------------------------------

    def update_strategic_memory(self, heuristics: list[Heuristic]) -> None:
        """Replace strategic memory with updated heuristics."""
        self.strategic_memory = [
            h for h in heuristics if h.tier == MemoryTier.STRATEGIC
        ]
        # Lazy %-args: the message is only formatted if the record is emitted.
        logger.info(
            "Actor strategic memory updated: %d heuristics", len(self.strategic_memory)
        )

    def update_procedural_memory(self, heuristics: list[Heuristic]) -> None:
        """Update the procedural SOP index."""
        self.procedural_memory = [
            h for h in heuristics if h.tier == MemoryTier.PROCEDURAL
        ]
        logger.info(
            "Actor procedural memory updated: %d SOPs", len(self.procedural_memory)
        )

    def update_tool_memory(self, tips: dict[str, str]) -> None:
        """Merge per-action tool tips into the existing ones (in place)."""
        self.tool_memory.update(tips)
        logger.info("Actor tool memory updated: %s", list(tips.keys()))

    @staticmethod
    def _parse_action_text(raw: str) -> dict[str, Any]:
        """
        Best-effort extraction of an action dict from free-form text.

        NOTE(review): this helper extracts JSON, while ACTOR_STEP_PROMPT asks
        the model for TOML — presumably the TOML path lives in
        purpose_agent.robust_parser and this is a JSON-only fallback; confirm.

        Tries, in order: whole-text JSON, a fenced ```json block, the first
        balanced {...} span, then regex scraping of "thought"/"name" fields.
        Always returns a dict with thought/action/expected_delta keys.
        """
        import re

        text = raw.strip()

        # 1) The whole response is valid JSON.
        try:
            return json.loads(text)
        except (json.JSONDecodeError, ValueError):
            pass

        # 2) JSON wrapped in a fenced code block.
        code_match = re.search(r'```(?:json)?\s*(\{.*\})\s*```', text, re.DOTALL)
        if code_match:
            try:
                return json.loads(code_match.group(1))
            except (json.JSONDecodeError, ValueError):
                pass

        # 3) First balanced {...} span via brace counting (does not account
        # for braces inside string literals — acceptable for a last resort).
        start = text.find('{')
        if start >= 0:
            depth = 0
            for i in range(start, len(text)):
                if text[i] == '{':
                    depth += 1
                elif text[i] == '}':
                    depth -= 1
                if depth == 0:
                    try:
                        return json.loads(text[start:i + 1])
                    except (json.JSONDecodeError, ValueError):
                        break

        # 4) Regex-scrape individual fields so callers always get a dict.
        thought = ""
        thought_match = re.search(r'"thought"\s*:\s*"((?:[^"\\]|\\.)*)"', text)
        if thought_match:
            thought = thought_match.group(1)

        action_name = "UNKNOWN"
        name_match = re.search(r'"name"\s*:\s*"([^"]*)"', text)
        if name_match:
            action_name = name_match.group(1)

        return {
            "thought": thought or raw[:200],
            "action": {"name": action_name, "params": {}},
            "expected_delta": "Unable to parse prediction",
        }
|
|