# NOTE(review): the lines that were here ("Rohan03's picture",
# "Add purpose_agent/actor.py", "3110b12 verified", "raw", "history blame",
# "11.6 kB") are file-hosting web-page residue, not Python source —
# converted to this comment so the module parses.
"""
Actor Module — The agent that executes actions in the environment.
Implements a ReAct-style (Reason + Act) loop where each step produces:
1. Thought: Chain-of-thought reasoning about the current state
2. Action: What to do next (name + params)
3. Expected Delta: What the actor predicts will change
The Actor's system prompt is dynamically composed from:
- Base instructions (static)
- Strategic memory heuristics (updated after each task — from MUSE)
- Retrieved procedural SOPs (fetched on demand — from MUSE)
- Tool-level "muscle memory" (returned with each observation — from MUSE)
This module is intentionally stateless between tasks — all learning happens
via the memory system that feeds into the prompt.
"""
from __future__ import annotations
import json
import logging
from typing import Any
from purpose_agent.types import Action, Heuristic, MemoryTier, State
from purpose_agent.llm_backend import ChatMessage, LLMBackend
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# System Prompt Templates
# ---------------------------------------------------------------------------
# System-prompt skeleton for the Actor. Filled once per task by
# Actor._build_system_prompt() via str.format with three sections:
# the action catalogue, strategic-memory heuristics, and the procedural
# SOP index (see the corresponding _format_* helpers on Actor).
ACTOR_SYSTEM_PROMPT = """\
You are a goal-directed agent. Your purpose is to achieve the stated goal
by taking incremental actions that each move the state closer to the goal.
## Your Decision Process
For each step, you MUST:
1. THINK: Analyze the current state. What has been achieved? What remains?
2. ACT: Choose the single best next action from available actions.
3. PREDICT: State specifically what you expect to change after this action.
## Rules
- Take ONE action per step. Never skip ahead or combine actions.
- Be specific in your predictions — name exact state fields you expect to change.
- If a previous action didn't produce the expected result, adapt your strategy.
- If you believe the goal is achieved, use action "DONE" with no parameters.
## Available Actions
{available_actions}
## Learned Strategies (from past experience)
{strategic_memory}
## Relevant Procedures
{procedural_memory}
"""
# Per-step user prompt, filled on every Actor.decide() call. The doubled
# braces ({{ }}) in the JSON example escape str.format placeholders so the
# literal JSON skeleton survives formatting.
ACTOR_STEP_PROMPT = """\
## Current Goal
{purpose}
## Current State
{state}
## Action History (last {history_window} steps)
{history}
## Tool Tips
{tool_memory}
Based on the current state and your goal, decide your next action.
Respond in this exact JSON format:
```json
{{
"thought": "Your reasoning about the current state and what to do next",
"action": {{
"name": "action_name",
"params": {{"param1": "value1"}}
}},
"expected_delta": "Specific prediction of what will change in the state"
}}
```
"""
class Actor:
    """
    The Actor agent — executes actions in an environment.

    The Actor does NOT evaluate its own performance. That's the Purpose
    Function's job. The Actor just reasons, acts, and predicts.

    Architecture notes (from MUSE arxiv:2510.08002):
    - System prompt is composed dynamically from 3-tier memory
    - Strategic memory is always present (global dilemmas → strategies)
    - Procedural memory is lazy-loaded (index in prompt, details on demand)
    - Tool memory is returned per-step (dynamic instructions with observations)

    Args:
        llm: The LLM backend to use for reasoning
        available_actions: Dict of {action_name: description} the agent can take
        history_window: How many past steps to include in the prompt
        strategic_memory: List of strategic heuristics (loaded at task start)
        procedural_memory: List of procedural SOPs (indexed, fetched on demand)
        tool_memory: Dict of {action_name: dynamic_tip} (updated per-step)
    """

    def __init__(
        self,
        llm: LLMBackend,
        available_actions: dict[str, str] | None = None,
        history_window: int = 5,
        strategic_memory: list[Heuristic] | None = None,
        procedural_memory: list[Heuristic] | None = None,
        tool_memory: dict[str, str] | None = None,
    ):
        self.llm = llm
        # Fall back to a minimal action set so the agent can always terminate.
        self.available_actions = available_actions or {"DONE": "Signal that the goal is achieved"}
        self.history_window = history_window
        self.strategic_memory = strategic_memory or []
        self.procedural_memory = procedural_memory or []
        # update_tool_memory() mutates this dict in place between steps.
        self.tool_memory = tool_memory or {}

    # ------------------------------------------------------------------
    # Prompt Composition
    # ------------------------------------------------------------------
    def _format_actions(self) -> str:
        """Render the available-actions catalogue as a markdown bullet list."""
        if not self.available_actions:
            return "No specific action constraints. You may take any action."
        return "\n".join(
            f"- **{name}**: {desc}" for name, desc in self.available_actions.items()
        )

    def _format_strategic_memory(self) -> str:
        """Render strategic heuristics, highest q_value (confidence) first."""
        if not self.strategic_memory:
            return "None yet — this is your first task."
        ranked = sorted(self.strategic_memory, key=lambda x: -x.q_value)
        return "\n".join(
            f"- When: {h.pattern}\n Do: {h.strategy} (confidence: {h.q_value:.2f})"
            for h in ranked
        )

    def _format_procedural_memory(self) -> str:
        """Render the SOP index (details are fetched on demand, not inlined)."""
        if not self.procedural_memory:
            return "No standard operating procedures available."
        lines = ["Available SOPs (ask for details if relevant):"]
        lines.extend(
            f"- [{h.id}] {h.pattern}: {h.strategy}" for h in self.procedural_memory
        )
        return "\n".join(lines)

    def _format_tool_memory(self) -> str:
        """Render per-action tool tips as a markdown bullet list."""
        if not self.tool_memory:
            return "No tool-specific tips available."
        return "\n".join(
            f"- **{action_name}**: {tip}" for action_name, tip in self.tool_memory.items()
        )

    def _format_history(self, history: list[dict[str, Any]]) -> str:
        """Render the last ``history_window`` steps with absolute step numbers.

        Both 'action' and 'result' are coerced to str before truncation:
        the previous ``entry.get('result', 'N/A')[:200]`` raised TypeError
        whenever a result was a non-string observation (e.g. a dict).
        """
        if not history:
            return "No actions taken yet."
        recent = history[-self.history_window:]
        offset = len(history) - len(recent)
        lines = []
        for i, entry in enumerate(recent, start=1):
            action = entry.get('action', 'N/A')
            # Cap result text so long observations don't blow up the prompt.
            result = str(entry.get('result', 'N/A'))[:200]
            lines.append(f"Step {offset + i}: Action={action}, Result={result}")
        return "\n".join(lines)

    def _build_system_prompt(self) -> str:
        """Compose the system prompt: actions + strategic/procedural memory."""
        return ACTOR_SYSTEM_PROMPT.format(
            available_actions=self._format_actions(),
            strategic_memory=self._format_strategic_memory(),
            procedural_memory=self._format_procedural_memory(),
        )

    def _build_step_prompt(
        self, purpose: str, state: State, history: list[dict[str, Any]]
    ) -> str:
        """Compose the per-step user prompt (goal, state, history, tool tips)."""
        return ACTOR_STEP_PROMPT.format(
            purpose=purpose,
            state=state.describe(),
            history=self._format_history(history),
            tool_memory=self._format_tool_memory(),
            history_window=self.history_window,
        )

    # ------------------------------------------------------------------
    # Core Action Generation
    # ------------------------------------------------------------------
    def decide(
        self,
        purpose: str,
        current_state: State,
        history: list[dict[str, Any]] | None = None,
    ) -> Action:
        """
        Given the current state and purpose, decide the next action.

        Returns an Action with thought, name, params, and expected_delta.
        Tries structured generation first; on any backend failure, falls
        back to free-text generation plus best-effort JSON extraction.
        """
        history = history or []
        messages = [
            ChatMessage(role="system", content=self._build_system_prompt()),
            ChatMessage(role="user", content=self._build_step_prompt(
                purpose=purpose,
                state=current_state,
                history=history,
            )),
        ]
        # Try structured output first, fall back to text parsing
        schema = {
            "type": "object",
            "properties": {
                "thought": {"type": "string"},
                "action": {
                    "type": "object",
                    "properties": {
                        "name": {"type": "string"},
                        "params": {"type": "object"},
                    },
                    "required": ["name"],
                },
                "expected_delta": {"type": "string"},
            },
            "required": ["thought", "action", "expected_delta"],
        }
        try:
            result = self.llm.generate_structured(messages, schema=schema)
        except Exception as e:
            # Lazy %-args: no string formatting unless WARNING is emitted.
            logger.warning("Structured output failed (%s), falling back to text parse", e)
            raw = self.llm.generate(messages, temperature=0.7)
            result = self._parse_action_text(raw)
        if not isinstance(result, dict):
            # Defensive: backend returned something other than the schema.
            result = {}
        # `or {}` guards both a missing key and an explicit null value.
        action_data = result.get("action") or {}
        return Action(
            name=action_data.get("name", "UNKNOWN"),
            params=action_data.get("params") or {},
            thought=result.get("thought", ""),
            expected_delta=result.get("expected_delta", ""),
        )

    # ------------------------------------------------------------------
    # Memory Updates (called by Orchestrator between tasks)
    # ------------------------------------------------------------------
    def update_strategic_memory(self, heuristics: list[Heuristic]) -> None:
        """Replace strategic memory with updated heuristics."""
        self.strategic_memory = [
            h for h in heuristics if h.tier == MemoryTier.STRATEGIC
        ]
        logger.info("Actor strategic memory updated: %d heuristics", len(self.strategic_memory))

    def update_procedural_memory(self, heuristics: list[Heuristic]) -> None:
        """Update the procedural SOP index."""
        self.procedural_memory = [
            h for h in heuristics if h.tier == MemoryTier.PROCEDURAL
        ]
        logger.info("Actor procedural memory updated: %d SOPs", len(self.procedural_memory))

    def update_tool_memory(self, tips: dict[str, str]) -> None:
        """Merge per-action tool tips into the existing tool memory."""
        self.tool_memory.update(tips)
        logger.info("Actor tool memory updated: %s", list(tips.keys()))

    # ------------------------------------------------------------------
    # Fallback Text Parser
    # ------------------------------------------------------------------
    @staticmethod
    def _parse_action_text(raw: str) -> dict[str, Any]:
        """Best-effort extraction of action JSON from free-form text.

        Tries, in order:
        1. A fenced ```json code block — the exact format the step prompt
           requests, so it is checked first.
        2. The outermost ``{...}`` span. The expected payload nests an
           "action" object, so the old flat pattern
           ``r'\\{[^{}]*"thought"[^{}]*\\}'`` could never match a
           well-formed response (nested braces).
        3. Field-by-field regex extraction of "thought" and "name".
        """
        import re
        # 1. Fenced code block (preferred — matches the prompt's format).
        code_match = re.search(r'```(?:json)?\s*(\{.*?\})\s*```', raw, re.DOTALL)
        if code_match:
            try:
                return json.loads(code_match.group(1))
            except json.JSONDecodeError:
                pass
        # 2. Outermost brace span: first '{' through last '}'.
        start, end = raw.find('{'), raw.rfind('}')
        if start != -1 and end > start:
            try:
                return json.loads(raw[start:end + 1])
            except json.JSONDecodeError:
                pass
        # 3. Last resort: extract key-value pairs individually.
        thought = ""
        thought_match = re.search(r'"thought"\s*:\s*"([^"]*)"', raw)
        if thought_match:
            thought = thought_match.group(1)
        action_name = "UNKNOWN"
        name_match = re.search(r'"name"\s*:\s*"([^"]*)"', raw)
        if name_match:
            action_name = name_match.group(1)
        return {
            "thought": thought or raw[:200],
            "action": {"name": action_name, "params": {}},
            "expected_delta": "Unable to parse prediction",
        }