"""
Actor Module — The agent that executes actions in the environment.

Implements a ReAct-style (Reason + Act) loop where each step produces:
1. Thought: Chain-of-thought reasoning about the current state
2. Action: What to do next (name + params)
3. Expected Delta: What the actor predicts will change

The Actor's system prompt is dynamically composed from:
- Base instructions (static)
- Strategic memory heuristics (updated after each task — from MUSE)
- Retrieved procedural SOPs (fetched on demand — from MUSE)
- Tool-level "muscle memory" (returned with each observation — from MUSE)

This module is intentionally stateless between tasks — all learning happens
via the memory system that feeds into the prompt.
"""
|
|
| from __future__ import annotations |
|
|
| import json |
| import logging |
| from typing import Any |
|
|
| from purpose_agent.types import Action, Heuristic, MemoryTier, State |
| from purpose_agent.llm_backend import ChatMessage, LLMBackend |
|
|
| logger = logging.getLogger(__name__) |
|
|
|
|
| |
| |
| |
|
|
# System-prompt template for the Actor, filled once per task by
# Actor._build_system_prompt() with:
#   {available_actions}  - bullet list of action names and descriptions
#   {strategic_memory}   - ranked heuristics learned from past tasks
#   {procedural_memory}  - index of SOPs (details fetched on demand)
# NOTE(review): the stray "β" below looks like a mis-encoded em dash; left
# untouched here because this text is sent verbatim to the LLM — confirm
# and fix the encoding at the source if so.
ACTOR_SYSTEM_PROMPT = """\
You are a goal-directed agent. Your purpose is to achieve the stated goal
by taking incremental actions that each move the state closer to the goal.

## Your Decision Process
For each step, you MUST:
1. THINK: Analyze the current state. What has been achieved? What remains?
2. ACT: Choose the single best next action from available actions.
3. PREDICT: State specifically what you expect to change after this action.

## Rules
- Take ONE action per step. Never skip ahead or combine actions.
- Be specific in your predictions β name exact state fields you expect to change.
- If a previous action didn't produce the expected result, adapt your strategy.
- If you believe the goal is achieved, use action "DONE" with no parameters.

## Available Actions
{available_actions}

## Learned Strategies (from past experience)
{strategic_memory}

## Relevant Procedures
{procedural_memory}
"""


# Per-step user prompt, filled on every Actor.decide() call with the goal,
# a description of the current state, a window of recent action history, and
# per-tool tips. Instructs the model to answer in TOML with thought /
# expected_delta / [action] sections (parsed by purpose_agent.robust_parser).
ACTOR_STEP_PROMPT = """\
## Current Goal
{purpose}

## Current State
{state}

## Action History (last {history_window} steps)
{history}

## Tool Tips
{tool_memory}

Based on the current state and your goal, decide your next action.

Respond in this format:
```toml
thought = "Your reasoning about the current state and what to do next"
expected_delta = "What you expect to change"

[action]
name = "action_name"

[action.params]
param1 = "value1"
```
"""
|
|
|
|
class Actor:
    """
    The Actor agent — executes actions in an environment.

    The Actor does NOT evaluate its own performance. That's the Purpose
    Function's job. The Actor just reasons, acts, and predicts.

    Architecture notes (from MUSE arxiv:2510.08002):
    - System prompt is composed dynamically from 3-tier memory
    - Strategic memory is always present (global dilemmas -> strategies)
    - Procedural memory is lazy-loaded (index in prompt, details on demand)
    - Tool memory is returned per-step (dynamic instructions with observations)

    Args:
        llm: The LLM backend to use for reasoning
        available_actions: Dict of {action_name: description} the agent can take
        history_window: How many past steps to include in the prompt
        strategic_memory: List of strategic heuristics (loaded at task start)
        procedural_memory: List of procedural SOPs (indexed, fetched on demand)
        tool_memory: Dict of {action_name: dynamic_tip} (updated per-step)
    """

    def __init__(
        self,
        llm: LLMBackend,
        available_actions: dict[str, str] | None = None,
        history_window: int = 5,
        strategic_memory: list[Heuristic] | None = None,
        procedural_memory: list[Heuristic] | None = None,
        tool_memory: dict[str, str] | None = None,
    ):
        self.llm = llm
        self.available_actions = available_actions or {"DONE": "Signal that the goal is achieved"}
        self.history_window = history_window
        # Copy caller-supplied containers: update_tool_memory() mutates
        # self.tool_memory in place, which would otherwise silently modify
        # the caller's dict (and likewise for the heuristic lists).
        self.strategic_memory = list(strategic_memory) if strategic_memory else []
        self.procedural_memory = list(procedural_memory) if procedural_memory else []
        self.tool_memory = dict(tool_memory) if tool_memory else {}

    # ------------------------------------------------------------------
    # Prompt-section formatters
    # ------------------------------------------------------------------

    def _format_actions(self) -> str:
        """Render the available actions as a markdown bullet list."""
        if not self.available_actions:
            return "No specific action constraints. You may take any action."
        return "\n".join(
            f"- **{name}**: {desc}"
            for name, desc in self.available_actions.items()
        )

    def _format_strategic_memory(self) -> str:
        """Render strategic heuristics, highest q_value (confidence) first."""
        if not self.strategic_memory:
            return "None yet — this is your first task."
        ranked = sorted(self.strategic_memory, key=lambda h: h.q_value, reverse=True)
        return "\n".join(
            f"- When: {h.pattern}\n Do: {h.strategy} (confidence: {h.q_value:.2f})"
            for h in ranked
        )

    def _format_procedural_memory(self) -> str:
        """Render the SOP index (details are fetched on demand, not inlined)."""
        if not self.procedural_memory:
            return "No standard operating procedures available."
        lines = ["Available SOPs (ask for details if relevant):"]
        lines.extend(
            f"- [{h.id}] {h.pattern}: {h.strategy}" for h in self.procedural_memory
        )
        return "\n".join(lines)

    def _format_tool_memory(self) -> str:
        """Render per-action tool tips as a markdown bullet list."""
        if not self.tool_memory:
            return "No tool-specific tips available."
        return "\n".join(
            f"- **{action_name}**: {tip}"
            for action_name, tip in self.tool_memory.items()
        )

    def _format_history(self, history: list[dict[str, Any]]) -> str:
        """Render the last `history_window` steps with absolute step numbers."""
        if not history:
            return "No actions taken yet."
        recent = history[-self.history_window:]
        first_step = len(history) - len(recent) + 1
        lines = []
        for step_num, entry in enumerate(recent, start=first_step):
            # str() guards against non-string results (dicts, None, ...),
            # which would otherwise raise TypeError on the [:200] slice —
            # .get() only substitutes 'N/A' when the key is entirely absent.
            result = str(entry.get("result", "N/A"))[:200]
            lines.append(
                f"Step {step_num}: Action={entry.get('action', 'N/A')}, Result={result}"
            )
        return "\n".join(lines)

    def _build_system_prompt(self) -> str:
        """Compose the system prompt: static rules + strategic/procedural memory."""
        return ACTOR_SYSTEM_PROMPT.format(
            available_actions=self._format_actions(),
            strategic_memory=self._format_strategic_memory(),
            procedural_memory=self._format_procedural_memory(),
        )

    def _build_step_prompt(
        self, purpose: str, state: State, history: list[dict[str, Any]]
    ) -> str:
        """Compose the per-step user prompt (goal, state, history, tool tips)."""
        return ACTOR_STEP_PROMPT.format(
            purpose=purpose,
            state=state.describe(),
            history=self._format_history(history),
            tool_memory=self._format_tool_memory(),
            history_window=self.history_window,
        )

    def decide(
        self,
        purpose: str,
        current_state: State,
        history: list[dict[str, Any]] | None = None,
    ) -> Action:
        """
        Given the current state and purpose, decide the next action.

        Tries the backend's structured-output path first; on ANY failure it
        degrades to free-form generation plus best-effort parsing, so the
        loop always gets an Action back.

        Returns an Action with thought, name, params, and expected_delta.
        """
        history = history or []

        messages = [
            ChatMessage(role="system", content=self._build_system_prompt()),
            ChatMessage(role="user", content=self._build_step_prompt(
                purpose=purpose,
                state=current_state,
                history=history,
            )),
        ]

        try:
            result = self.llm.generate_structured(messages, schema={
                "type": "object",
                "properties": {
                    "thought": {"type": "string"},
                    "action": {
                        "type": "object",
                        "properties": {
                            "name": {"type": "string"},
                            "params": {"type": "object"},
                        },
                        "required": ["name"],
                    },
                    "expected_delta": {"type": "string"},
                },
                "required": ["thought", "action", "expected_delta"],
            })
        except Exception:
            # Deliberately broad: any structured-output failure (unsupported
            # backend, schema rejection, malformed output) degrades to the
            # free-form path. Logged so the fallback is never invisible.
            logger.warning(
                "Structured generation failed; using free-form fallback",
                exc_info=True,
            )
            # Imported lazily — only this fallback path needs the parser.
            from purpose_agent.robust_parser import parse_actor_response

            raw = self.llm.generate(messages, temperature=0.7, max_tokens=2000)
            result = parse_actor_response(raw)

        # Normalize the action payload: models sometimes emit a bare string
        # ("DONE") instead of an object with name/params.
        action_data = result.get("action", {})
        if isinstance(action_data, str):
            action_data = {"name": action_data, "params": {}}
        elif not isinstance(action_data, dict):
            action_data = {"name": str(action_data), "params": {}}
        return Action(
            name=action_data.get("name", "UNKNOWN"),
            params=action_data.get("params", {}),
            thought=result.get("thought", ""),
            expected_delta=result.get("expected_delta", ""),
        )

    # ------------------------------------------------------------------
    # Memory updates (called by the orchestrator / MUSE between tasks/steps)
    # ------------------------------------------------------------------

    def update_strategic_memory(self, heuristics: list[Heuristic]) -> None:
        """Replace strategic memory with updated heuristics."""
        self.strategic_memory = [
            h for h in heuristics if h.tier == MemoryTier.STRATEGIC
        ]
        # Lazy %-args: the message is only formatted if the record is emitted.
        logger.info(
            "Actor strategic memory updated: %d heuristics", len(self.strategic_memory)
        )

    def update_procedural_memory(self, heuristics: list[Heuristic]) -> None:
        """Update the procedural SOP index."""
        self.procedural_memory = [
            h for h in heuristics if h.tier == MemoryTier.PROCEDURAL
        ]
        logger.info(
            "Actor procedural memory updated: %d SOPs", len(self.procedural_memory)
        )

    def update_tool_memory(self, tips: dict[str, str]) -> None:
        """Merge per-action tool tips into the existing ones (in place)."""
        self.tool_memory.update(tips)
        logger.info("Actor tool memory updated: %s", list(tips.keys()))

    @staticmethod
    def _parse_action_text(raw: str) -> dict[str, Any]:
        """
        Best-effort extraction of an action dict from free-form text.

        NOTE(review): this helper extracts JSON, while ACTOR_STEP_PROMPT asks
        the model for TOML — presumably the TOML path lives in
        purpose_agent.robust_parser and this is a JSON-only fallback; confirm.

        Tries, in order: whole-text JSON, a fenced ```json block, the first
        balanced {...} span, then regex scraping of "thought"/"name" fields.
        Always returns a dict with thought/action/expected_delta keys.
        """
        import re

        text = raw.strip()

        # 1) The whole response is valid JSON.
        try:
            return json.loads(text)
        except (json.JSONDecodeError, ValueError):
            pass

        # 2) JSON wrapped in a fenced code block.
        code_match = re.search(r'```(?:json)?\s*(\{.*\})\s*```', text, re.DOTALL)
        if code_match:
            try:
                return json.loads(code_match.group(1))
            except (json.JSONDecodeError, ValueError):
                pass

        # 3) First balanced {...} span via brace counting (does not account
        # for braces inside string literals — acceptable for a last resort).
        start = text.find('{')
        if start >= 0:
            depth = 0
            for i in range(start, len(text)):
                if text[i] == '{':
                    depth += 1
                elif text[i] == '}':
                    depth -= 1
                if depth == 0:
                    try:
                        return json.loads(text[start:i + 1])
                    except (json.JSONDecodeError, ValueError):
                        break

        # 4) Regex-scrape individual fields so callers always get a dict.
        thought = ""
        thought_match = re.search(r'"thought"\s*:\s*"((?:[^"\\]|\\.)*)"', text)
        if thought_match:
            thought = thought_match.group(1)

        action_name = "UNKNOWN"
        name_match = re.search(r'"name"\s*:\s*"([^"]*)"', text)
        if name_match:
            action_name = name_match.group(1)

        return {
            "thought": thought or raw[:200],
            "action": {"name": action_name, "params": {}},
            "expected_delta": "Unable to parse prediction",
        }
|
|