File size: 12,203 Bytes

"""
Actor Module — The agent that executes actions in the environment.

Implements a ReAct-style (Reason + Act) loop where each step produces:
  1. Thought: Chain-of-thought reasoning about the current state
  2. Action: What to do next (name + params)
  3. Expected Delta: What the actor predicts will change

The Actor's system prompt is dynamically composed from:
  - Base instructions (static)
  - Strategic memory heuristics (updated after each task — from MUSE)
  - Retrieved procedural SOPs (fetched on demand — from MUSE)
  - Tool-level "muscle memory" (returned with each observation — from MUSE)

This module is intentionally stateless between tasks — all learning happens
via the memory system that feeds into the prompt.
"""

from __future__ import annotations

import json
import logging
from typing import Any

from purpose_agent.types import Action, Heuristic, MemoryTier, State
from purpose_agent.llm_backend import ChatMessage, LLMBackend

logger = logging.getLogger(__name__)


# ---------------------------------------------------------------------------
# System Prompt Templates
# ---------------------------------------------------------------------------

ACTOR_SYSTEM_PROMPT = """\
You are a goal-directed agent. Your purpose is to achieve the stated goal
by taking incremental actions that each move the state closer to the goal.

## Your Decision Process
For each step, you MUST:
1. THINK: Analyze the current state. What has been achieved? What remains?
2. ACT: Choose the single best next action from available actions.
3. PREDICT: State specifically what you expect to change after this action.

## Rules
- Take ONE action per step. Never skip ahead or combine actions.
- Be specific in your predictions — name exact state fields you expect to change.
- If a previous action didn't produce the expected result, adapt your strategy.
- If you believe the goal is achieved, use action "DONE" with no parameters.

## Available Actions
{available_actions}

## Learned Strategies (from past experience)
{strategic_memory}

## Relevant Procedures
{procedural_memory}
"""

ACTOR_STEP_PROMPT = """\
## Current Goal
{purpose}

## Current State
{state}

## Action History (last {history_window} steps)
{history}

## Tool Tips
{tool_memory}

Based on the current state and your goal, decide your next action.

Respond in this format:
```toml
thought = "Your reasoning about the current state and what to do next"
expected_delta = "What you expect to change"

[action]
name = "action_name"

[action.params]
param1 = "value1"
```
"""


class Actor:
    """
    The Actor agent — executes actions in an environment.
    
    The Actor does NOT evaluate its own performance. That's the Purpose
    Function's job. The Actor just reasons, acts, and predicts.
    
    Architecture notes (from MUSE arxiv:2510.08002):
    - System prompt is composed dynamically from 3-tier memory
    - Strategic memory is always present (global dilemmas → strategies)
    - Procedural memory is lazy-loaded (index in prompt, details on demand)
    - Tool memory is returned per-step (dynamic instructions with observations)
    
    Args:
        llm: The LLM backend to use for reasoning
        available_actions: Dict of {action_name: description} the agent can take
        history_window: How many past steps to include in the prompt
        strategic_memory: List of strategic heuristics (loaded at task start)
        procedural_memory: List of procedural SOPs (indexed, fetched on demand)
        tool_memory: Dict of {action_name: dynamic_tip} (updated per-step)
    """

    def __init__(
        self,
        llm: LLMBackend,
        available_actions: dict[str, str] | None = None,
        history_window: int = 5,
        strategic_memory: list[Heuristic] | None = None,
        procedural_memory: list[Heuristic] | None = None,
        tool_memory: dict[str, str] | None = None,
    ):
        self.llm = llm
        self.available_actions = available_actions or {"DONE": "Signal that the goal is achieved"}
        self.history_window = history_window
        self.strategic_memory = strategic_memory or []
        self.procedural_memory = procedural_memory or []
        self.tool_memory = tool_memory or {}

    # ------------------------------------------------------------------
    # Prompt Composition
    # ------------------------------------------------------------------

    def _format_actions(self) -> str:
        if not self.available_actions:
            return "No specific action constraints. You may take any action."
        lines = []
        for name, desc in self.available_actions.items():
            lines.append(f"- **{name}**: {desc}")
        return "\n".join(lines)

    def _format_strategic_memory(self) -> str:
        if not self.strategic_memory:
            return "None yet — this is your first task."
        lines = []
        for h in sorted(self.strategic_memory, key=lambda x: -x.q_value):
            lines.append(f"- When: {h.pattern}\n  Do: {h.strategy} (confidence: {h.q_value:.2f})")
        return "\n".join(lines)

    def _format_procedural_memory(self) -> str:
        if not self.procedural_memory:
            return "No standard operating procedures available."
        lines = ["Available SOPs (ask for details if relevant):"]
        for h in self.procedural_memory:
            lines.append(f"- [{h.id}] {h.pattern}: {h.strategy}")
        return "\n".join(lines)

    def _format_tool_memory(self) -> str:
        if not self.tool_memory:
            return "No tool-specific tips available."
        lines = []
        for action_name, tip in self.tool_memory.items():
            lines.append(f"- **{action_name}**: {tip}")
        return "\n".join(lines)

    def _format_history(self, history: list[dict[str, Any]]) -> str:
        if not history:
            return "No actions taken yet."
        recent = history[-self.history_window:]
        lines = []
        for i, entry in enumerate(recent):
            step_num = len(history) - len(recent) + i + 1
            lines.append(
                f"Step {step_num}: Action={entry.get('action', 'N/A')}, "
                f"Result={entry.get('result', 'N/A')[:200]}"
            )
        return "\n".join(lines)

    def _build_system_prompt(self) -> str:
        return ACTOR_SYSTEM_PROMPT.format(
            available_actions=self._format_actions(),
            strategic_memory=self._format_strategic_memory(),
            procedural_memory=self._format_procedural_memory(),
        )

    def _build_step_prompt(
        self, purpose: str, state: State, history: list[dict[str, Any]]
    ) -> str:
        return ACTOR_STEP_PROMPT.format(
            purpose=purpose,
            state=state.describe(),
            history=self._format_history(history),
            tool_memory=self._format_tool_memory(),
            history_window=self.history_window,
        )

    # ------------------------------------------------------------------
    # Core Action Generation
    # ------------------------------------------------------------------

    def decide(
        self,
        purpose: str,
        current_state: State,
        history: list[dict[str, Any]] | None = None,
    ) -> Action:
        """
        Given the current state and purpose, decide the next action.
        
        Returns an Action with thought, name, params, and expected_delta.
        """
        history = history or []

        messages = [
            ChatMessage(role="system", content=self._build_system_prompt()),
            ChatMessage(role="user", content=self._build_step_prompt(
                purpose=purpose,
                state=current_state,
                history=history,
            )),
        ]

        # Universal parsing: try structured output, fall back to robust text parser
        from purpose_agent.robust_parser import parse_actor_response

        try:
            result = self.llm.generate_structured(messages, schema={
                "type": "object",
                "properties": {
                    "thought": {"type": "string"},
                    "action": {"type": "object", "properties": {"name": {"type": "string"}, "params": {"type": "object"}}, "required": ["name"]},
                    "expected_delta": {"type": "string"},
                },
                "required": ["thought", "action", "expected_delta"],
            })
        except Exception:
            # Structured output not available — use universal text parser
            raw = self.llm.generate(messages, temperature=0.7, max_tokens=2000)
            result = parse_actor_response(raw)

        action_data = result.get("action", {})
        if isinstance(action_data, str):
            action_data = {"name": action_data, "params": {}}
        return Action(
            name=action_data.get("name", "UNKNOWN") if isinstance(action_data, dict) else str(action_data),
            params=action_data.get("params", {}) if isinstance(action_data, dict) else {},
            thought=result.get("thought", ""),
            expected_delta=result.get("expected_delta", ""),
        )

    # ------------------------------------------------------------------
    # Memory Updates (called by Orchestrator between tasks)
    # ------------------------------------------------------------------

    def update_strategic_memory(self, heuristics: list[Heuristic]) -> None:
        """Replace strategic memory with updated heuristics."""
        self.strategic_memory = [
            h for h in heuristics if h.tier == MemoryTier.STRATEGIC
        ]
        logger.info(f"Actor strategic memory updated: {len(self.strategic_memory)} heuristics")

    def update_procedural_memory(self, heuristics: list[Heuristic]) -> None:
        """Update the procedural SOP index."""
        self.procedural_memory = [
            h for h in heuristics if h.tier == MemoryTier.PROCEDURAL
        ]
        logger.info(f"Actor procedural memory updated: {len(self.procedural_memory)} SOPs")

    def update_tool_memory(self, tips: dict[str, str]) -> None:
        """Update per-action tool tips."""
        self.tool_memory.update(tips)
        logger.info(f"Actor tool memory updated: {list(tips.keys())}")

    # ------------------------------------------------------------------
    # Fallback Text Parser
    # ------------------------------------------------------------------

    @staticmethod
    def _parse_action_text(raw: str) -> dict[str, Any]:
        """Best-effort extraction of action JSON from free-form text."""
        import re

        # Strategy 1: Try json.loads on the entire response (works if LLM outputs pure JSON)
        text = raw.strip()
        try:
            return json.loads(text)
        except (json.JSONDecodeError, ValueError):
            pass

        # Strategy 2: Extract JSON from markdown code blocks
        code_match = re.search(r'```(?:json)?\s*(\{.*\})\s*```', text, re.DOTALL)
        if code_match:
            try:
                return json.loads(code_match.group(1))
            except (json.JSONDecodeError, ValueError):
                pass

        # Strategy 3: Find the outermost { ... } by brace matching
        start = text.find('{')
        if start >= 0:
            depth = 0
            for i in range(start, len(text)):
                if text[i] == '{': depth += 1
                elif text[i] == '}': depth -= 1
                if depth == 0:
                    try:
                        return json.loads(text[start:i+1])
                    except (json.JSONDecodeError, ValueError):
                        break

        # Strategy 4: Extract key fields by regex
        thought = ""
        thought_match = re.search(r'"thought"\s*:\s*"((?:[^"\\]|\\.)*)"', text)
        if thought_match:
            thought = thought_match.group(1)

        action_name = "UNKNOWN"
        name_match = re.search(r'"name"\s*:\s*"([^"]*)"', text)
        if name_match:
            action_name = name_match.group(1)

        return {
            "thought": thought or raw[:200],
            "action": {"name": action_name, "params": {}},
            "expected_delta": "Unable to parse prediction",
        }