first-principles: state_delta.py — O(1) Markovian critic (no history needed)
purpose_agent/state_delta.py — ADDED (+176 −0)
"""
state_delta.py — Markovian State-Delta Evaluation.

PROBLEM: Passing the full conversation history to the Critic costs O(N²) tokens
in total as the trajectory grows, since step t re-sends all t previous steps.
This makes long tasks unaffordable.

SOLUTION: Recognize that this IS a Markov Decision Process. The Critic only needs:
- the delta between state(t) and state(t+1)
- the action taken
- the purpose (constant)

This reduces the Critic's input to ~300 tokens regardless of trajectory length.

Mathematical basis:
In an MDP, the value of a state s' depends only on s' itself, not on the path
taken to reach it. Therefore Φ(s') can be computed from s' alone.

We compute:   diff = state_diff(s_t, s_{t+1})
Critic sees:  (purpose, action, diff) → Φ score
Token cost:   O(1) per step, not O(N)
"""
from __future__ import annotations

import difflib
from dataclasses import dataclass
from typing import Any

from purpose_agent.types import State

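# A worked cost comparison (illustrative; the ~500 tokens of transcript added
# per step is an assumed figure, not taken from this repo):
#
#   full-history critic:  step t re-reads ~500*t tokens, so an N-step run
#                         costs ~250*N² critic tokens (N=100 → ~2.5M)
#   state-delta critic:   ~300 tokens per step, ~300*N total (N=100 → ~30K)
#
# The per-call input is what drops from O(N) to O(1).
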
@dataclass
class StateDelta:
    """
    The computed difference between two states.

    This is ALL the Critic needs to see — not the full history.
    Keeps token cost constant (~100-300 tokens) regardless of trajectory length.
    """
    added_keys: dict[str, Any]  # New keys in s_{t+1} that weren't in s_t
    removed_keys: list[str]  # Keys in s_t that are gone in s_{t+1}
    changed_keys: dict[str, tuple[Any, Any]]  # key → (old_value, new_value)
    summary_diff: str  # Human-readable diff (what the Critic sees)
    token_estimate: int = 0  # Estimated tokens for this delta

    @property
    def is_empty(self) -> bool:
        """No state change occurred."""
        return not self.added_keys and not self.removed_keys and not self.changed_keys

    @property
    def change_magnitude(self) -> int:
        """How much changed — useful for detecting no-ops."""
        return len(self.added_keys) + len(self.removed_keys) + len(self.changed_keys)

def compute_state_delta(s_before: State, s_after: State) -> StateDelta:
    """
    Compute the minimal diff between two states.

    This is the core O(1) optimization: instead of passing both full states
    to the Critic (an input that grows with the trajectory), we pass only
    what changed.

    Token cost: ~100-300 tokens regardless of state size or trajectory length.
    """
    d_before = s_before.data
    d_after = s_after.data

    added: dict[str, Any] = {}
    removed: list[str] = []
    changed: dict[str, tuple[Any, Any]] = {}

    # Find additions and changes
    for key, val in d_after.items():
        if key.startswith("_"):  # Skip internal keys
            continue
        if key not in d_before:
            added[key] = val
        elif d_before[key] != val:
            changed[key] = (d_before[key], val)

    # Find removals
    for key in d_before:
        if key.startswith("_"):
            continue
        if key not in d_after:
            removed.append(key)

    # Build human-readable summary (what the Critic actually sees)
    lines = []
    for k, v in added.items():
        lines.append(f"+ {k} = {_truncate(v)}")
    for k in removed:
        lines.append(f"- {k} (removed)")
    for k, (old, new) in changed.items():
        lines.append(f"~ {k}: {_truncate(old)} → {_truncate(new)}")

    # Fall back to a text diff of the summaries, but only when no key-level
    # change was detected above
    if s_before.summary and s_after.summary and s_before.summary != s_after.summary:
        text_diff = _text_diff(s_before.summary, s_after.summary)
        if text_diff and not lines:
            lines.append(text_diff)

    summary = "\n".join(lines) if lines else "(no observable change)"
    token_est = len(summary) // 4  # Rough heuristic: ~4 chars per token

    return StateDelta(
        added_keys=added,
        removed_keys=removed,
        changed_keys=changed,
        summary_diff=summary,
        token_estimate=token_est,
    )

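# Example (hypothetical state payloads, not from this repo): if s_before.data
# is {"tests_passing": False} and s_after.data is {"tests_passing": True,
# "patch_applied": "fix.diff"}, the resulting summary_diff is:
#
#   + patch_applied = fix.diff
#   ~ tests_passing: False → True
#
# Two lines, roughly 15 tokens — independent of how long the trajectory is.
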
def format_critic_input(
    purpose: str,
    action_name: str,
    action_thought: str,
    delta: StateDelta,
    max_tokens: int = 300,
) -> str:
    """
    Format the minimal input for the Markovian Critic.

    Total budget: ~300 tokens. This is ALL the Critic gets.
    No history. No full state. Just: purpose + action + what changed.

    This is the fundamental insight: in an MDP, Φ(s') depends only on s',
    not on the path taken to reach it.
    """
    parts = [
        f"PURPOSE: {purpose[:100]}",
        f"ACTION: {action_name}",
        f"THOUGHT: {action_thought[:80]}",
        f"STATE CHANGE:\n{delta.summary_diff[:400]}",
    ]

    text = "\n".join(parts)

    # Hard cap at the token budget (~4 chars per token)
    char_budget = max_tokens * 4
    if len(text) > char_budget:
        text = text[:char_budget] + "\n...(truncated)"

    return text

def _truncate(val: Any, max_len: int = 80) -> str:
    """Truncate a value for display."""
    s = str(val)
    return (s[:max_len] + "...") if len(s) > max_len else s

def _text_diff(before: str, after: str) -> str:
    """Compute a concise text diff between two summaries."""
    if before == after:
        return ""

    # Use a unified diff with zero context lines; keep only +/- content lines
    before_lines = before.split("\n")
    after_lines = after.split("\n")

    diff_lines = []
    for line in difflib.unified_diff(before_lines, after_lines, lineterm="", n=0):
        # Drop the "---"/"+++" file headers and "@@" hunk markers
        if line.startswith(("---", "+++", "@@")):
            continue
        if line.startswith("+"):
            diff_lines.append(f"+ {line[1:]}")
        elif line.startswith("-"):
            diff_lines.append(f"- {line[1:]}")

    return "\n".join(diff_lines[:5])  # Cap at 5 diff lines
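
A minimal usage sketch, for illustration only: the real `State` type lives in `purpose_agent.types` and is not shown in this commit, so the sketch stands it in with `SimpleNamespace`, relying only on the two attributes this module actually reads (`.data` and `.summary`). All values here are hypothetical.

# Usage sketch — not part of the commit. The stand-in objects mimic only the
# two State attributes this module reads: .data (dict) and .summary (str).
from types import SimpleNamespace

from purpose_agent.state_delta import compute_state_delta, format_critic_input

s_t = SimpleNamespace(
    data={"cwd": "/repo", "tests_passing": False, "_scratch": "ignored"},
    summary="Repo cloned; tests failing.",
)
s_t1 = SimpleNamespace(
    data={"cwd": "/repo", "tests_passing": True, "patch_applied": "fix.diff"},
    summary="Patch applied; tests passing.",
)

delta = compute_state_delta(s_t, s_t1)  # duck-typed stand-ins for State
print(delta.summary_diff)
# + patch_applied = fix.diff
# ~ tests_passing: False → True
# (the underscore-prefixed "_scratch" key is skipped, so it is never reported)

print(format_critic_input(
    purpose="Make the test suite pass",
    action_name="apply_patch",
    action_thought="The failing test expects the new return type.",
    delta=delta,
))
# PURPOSE: Make the test suite pass
# ACTION: apply_patch
# THOUGHT: The failing test expects the new return type.
# STATE CHANGE:
# + patch_applied = fix.diff
# ~ tests_passing: False → True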