"""purpose_agent — A Self-Improving Agentic Framework via State-Value Evaluation.

Architecture based on:
- MUSE (arxiv:2510.08002): 3-tier hierarchical memory (strategic/procedural/tool)
- LATS (arxiv:2310.04406): LLM-as-value-function V(s) = λ·LM_score + (1-λ)·SC_score
- REMEMBERER (arxiv:2306.07929): Q-value experience replay with Bellman updates
- Reflexion (arxiv:2303.11366): Verbal reinforcement via episodic self-reflection
- SPC (arxiv:2504.19162): Anti-reward-hacking via adversarial critic patterns

Core philosophy: The agent improves via a "Purpose Function" Φ(s) that evaluates
intermediate state improvements (distance to goal) rather than binary outcome success.
No real-time backprop — improvement comes from expanding external memory with
learned heuristics extracted from high-reward trajectories.
"""

__version__ = "0.1.0"

# Re-export the public API so callers can write `from purpose_agent import X`
# instead of reaching into submodules.
from purpose_agent.types import (
    State,
    Action,
    Trajectory,
    TrajectoryStep,
    Heuristic,
    PurposeScore,
    MemoryRecord,
)
from purpose_agent.llm_backend import LLMBackend, MockLLMBackend
from purpose_agent.actor import Actor
from purpose_agent.purpose_function import PurposeFunction
from purpose_agent.experience_replay import ExperienceReplay
from purpose_agent.optimizer import HeuristicOptimizer
from purpose_agent.orchestrator import Orchestrator

# Explicit public API (PEP 8): keep in sync with the re-exports above.
__all__ = [
    "State",
    "Action",
    "Trajectory",
    "TrajectoryStep",
    "Heuristic",
    "PurposeScore",
    "MemoryRecord",
    "LLMBackend",
    "MockLLMBackend",
    "Actor",
    "PurposeFunction",
    "ExperienceReplay",
    "HeuristicOptimizer",
    "Orchestrator",
]