Add purpose_agent package init
Browse files- purpose_agent/__init__.py +50 -0
purpose_agent/__init__.py
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""
purpose_agent — A Self-Improving Agentic Framework via State-Value Evaluation

Architecture based on:
- MUSE (arxiv:2510.08002): 3-tier hierarchical memory (strategic/procedural/tool)
- LATS (arxiv:2310.04406): LLM-as-value-function V(s) = λ·LM_score + (1-λ)·SC_score
- REMEMBERER (arxiv:2306.07929): Q-value experience replay with Bellman updates
- Reflexion (arxiv:2303.11366): Verbal reinforcement via episodic self-reflection
- SPC (arxiv:2504.19162): Anti-reward-hacking via adversarial critic patterns

Core philosophy: The agent improves via a "Purpose Function" Φ(s) that evaluates
intermediate state improvements (distance to goal) rather than binary outcome success.
No real-time backprop — improvement comes from expanding external memory with
learned heuristics extracted from high-reward trajectories.
"""

# Package version; kept before the re-exports so tooling that parses the file
# textually can find it without importing submodules.
__version__ = "0.1.0"

# Re-export the package's public surface so callers can write
# `from purpose_agent import Orchestrator` instead of reaching into submodules.
from purpose_agent.types import (
    State,
    Action,
    Trajectory,
    TrajectoryStep,
    Heuristic,
    PurposeScore,
    MemoryRecord,
)
from purpose_agent.llm_backend import LLMBackend, MockLLMBackend
from purpose_agent.actor import Actor
from purpose_agent.purpose_function import PurposeFunction
from purpose_agent.experience_replay import ExperienceReplay
from purpose_agent.optimizer import HeuristicOptimizer
from purpose_agent.orchestrator import Orchestrator

# Explicit public API: controls `from purpose_agent import *` and documents
# which of the names above are supported for external use.
__all__ = [
    "State",
    "Action",
    "Trajectory",
    "TrajectoryStep",
    "Heuristic",
    "PurposeScore",
    "MemoryRecord",
    "LLMBackend",
    "MockLLMBackend",
    "Actor",
    "PurposeFunction",
    "ExperienceReplay",
    "HeuristicOptimizer",
    "Orchestrator",
]