Rohan03 committed on
Commit
8f09a93
·
verified ·
1 Parent(s): bc30484

Sprint 10B: optimizer.py — agentic decision policy (improving/plateau/degrading/rollback)

Browse files
purpose_agent/optimization/optimizer.py ADDED
@@ -0,0 +1,167 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ optimizer.py — Agentic optimization decision policy.
3
+
4
+ Monitors agent performance and decides what to optimize:
5
+ improving → continue (don't fix what works)
6
+ plateau → optimize prompts/skills/router first (epigenetic)
7
+ epigenetic_plateau → suggest LoRA/distillation only if ROI positive
8
+ degrading → rollback immediately
9
+
10
+ This module only *suggests* LoRA/distillation; the caller must confirm eval data AND a positive ROI before distilling.
11
+ """
12
+ from __future__ import annotations
13
+ from dataclasses import dataclass, field
14
+ from enum import Enum
15
+ from typing import Any
16
+
17
+
18
class OptimizationState(str, Enum):
    """Performance regimes the optimizer can report.

    Members are also ``str`` instances, so each compares equal to its
    plain string value (e.g. ``OptimizationState.PLATEAU == "plateau"``).
    """

    # Recent scores are higher than earlier ones.
    IMPROVING = "improving"
    # Scores are neither clearly rising nor clearly falling.
    PLATEAU = "plateau"
    # A plateau that persisted through repeated prompt/skill optimization.
    EPIGENETIC_PLATEAU = "epigenetic_plateau"
    # Recent scores are lower than earlier ones.
    DEGRADING = "degrading"
23
+
24
+
25
@dataclass
class PerformanceWindow:
    """Rolling window of performance scores.

    ``scores`` is the raw history, oldest first. ``window_size`` is the
    number of scores in one comparison window; the history is trimmed so
    it never holds more than ``3 * window_size`` entries.

    Raises:
        ValueError: if ``window_size`` is smaller than 1.
    """

    scores: list[float] = field(default_factory=list)
    window_size: int = 10

    def __post_init__(self) -> None:
        # A non-positive size breaks the negative-index slices used below
        # (``scores[-0:]`` is the whole list) and stops ``add`` from ever
        # shrinking the history, so reject it up front.
        if self.window_size < 1:
            raise ValueError(f"window_size must be >= 1, got {self.window_size}")

    def add(self, score: float) -> None:
        """Append ``score`` and trim the history once it exceeds 3 windows."""
        self.scores.append(score)
        if len(self.scores) > self.window_size * 3:
            # Keep exactly two windows: one "previous" and one "recent".
            self.scores = self.scores[-self.window_size * 2:]

    @property
    def recent(self) -> list[float]:
        """The latest ``window_size`` scores (all of them if fewer exist)."""
        return self.scores[-self.window_size:]

    @property
    def previous(self) -> list[float]:
        """The window preceding ``recent``.

        With fewer than ``2 * window_size`` scores this is the first half
        of the history, which can overlap with ``recent``.
        """
        if len(self.scores) < self.window_size * 2:
            return self.scores[:len(self.scores) // 2]
        return self.scores[-self.window_size * 2:-self.window_size]

    @property
    def trend(self) -> float:
        """Mean of the newer scores minus mean of the older scores.

        Positive = improving, negative = degrading, ~0 = plateau.
        Returns 0.0 when either side of the comparison is empty.
        """
        history = self.scores
        if len(history) < self.window_size * 2:
            # Not enough data for two full windows: compare the two halves.
            # Using ``recent`` here would re-count the older scores on both
            # sides of the subtraction and understate the real change.
            midpoint = len(history) // 2
            older, newer = history[:midpoint], history[midpoint:]
        else:
            older, newer = self.previous, self.recent
        if not older or not newer:
            return 0.0
        return (sum(newer) / len(newer)) - (sum(older) / len(older))
54
+
55
+
56
@dataclass
class OptimizationRecommendation:
    """What the optimizer recommends.

    Fields:
        state: the performance state the recommendation is based on.
        action: one of "continue", "optimize_prompts", "optimize_skills",
            "suggest_lora", "rollback".
        reason: human-readable explanation of the recommendation.
        details: optional extra data; a fresh empty dict per instance
            when not supplied.
    """

    state: OptimizationState
    action: str
    reason: str
    details: dict[str, Any] = field(default_factory=dict)
63
+
64
+
65
class AgenticOptimizer:
    """
    Monitors performance and recommends optimization actions.

    Scores are stored in a ``PerformanceWindow``; its ``trend`` is compared
    against two thresholds to pick a state, and the state is mapped to an
    action. This class only returns recommendations; it does not apply them.

    Usage:
        optimizer = AgenticOptimizer()

        # After each task:
        optimizer.record_score(phi_score)

        # Periodically check:
        rec = optimizer.recommend()
        if rec.action == "optimize_prompts":
            # Rebuild prompt pack with new skills
        elif rec.action == "rollback":
            # Revert to previous configuration
    """

    def __init__(
        self,
        plateau_threshold: float = 0.05,
        degradation_threshold: float = -0.1,
        min_samples: int = 5,
        epigenetic_attempts_before_lora: int = 3,
        window_size: int = 10,
    ):
        """
        Args:
            plateau_threshold: a trend strictly above this is "improving".
            degradation_threshold: a trend strictly below this is "degrading";
                anything between the two thresholds (inclusive) is a plateau.
            min_samples: number of recorded scores required before the trend
                is consulted at all.
            epigenetic_attempts_before_lora: number of plateau recommendations
                after which ``recommend`` escalates to "suggest_lora".
            window_size: size of one comparison window of scores.
        """
        self.plateau_threshold = plateau_threshold
        self.degradation_threshold = degradation_threshold
        self.min_samples = min_samples
        self.epigenetic_attempts = epigenetic_attempts_before_lora
        self.window = PerformanceWindow(window_size=window_size)
        self._epigenetic_count = 0
        self._recommendations: list[OptimizationRecommendation] = []

    def record_score(self, score: float) -> None:
        """Record a performance score (typically final Φ)."""
        self.window.add(score)

    def recommend(self) -> OptimizationRecommendation:
        """Get optimization recommendation based on current performance trend.

        Every recommendation returned is also appended to ``history``.
        With fewer than ``min_samples`` scores the answer is always
        "continue" (reported with state IMPROVING). Each call that lands on
        a plateau counts as one epigenetic attempt, whether or not new
        scores were recorded since the previous call.
        """
        sample_count = len(self.window.scores)
        if sample_count < self.min_samples:
            return self._remember(OptimizationRecommendation(
                state=OptimizationState.IMPROVING,
                action="continue",
                reason=f"Insufficient data ({sample_count}/{self.min_samples} samples)",
            ))

        trend = self.window.trend
        state = self._classify_state(trend)

        if state == OptimizationState.IMPROVING:
            # The plateau (if any) has been broken, so a later plateau must
            # start counting from scratch instead of inheriting stale
            # attempts and escalating to LoRA prematurely.
            self._epigenetic_count = 0
            rec = OptimizationRecommendation(
                state=state, action="continue",
                reason=f"Performance improving (trend={trend:+.3f})",
            )
        elif state == OptimizationState.DEGRADING:
            rec = OptimizationRecommendation(
                state=state, action="rollback",
                reason=f"Performance degrading (trend={trend:+.3f}). Rollback to previous config.",
            )
        elif state == OptimizationState.PLATEAU:
            rec = self._plateau_recommendation(trend)
        else:
            # Only reachable if _classify_state is overridden to return
            # another state; fall back to doing nothing.
            rec = OptimizationRecommendation(state=state, action="continue", reason="Unknown state")

        return self._remember(rec)

    def _plateau_recommendation(self, trend: float) -> OptimizationRecommendation:
        """Count one plateau attempt and pick prompts, skills, or LoRA."""
        self._epigenetic_count += 1
        if self._epigenetic_count >= self.epigenetic_attempts:
            return OptimizationRecommendation(
                state=OptimizationState.EPIGENETIC_PLATEAU,
                action="suggest_lora",
                reason=f"Plateau persists after {self._epigenetic_count} epigenetic attempts. Consider LoRA/distillation.",
                details={"epigenetic_attempts": self._epigenetic_count},
            )
        # First attempt targets prompts, later ones target skills.
        return OptimizationRecommendation(
            state=OptimizationState.PLATEAU,
            action="optimize_prompts" if self._epigenetic_count <= 1 else "optimize_skills",
            reason=f"Performance plateau (trend={trend:+.3f}). Trying epigenetic optimization #{self._epigenetic_count}.",
        )

    def _remember(self, rec: OptimizationRecommendation) -> OptimizationRecommendation:
        """Append ``rec`` to the history and hand it back."""
        self._recommendations.append(rec)
        return rec

    def _classify_state(self, trend: float) -> OptimizationState:
        """Map a trend value to IMPROVING, DEGRADING or PLATEAU."""
        if trend > self.plateau_threshold:
            return OptimizationState.IMPROVING
        elif trend < self.degradation_threshold:
            return OptimizationState.DEGRADING
        else:
            return OptimizationState.PLATEAU

    def reset_epigenetic_counter(self) -> None:
        """Call after successful epigenetic optimization breaks plateau.

        ``recommend`` also does this on its own whenever it observes an
        improving trend.
        """
        self._epigenetic_count = 0

    @property
    def current_state(self) -> OptimizationState:
        """State implied by the current trend, without side effects.

        Returns IMPROVING while fewer than ``min_samples`` scores exist.
        Never returns EPIGENETIC_PLATEAU: that state is only produced by
        ``recommend`` after repeated plateau attempts.
        """
        if len(self.window.scores) < self.min_samples:
            return OptimizationState.IMPROVING
        return self._classify_state(self.window.trend)

    @property
    def history(self) -> list[OptimizationRecommendation]:
        """All recommendations issued so far (the live internal list)."""
        return self._recommendations