| """ |
| optimizer.py — Agentic optimization decision policy. |
| |
| Monitors agent performance and decides what to optimize: |
| improving → continue (don't fix what works) |
| plateau → optimize prompts/skills/router first (epigenetic) |
| epigenetic_plateau → suggest LoRA/distillation only if ROI positive |
| degrading → rollback immediately |
| |
| Never distills without eval data AND ROI check. |
| """ |
| from __future__ import annotations |
| from dataclasses import dataclass, field |
| from enum import Enum |
| from typing import Any |
|
|
|
|
class OptimizationState(str, Enum):
    """Classification of the agent's recent performance trend.

    Inherits ``str`` so members compare equal to and serialize as their
    plain string values.
    """
    IMPROVING = "improving"  # trend above plateau threshold: leave config alone
    PLATEAU = "plateau"  # trend near zero: try cheap (epigenetic) optimizations
    EPIGENETIC_PLATEAU = "epigenetic_plateau"  # plateau persisted through several epigenetic attempts
    DEGRADING = "degrading"  # trend below degradation threshold: roll back
|
|
|
|
@dataclass
class PerformanceWindow:
    """Bounded rolling window of performance scores with trend detection.

    History is trimmed back to ``2 * window_size`` entries whenever it
    grows past ``3 * window_size``, so memory stays bounded while both
    comparison windows remain fully populated.
    """
    # Raw scores in arrival order, oldest first.
    scores: list[float] = field(default_factory=list)
    # Number of samples that make up one comparison window.
    window_size: int = 10

    def add(self, score: float) -> None:
        """Append *score*, trimming old history once it exceeds 3x the window."""
        self.scores.append(score)
        cap = self.window_size * 3
        if len(self.scores) > cap:
            self.scores = self.scores[-self.window_size * 2:]

    @property
    def recent(self) -> list[float]:
        """The newest ``window_size`` scores (fewer while still filling up)."""
        return self.scores[-self.window_size:]

    @property
    def previous(self) -> list[float]:
        """The window immediately preceding ``recent``.

        With fewer than two full windows recorded, this falls back to the
        first half of the history (which may overlap ``recent`` — a rough
        early-phase approximation, not a disjoint window).
        """
        two_windows = self.window_size * 2
        if len(self.scores) < two_windows:
            half = len(self.scores) // 2
            return self.scores[:half]
        return self.scores[-two_windows:-self.window_size]

    @property
    def trend(self) -> float:
        """mean(recent) - mean(previous): positive = improving, negative = degrading, ~0 = plateau."""
        newer = self.recent
        older = self.previous
        if not newer or not older:
            # Not enough history to compare; treat as flat.
            return 0.0
        return sum(newer) / len(newer) - sum(older) / len(older)
|
|
|
|
@dataclass
class OptimizationRecommendation:
    """What the optimizer recommends.

    Produced by AgenticOptimizer.recommend(); also appended to its history.
    """
    state: OptimizationState  # trend classification the recommendation is based on
    action: str  # machine-readable directive: "continue", "rollback", "optimize_prompts", "optimize_skills", or "suggest_lora"
    reason: str  # human-readable explanation (usually includes the trend value)
    details: dict[str, Any] = field(default_factory=dict)  # optional extras, e.g. {"epigenetic_attempts": n}
|
|
|
|
class AgenticOptimizer:
    """
    Watches a rolling score window and recommends the next optimization step.

    Policy:
      * improving -> keep going (don't fix what works)
      * degrading -> roll back to the previous configuration immediately
      * plateau   -> try cheap epigenetic fixes first (prompts, then skills);
                     only after repeated failed attempts suggest LoRA/distillation.

    Typical loop:
        optimizer = AgenticOptimizer()
        optimizer.record_score(phi_score)   # after each task
        rec = optimizer.recommend()         # periodically
        if rec.action == "optimize_prompts":
            ...  # rebuild prompt pack with new skills
        elif rec.action == "rollback":
            ...  # revert to previous configuration
    """

    def __init__(
        self,
        plateau_threshold: float = 0.05,
        degradation_threshold: float = -0.1,
        min_samples: int = 5,
        epigenetic_attempts_before_lora: int = 3,
    ):
        # Trend above this -> improving; below degradation_threshold -> degrading.
        self.plateau_threshold = plateau_threshold
        self.degradation_threshold = degradation_threshold
        # Minimum recorded scores before any non-trivial recommendation.
        self.min_samples = min_samples
        # Plateau recommendations issued before escalating to LoRA.
        self.epigenetic_attempts = epigenetic_attempts_before_lora
        self.window = PerformanceWindow()
        self._epigenetic_count = 0
        self._recommendations: list[OptimizationRecommendation] = []

    def record_score(self, score: float) -> None:
        """Record a performance score (typically final Φ)."""
        self.window.add(score)

    def recommend(self) -> OptimizationRecommendation:
        """Classify the current trend and return (and log) a recommendation."""
        sample_count = len(self.window.scores)
        if sample_count < self.min_samples:
            return self._emit(OptimizationRecommendation(
                state=OptimizationState.IMPROVING,
                action="continue",
                reason=f"Insufficient data ({sample_count}/{self.min_samples} samples)",
            ))

        trend = self.window.trend
        state = self._classify_state(trend)

        if state == OptimizationState.IMPROVING:
            return self._emit(OptimizationRecommendation(
                state=state, action="continue",
                reason=f"Performance improving (trend={trend:+.3f})",
            ))

        if state == OptimizationState.DEGRADING:
            return self._emit(OptimizationRecommendation(
                state=state, action="rollback",
                reason=f"Performance degrading (trend={trend:+.3f}). Rollback to previous config.",
            ))

        if state == OptimizationState.PLATEAU:
            self._epigenetic_count += 1
            if self._epigenetic_count >= self.epigenetic_attempts:
                # Cheap fixes exhausted: escalate to weight-level optimization.
                return self._emit(OptimizationRecommendation(
                    state=OptimizationState.EPIGENETIC_PLATEAU,
                    action="suggest_lora",
                    reason=f"Plateau persists after {self._epigenetic_count} epigenetic attempts. Consider LoRA/distillation.",
                    details={"epigenetic_attempts": self._epigenetic_count},
                ))
            # First attempt targets prompts; subsequent attempts target skills.
            if self._epigenetic_count <= 1:
                next_action = "optimize_prompts"
            else:
                next_action = "optimize_skills"
            return self._emit(OptimizationRecommendation(
                state=state,
                action=next_action,
                reason=f"Performance plateau (trend={trend:+.3f}). Trying epigenetic optimization #{self._epigenetic_count}.",
            ))

        # Defensive: _classify_state only yields the three states above.
        return self._emit(OptimizationRecommendation(
            state=state, action="continue", reason="Unknown state",
        ))

    def _emit(self, rec: OptimizationRecommendation) -> OptimizationRecommendation:
        """Append *rec* to the history and return it."""
        self._recommendations.append(rec)
        return rec

    def _classify_state(self, trend: float) -> OptimizationState:
        """Map a trend value onto improving / degrading / plateau."""
        if trend > self.plateau_threshold:
            return OptimizationState.IMPROVING
        if trend < self.degradation_threshold:
            return OptimizationState.DEGRADING
        return OptimizationState.PLATEAU

    def reset_epigenetic_counter(self) -> None:
        """Call after successful epigenetic optimization breaks plateau."""
        self._epigenetic_count = 0

    @property
    def current_state(self) -> OptimizationState:
        """State under the current trend, without logging a recommendation."""
        if len(self.window.scores) < self.min_samples:
            return OptimizationState.IMPROVING
        return self._classify_state(self.window.trend)

    @property
    def history(self) -> list[OptimizationRecommendation]:
        """All recommendations issued so far (live internal list)."""
        return self._recommendations
|
|