| """ | |
| UndertriAI — Performance Tracker (Theme 4: Self-Improvement) | |
| Tracks the agent's running performance profile across dimensions | |
| and uses it to drive adaptive curriculum decisions. | |
| Pure Python — no server/training/FastAPI dependencies. | |
| """ | |
| import warnings | |
| from collections import deque | |
| from typing import Any, Dict, List, Optional | |
class ExponentialMean:
    """Exponential moving average with configurable decay."""

    __slots__ = ("alpha", "value", "count")

    def __init__(self, alpha: float = 0.1, initial: float = 0.5):
        self.alpha = alpha
        self.value = initial
        self.count = 0

    def update(self, x: float) -> None:
        self.value = self.alpha * x + (1 - self.alpha) * self.value
        self.count += 1

    def get(self) -> float:
        return self.value
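
# Worked example (illustrative): with alpha=0.1 and the default initial
# value of 0.5, a single observation of 1.0 moves the mean to
#   0.1 * 1.0 + 0.9 * 0.5 = 0.55
# and a follow-up observation of 0.0 moves it to
#   0.1 * 0.0 + 0.9 * 0.55 = 0.495
# so roughly the most recent 1/alpha = 10 observations dominate the estimate.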
class PerformanceTracker:
    """
    Tracks agent performance across crime types, stages, and reward
    components. Drives adaptive episode selection and stage promotion.

    Not thread-safe: intended for single-session, single-threaded use,
    so no locks are taken. All public methods handle missing or
    malformed input gracefully.

    NOTE: Tracker state is in-memory only. Server restart clears history.
    For production: persist via tracker.get_profile() → JSON file on /reset.
    """
    def __init__(self, alpha: float = 0.1):
        self._alpha = alpha
        # Per-crime-type EMA of total reward
        self.per_crime_type: Dict[str, ExponentialMean] = {}
        # Per-stage EMA of total reward
        self.per_stage: Dict[int, ExponentialMean] = {
            s: ExponentialMean(alpha=alpha) for s in range(1, 5)
        }
        # Last 50 total rewards (for stage promotion smoothing)
        self.recent_rewards: deque = deque(maxlen=50)
        # Bias fire rate: 1.0 when penalty fired, 0.0 when not
        self.bias_fire_rate: ExponentialMean = ExponentialMean(alpha=alpha)
        # Tool usage counts (cumulative per session)
        self.tool_usage: Dict[str, int] = {}
        # Episode counters
        self.episodes_seen: int = 0
        self.stage_episodes: Dict[int, int] = {1: 0, 2: 0, 3: 0, 4: 0}
        # Recent case performance for failure-replay
        self._recent_case_rewards: deque = deque(maxlen=30)
    # ------------------------------------------------------------------
    # Core update
    # ------------------------------------------------------------------
    def update(
        self,
        episode: Dict[str, Any],
        reward_components: Dict[str, Any],
        tools_used: Optional[List[str]] = None,
    ) -> None:
        """
        Update all internal state from a completed episode.
        Handles missing keys gracefully — never raises on malformed input.
        """
        try:
            total = float(reward_components.get("total_reward",
                                                reward_components.get("total", 0.0)))
        except (TypeError, ValueError):
            total = 0.0

        # Update recent rewards
        self.recent_rewards.append(total)
        self.episodes_seen += 1

        # Per-crime-type tracking
        crime_type = ""
        try:
            crime_type = str(episode.get("crime_type", "")).strip()
        except Exception:
            pass
        if crime_type:
            if crime_type not in self.per_crime_type:
                self.per_crime_type[crime_type] = ExponentialMean(
                    alpha=self._alpha
                )
            self.per_crime_type[crime_type].update(total)

        # Per-stage tracking
        stage = 1
        try:
            stage = int(episode.get("curriculum_stage", 1))
        except (TypeError, ValueError):
            stage = 1
        if 1 <= stage <= 4:
            self.per_stage[stage].update(total)
            self.stage_episodes[stage] = self.stage_episodes.get(stage, 0) + 1

        # Bias fire rate
        try:
            bias_val = float(reward_components.get("bias_penalty", 0.0))
            self.bias_fire_rate.update(1.0 if bias_val > 0.01 else 0.0)
        except (TypeError, ValueError):
            pass

        # Tool usage
        if tools_used:
            for tool in tools_used:
                t = str(tool)
                self.tool_usage[t] = self.tool_usage.get(t, 0) + 1

        # Track case_id → reward for failure-replay
        case_id = ""
        try:
            case_id = str(episode.get("case_id", ""))
        except Exception:
            pass
        if case_id:
            self._recent_case_rewards.append((case_id, total, stage))
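
    # Illustrative call (the exact episode/reward schemas below are an
    # assumption inferred from the keys read above, not a documented spec):
    #   tracker.update(
    #       episode={"crime_type": "fraud", "curriculum_stage": 2,
    #                "case_id": "case_0042"},
    #       reward_components={"total_reward": 0.63, "bias_penalty": 0.0},
    #       tools_used=["search_records", "timeline_builder"],
    #   )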
    # ------------------------------------------------------------------
    # Queries
    # ------------------------------------------------------------------
    def weakest_domain(self) -> Optional[str]:
        """
        Returns the crime_type with the lowest EMA reward.

        Returns None if fewer than 5 episodes seen total or no crime type
        has at least 3 observations.
        """
        if self.episodes_seen < 5:
            return None
        candidates = [
            (ct, ema.get())
            for ct, ema in self.per_crime_type.items()
            if ema.count >= 3
        ]
        if not candidates:
            return None
        return min(candidates, key=lambda x: x[1])[0]

    def suggest_next_stage(self) -> int:
        """
        Returns the recommended stage (1-4) based on readiness thresholds.
        Never demotes — returns highest eligible stage.
        """
        current = 1
        # Stage 1 → 2: EMA >= 0.65 AND at least 20 episodes
        if (self.per_stage[1].get() >= 0.65
                and self.stage_episodes.get(1, 0) >= 20):
            current = 2
        # Stage 2 → 3: EMA >= 0.55 AND at least 50 episodes
        if (current >= 2
                and self.per_stage[2].get() >= 0.55
                and self.stage_episodes.get(2, 0) >= 50):
            current = 3
        # Stage 3 → 4: EMA >= 0.50 AND at least 20 episodes
        if (current >= 3
                and self.per_stage[3].get() >= 0.50
                and self.stage_episodes.get(3, 0) >= 20):
            current = 4
        return current
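
    # Example (illustrative numbers): a tracker whose stage-1 EMA is 0.70
    # over 25 stage-1 episodes and whose stage-2 EMA is 0.60 over 60
    # stage-2 episodes passes the first two gates, so suggest_next_stage()
    # returns 3; the stage-3 gate is still unmet because no stage-3
    # episodes have been seen. Gates are checked in order, so stage 4 is
    # only reachable once every earlier gate passes.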
    def should_generate_synthetic(self, crime_type: str) -> bool:
        """
        Returns True if the agent has mastered this crime type domain
        (EMA > 0.70 with at least 10 observations).
        """
        ema = self.per_crime_type.get(crime_type)
        if ema is None:
            return False
        return ema.get() > 0.70 and ema.count >= 10

    def get_recent_failures(self, threshold: float = 0.40) -> List[str]:
        """
        Returns case_ids from recent episodes where reward was below
        threshold. Used by AdaptiveSelector for failure-replay.
        """
        return [
            case_id
            for case_id, reward, _ in self._recent_case_rewards
            if reward < threshold
        ]

    # ------------------------------------------------------------------
    # Serialization
    # ------------------------------------------------------------------
    def get_profile(self) -> Dict[str, Any]:
        """
        Returns a fully JSON-serializable profile dict.
        No class instances — all values are primitive types.
        """
        recent = list(self.recent_rewards)
        recent_mean = sum(recent) / len(recent) if recent else 0.0
        return {
            "per_crime_type": {
                ct: round(ema.get(), 4)
                for ct, ema in self.per_crime_type.items()
            },
            "per_stage": {
                str(s): round(ema.get(), 4)
                for s, ema in self.per_stage.items()
            },
            "bias_fire_rate": round(self.bias_fire_rate.get(), 4),
            "tool_usage": dict(self.tool_usage),
            "episodes_seen": self.episodes_seen,
            "stage_episodes": dict(self.stage_episodes),
            "weakest_domain": self.weakest_domain(),
            "suggested_stage": self.suggest_next_stage(),
            "recent_mean_reward": round(recent_mean, 4),
        }

    # ------------------------------------------------------------------
    # Session management
    # ------------------------------------------------------------------
    def reset_session(self) -> None:
        """
        Clears transient session state but preserves accumulated
        per-crime-type and per-stage learning.
        """
        self.recent_rewards.clear()
        self.tool_usage.clear()
        self._recent_case_rewards.clear()
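

# ----------------------------------------------------------------------
# Minimal usage sketch (illustrative). The episode/reward dicts and the
# "profile.json" path are assumptions for demonstration, not part of the
# tracker's contract; the class docstring only suggests persisting
# get_profile() as JSON on reset.
# ----------------------------------------------------------------------
if __name__ == "__main__":
    import json

    tracker = PerformanceTracker(alpha=0.1)
    # Feed a handful of fake stage-1 episodes with constant reward.
    for i in range(25):
        tracker.update(
            episode={"crime_type": "fraud", "curriculum_stage": 1,
                     "case_id": f"case_{i:04d}"},
            reward_components={"total_reward": 0.7, "bias_penalty": 0.0},
            tools_used=["search_records"],
        )

    profile = tracker.get_profile()
    print(profile["suggested_stage"])      # 2: stage-1 gate now passes
    print(tracker.get_recent_failures())   # []: no rewards below 0.40

    # Persist the profile before clearing session state, as the class
    # docstring recommends.
    with open("profile.json", "w") as fh:
        json.dump(profile, fh, indent=2)
    tracker.reset_session()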