Upload alpha_factory/personas/performance_surgeon.py with huggingface_hub

Browse files

Files changed (1) hide show

alpha_factory/personas/performance_surgeon.py +92 -38

alpha_factory/personas/performance_surgeon.py CHANGED Viewed

@@ -25,6 +25,64 @@ After 3 iterations with the same family → kill the family, don't iterate furth
 """
 async def diagnose_performance(
     llm: LLMClient,
     metrics: BrainMetrics,
@@ -35,14 +93,16 @@ async def diagnose_performance(
 ) -> SurgeonResult:
     """
     Diagnose alpha performance and recommend next action.
     """
     yearly = metrics.yearly_sharpe
     # ─── Deterministic checks first (no LLM needed) ───────────────
-    # Sign error detector: ≥4 years negative
-    negative_years = sum(1 for s in yearly if s < 0)
-    if negative_years >= 4:
         return SurgeonResult(
             regime_dependent=False,
             decay_detected=False,
@@ -50,10 +110,10 @@ async def diagnose_performance(
             dominant_regime=None,
             iteration_suggestion="Likely sign error in ≥1 component. Run sign sweep on individual components. If iteration > 2, concentrate on dominant component only.",
             verdict=Verdict.ITERATE if iteration < 3 else Verdict.KILL,
-            reason=f"{negative_years}/{len(yearly)} years negative — probable sign error",
         )
-    # Zero-information detector: flipped signs but Sharpe barely moved
     if previous_sharpe is not None and previous_sign_flips >= 2:
         sharpe_delta = abs(metrics.sharpe_os - previous_sharpe)
         if sharpe_delta < 0.1:
@@ -67,39 +127,33 @@ async def diagnose_performance(
                 reason=f"Sign-flip moved Sharpe by only {sharpe_delta:.2f} — zero-information components",
             )
-    # Decay detector: monotonic decline in last 3+ years
-    if len(yearly) >= 3:
-        last_3 = yearly[-3:]
-        if all(last_3[i] > last_3[i+1] for i in range(len(last_3)-1)):
-            return SurgeonResult(
-                regime_dependent=False,
-                decay_detected=True,
-                sign_error_likely=False,
-                dominant_regime=None,
-                iteration_suggestion="Signal is decaying. Consider shorter lookback horizon or adding ts_decay_linear. May be a crowded anomaly losing edge.",
-                verdict=Verdict.ITERATE if metrics.sharpe_os > 0.5 else Verdict.KILL,
-                reason=f"Monotonic Sharpe decline: {' → '.join(f'{s:.2f}' for s in last_3)}",
-            )
-    # Regime dependency: one year dramatically different
-    if len(yearly) >= 3:
-        import numpy as np
-        mean_s = np.mean(yearly)
-        std_s = np.std(yearly)
-        if std_s > 0:
-            outliers = [i for i, s in enumerate(yearly) if abs(s - mean_s) > 2 * std_s]
-            if outliers:
-                return SurgeonResult(
-                    regime_dependent=True,
-                    decay_detected=False,
-                    sign_error_likely=False,
-                    dominant_regime=f"Year {2019 + outliers[0]} is an outlier",
-                    iteration_suggestion="Alpha is regime-dependent. Consider adding a regime filter (trade_when) or accept lower allocation weight.",
-                    verdict=Verdict.ITERATE if metrics.sharpe_os > 1.0 else Verdict.KILL,
-                    reason=f"Regime dependency: year(s) {[2019+i for i in outliers]} are >2σ from mean",
-                )
-    # If we got here and Sharpe is positive, it passed basic checks
     if metrics.sharpe_os >= 1.25:
         return SurgeonResult(
             regime_dependent=False,
@@ -111,7 +165,7 @@ async def diagnose_performance(
             reason=f"Sharpe OS = {metrics.sharpe_os:.2f} ≥ 1.25, no pathologies detected",
         )
-    # For borderline cases, use LLM for nuanced diagnosis
     user_prompt = f"""Diagnose this alpha's performance:
 Sharpe (full): {metrics.sharpe_full:.3f}

 """
+def _detect_sign_error(yearly: list[float]) -> tuple[bool, str]:
+    """Detect probable sign error. Returns (detected, reason)."""
+    n = len(yearly)
+    if n == 0:
+        return False, ""
+    negative_years = sum(1 for s in yearly if s < 0)
+    # For N=5, ≥3 negative is suspicious; for N=10, ≥4
+    threshold = max(3, n // 2)
+    if negative_years >= threshold:
+        return True, f"{negative_years}/{n} years negative — probable sign error"
+    return False, ""
+def _detect_decay(yearly: list[float]) -> tuple[bool, str]:
+    """Detect monotonic decay in recent years. Returns (detected, reason)."""
+    n = len(yearly)
+    if n < 3:
+        return False, ""
+    # Check last 3 years for monotonic decline
+    last_3 = yearly[-3:]
+    if all(last_3[i] > last_3[i + 1] for i in range(len(last_3) - 1)):
+        return True, f"Monotonic Sharpe decline: {' → '.join(f'{s:.2f}' for s in last_3)}"
+    # Also check: last year is worst AND significantly worse than mean
+    mean_all = sum(yearly) / n
+    if yearly[-1] < mean_all * 0.5 and yearly[-1] < yearly[-2]:
+        return True, f"Recent decay: last year {yearly[-1]:.2f} << mean {mean_all:.2f}"
+    return False, ""
+def _detect_regime_dependency(yearly: list[float], start_year: int = 2019) -> tuple[bool, str, str | None]:
+    """Detect regime dependency using IQR-based outlier detection (robust for small N)."""
+    n = len(yearly)
+    if n < 3:
+        return False, "", None
+    sorted_yearly = sorted(yearly)
+    q1 = sorted_yearly[n // 4] if n >= 4 else sorted_yearly[0]
+    q3 = sorted_yearly[(3 * n) // 4] if n >= 4 else sorted_yearly[-1]
+    iqr = q3 - q1
+    # Use 1.5 * IQR rule (more robust than 2σ for small N)
+    lower_bound = q1 - 1.5 * iqr
+    upper_bound = q3 + 1.5 * iqr
+    outliers = []
+    for i, s in enumerate(yearly):
+        if s < lower_bound or s > upper_bound:
+            outliers.append(i)
+    if outliers:
+        outlier_years = [start_year + i for i in outliers]
+        best_year = start_year + yearly.index(max(yearly))
+        worst_year = start_year + yearly.index(min(yearly))
+        return True, f"Regime dependency: year(s) {outlier_years} are outliers (IQR method)", f"best={best_year}, worst={worst_year}"
+    return False, "", None
 async def diagnose_performance(
     llm: LLMClient,
     metrics: BrainMetrics,
 ) -> SurgeonResult:
     """
     Diagnose alpha performance and recommend next action.
+    Uses robust heuristics for small sample sizes (N=5 typical).
     """
     yearly = metrics.yearly_sharpe
+    n_years = len(yearly)
     # ─── Deterministic checks first (no LLM needed) ───────────────
+    # 1. Sign error detector
+    sign_error, sign_reason = _detect_sign_error(yearly)
+    if sign_error:
         return SurgeonResult(
             regime_dependent=False,
             decay_detected=False,
             dominant_regime=None,
             iteration_suggestion="Likely sign error in ≥1 component. Run sign sweep on individual components. If iteration > 2, concentrate on dominant component only.",
             verdict=Verdict.ITERATE if iteration < 3 else Verdict.KILL,
+            reason=sign_reason,
         )
+    # 2. Zero-information detector: flipped signs but Sharpe barely moved
     if previous_sharpe is not None and previous_sign_flips >= 2:
         sharpe_delta = abs(metrics.sharpe_os - previous_sharpe)
         if sharpe_delta < 0.1:
                 reason=f"Sign-flip moved Sharpe by only {sharpe_delta:.2f} — zero-information components",
             )
+    # 3. Decay detector
+    decay_detected, decay_reason = _detect_decay(yearly)
+    if decay_detected:
+        return SurgeonResult(
+            regime_dependent=False,
+            decay_detected=True,
+            sign_error_likely=False,
+            dominant_regime=None,
+            iteration_suggestion="Signal is decaying. Consider shorter lookback horizon or adding ts_decay_linear. May be a crowded anomaly losing edge.",
+            verdict=Verdict.ITERATE if metrics.sharpe_os > 0.5 else Verdict.KILL,
+            reason=decay_reason,
+        )
+    # 4. Regime dependency (IQR-based, robust for small N)
+    regime_dep, regime_reason, dominant_regime = _detect_regime_dependency(yearly)
+    if regime_dep:
+        return SurgeonResult(
+            regime_dependent=True,
+            decay_detected=False,
+            sign_error_likely=False,
+            dominant_regime=dominant_regime,
+            iteration_suggestion="Alpha is regime-dependent. Consider adding a regime filter (trade_when) or accept lower allocation weight.",
+            verdict=Verdict.ITERATE if metrics.sharpe_os > 1.0 else Verdict.KILL,
+            reason=regime_reason,
+        )
+    # 5. Strong alpha — promote immediately
     if metrics.sharpe_os >= 1.25:
         return SurgeonResult(
             regime_dependent=False,
             reason=f"Sharpe OS = {metrics.sharpe_os:.2f} ≥ 1.25, no pathologies detected",
         )
+    # 6. Borderline case — use LLM for nuanced diagnosis
     user_prompt = f"""Diagnose this alpha's performance:
 Sharpe (full): {metrics.sharpe_full:.3f}