Upload alpha_factory/personas/performance_surgeon.py with huggingface_hub
Browse files
alpha_factory/personas/performance_surgeon.py
CHANGED
|
@@ -25,6 +25,64 @@ After 3 iterations with the same family β kill the family, don't iterate furth
|
|
| 25 |
"""
|
| 26 |
|
| 27 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
async def diagnose_performance(
|
| 29 |
llm: LLMClient,
|
| 30 |
metrics: BrainMetrics,
|
|
@@ -35,14 +93,16 @@ async def diagnose_performance(
|
|
| 35 |
) -> SurgeonResult:
|
| 36 |
"""
|
| 37 |
Diagnose alpha performance and recommend next action.
|
|
|
|
| 38 |
"""
|
| 39 |
yearly = metrics.yearly_sharpe
|
|
|
|
| 40 |
|
| 41 |
# ─── Deterministic checks first (no LLM needed) ───────────────
|
| 42 |
|
| 43 |
-
# Sign error detector
|
| 44 |
-
|
| 45 |
-
if
|
| 46 |
return SurgeonResult(
|
| 47 |
regime_dependent=False,
|
| 48 |
decay_detected=False,
|
|
@@ -50,10 +110,10 @@ async def diagnose_performance(
|
|
| 50 |
dominant_regime=None,
|
| 51 |
iteration_suggestion="Likely sign error in β₯1 component. Run sign sweep on individual components. If iteration > 2, concentrate on dominant component only.",
|
| 52 |
verdict=Verdict.ITERATE if iteration < 3 else Verdict.KILL,
|
| 53 |
-
reason=
|
| 54 |
)
|
| 55 |
|
| 56 |
-
# Zero-information detector: flipped signs but Sharpe barely moved
|
| 57 |
if previous_sharpe is not None and previous_sign_flips >= 2:
|
| 58 |
sharpe_delta = abs(metrics.sharpe_os - previous_sharpe)
|
| 59 |
if sharpe_delta < 0.1:
|
|
@@ -67,39 +127,33 @@ async def diagnose_performance(
|
|
| 67 |
reason=f"Sign-flip moved Sharpe by only {sharpe_delta:.2f} β zero-information components",
|
| 68 |
)
|
| 69 |
|
| 70 |
-
# Decay detector
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
|
| 84 |
-
#
|
| 85 |
-
if len(yearly) >= 3:
|
| 86 |
-
import numpy as np
|
| 87 |
-
mean_s = np.mean(yearly)
|
| 88 |
-
std_s = np.std(yearly)
|
| 89 |
-
if std_s > 0:
|
| 90 |
-
outliers = [i for i, s in enumerate(yearly) if abs(s - mean_s) > 2 * std_s]
|
| 91 |
-
if outliers:
|
| 92 |
-
return SurgeonResult(
|
| 93 |
-
regime_dependent=True,
|
| 94 |
-
decay_detected=False,
|
| 95 |
-
sign_error_likely=False,
|
| 96 |
-
dominant_regime=f"Year {2019 + outliers[0]} is an outlier",
|
| 97 |
-
iteration_suggestion="Alpha is regime-dependent. Consider adding a regime filter (trade_when) or accept lower allocation weight.",
|
| 98 |
-
verdict=Verdict.ITERATE if metrics.sharpe_os > 1.0 else Verdict.KILL,
|
| 99 |
-
reason=f"Regime dependency: year(s) {[2019+i for i in outliers]} are >2Ο from mean",
|
| 100 |
-
)
|
| 101 |
-
|
| 102 |
-
# If we got here and Sharpe is positive, it passed basic checks
|
| 103 |
if metrics.sharpe_os >= 1.25:
|
| 104 |
return SurgeonResult(
|
| 105 |
regime_dependent=False,
|
|
@@ -111,7 +165,7 @@ async def diagnose_performance(
|
|
| 111 |
reason=f"Sharpe OS = {metrics.sharpe_os:.2f} β₯ 1.25, no pathologies detected",
|
| 112 |
)
|
| 113 |
|
| 114 |
-
#
|
| 115 |
user_prompt = f"""Diagnose this alpha's performance:
|
| 116 |
|
| 117 |
Sharpe (full): {metrics.sharpe_full:.3f}
|
|
|
|
| 25 |
"""
|
| 26 |
|
| 27 |
|
| 28 |
+
def _detect_sign_error(yearly: list[float]) -> tuple[bool, str]:
|
| 29 |
+
"""Detect probable sign error. Returns (detected, reason)."""
|
| 30 |
+
n = len(yearly)
|
| 31 |
+
if n == 0:
|
| 32 |
+
return False, ""
|
| 33 |
+
negative_years = sum(1 for s in yearly if s < 0)
|
| 34 |
+
# For N=5, β₯3 negative is suspicious; for N=10, β₯4
|
| 35 |
+
threshold = max(3, n // 2)
|
| 36 |
+
if negative_years >= threshold:
|
| 37 |
+
return True, f"{negative_years}/{n} years negative β probable sign error"
|
| 38 |
+
return False, ""
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def _detect_decay(yearly: list[float]) -> tuple[bool, str]:
|
| 42 |
+
"""Detect monotonic decay in recent years. Returns (detected, reason)."""
|
| 43 |
+
n = len(yearly)
|
| 44 |
+
if n < 3:
|
| 45 |
+
return False, ""
|
| 46 |
+
# Check last 3 years for monotonic decline
|
| 47 |
+
last_3 = yearly[-3:]
|
| 48 |
+
if all(last_3[i] > last_3[i + 1] for i in range(len(last_3) - 1)):
|
| 49 |
+
return True, f"Monotonic Sharpe decline: {' β '.join(f'{s:.2f}' for s in last_3)}"
|
| 50 |
+
# Also check: last year is worst AND significantly worse than mean
|
| 51 |
+
mean_all = sum(yearly) / n
|
| 52 |
+
if yearly[-1] < mean_all * 0.5 and yearly[-1] < yearly[-2]:
|
| 53 |
+
return True, f"Recent decay: last year {yearly[-1]:.2f} << mean {mean_all:.2f}"
|
| 54 |
+
return False, ""
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
def _detect_regime_dependency(yearly: list[float], start_year: int = 2019) -> tuple[bool, str, str | None]:
|
| 58 |
+
"""Detect regime dependency using IQR-based outlier detection (robust for small N)."""
|
| 59 |
+
n = len(yearly)
|
| 60 |
+
if n < 3:
|
| 61 |
+
return False, "", None
|
| 62 |
+
|
| 63 |
+
sorted_yearly = sorted(yearly)
|
| 64 |
+
q1 = sorted_yearly[n // 4] if n >= 4 else sorted_yearly[0]
|
| 65 |
+
q3 = sorted_yearly[(3 * n) // 4] if n >= 4 else sorted_yearly[-1]
|
| 66 |
+
iqr = q3 - q1
|
| 67 |
+
|
| 68 |
+
# Use 1.5 * IQR rule (more robust than 2Ο for small N)
|
| 69 |
+
lower_bound = q1 - 1.5 * iqr
|
| 70 |
+
upper_bound = q3 + 1.5 * iqr
|
| 71 |
+
|
| 72 |
+
outliers = []
|
| 73 |
+
for i, s in enumerate(yearly):
|
| 74 |
+
if s < lower_bound or s > upper_bound:
|
| 75 |
+
outliers.append(i)
|
| 76 |
+
|
| 77 |
+
if outliers:
|
| 78 |
+
outlier_years = [start_year + i for i in outliers]
|
| 79 |
+
best_year = start_year + yearly.index(max(yearly))
|
| 80 |
+
worst_year = start_year + yearly.index(min(yearly))
|
| 81 |
+
return True, f"Regime dependency: year(s) {outlier_years} are outliers (IQR method)", f"best={best_year}, worst={worst_year}"
|
| 82 |
+
|
| 83 |
+
return False, "", None
|
| 84 |
+
|
| 85 |
+
|
| 86 |
async def diagnose_performance(
|
| 87 |
llm: LLMClient,
|
| 88 |
metrics: BrainMetrics,
|
|
|
|
| 93 |
) -> SurgeonResult:
|
| 94 |
"""
|
| 95 |
Diagnose alpha performance and recommend next action.
|
| 96 |
+
Uses robust heuristics for small sample sizes (N=5 typical).
|
| 97 |
"""
|
| 98 |
yearly = metrics.yearly_sharpe
|
| 99 |
+
n_years = len(yearly)
|
| 100 |
|
| 101 |
# ─── Deterministic checks first (no LLM needed) ───────────────
|
| 102 |
|
| 103 |
+
# 1. Sign error detector
|
| 104 |
+
sign_error, sign_reason = _detect_sign_error(yearly)
|
| 105 |
+
if sign_error:
|
| 106 |
return SurgeonResult(
|
| 107 |
regime_dependent=False,
|
| 108 |
decay_detected=False,
|
|
|
|
| 110 |
dominant_regime=None,
|
| 111 |
iteration_suggestion="Likely sign error in β₯1 component. Run sign sweep on individual components. If iteration > 2, concentrate on dominant component only.",
|
| 112 |
verdict=Verdict.ITERATE if iteration < 3 else Verdict.KILL,
|
| 113 |
+
reason=sign_reason,
|
| 114 |
)
|
| 115 |
|
| 116 |
+
# 2. Zero-information detector: flipped signs but Sharpe barely moved
|
| 117 |
if previous_sharpe is not None and previous_sign_flips >= 2:
|
| 118 |
sharpe_delta = abs(metrics.sharpe_os - previous_sharpe)
|
| 119 |
if sharpe_delta < 0.1:
|
|
|
|
| 127 |
reason=f"Sign-flip moved Sharpe by only {sharpe_delta:.2f} β zero-information components",
|
| 128 |
)
|
| 129 |
|
| 130 |
+
# 3. Decay detector
|
| 131 |
+
decay_detected, decay_reason = _detect_decay(yearly)
|
| 132 |
+
if decay_detected:
|
| 133 |
+
return SurgeonResult(
|
| 134 |
+
regime_dependent=False,
|
| 135 |
+
decay_detected=True,
|
| 136 |
+
sign_error_likely=False,
|
| 137 |
+
dominant_regime=None,
|
| 138 |
+
iteration_suggestion="Signal is decaying. Consider shorter lookback horizon or adding ts_decay_linear. May be a crowded anomaly losing edge.",
|
| 139 |
+
verdict=Verdict.ITERATE if metrics.sharpe_os > 0.5 else Verdict.KILL,
|
| 140 |
+
reason=decay_reason,
|
| 141 |
+
)
|
| 142 |
+
|
| 143 |
+
# 4. Regime dependency (IQR-based, robust for small N)
|
| 144 |
+
regime_dep, regime_reason, dominant_regime = _detect_regime_dependency(yearly)
|
| 145 |
+
if regime_dep:
|
| 146 |
+
return SurgeonResult(
|
| 147 |
+
regime_dependent=True,
|
| 148 |
+
decay_detected=False,
|
| 149 |
+
sign_error_likely=False,
|
| 150 |
+
dominant_regime=dominant_regime,
|
| 151 |
+
iteration_suggestion="Alpha is regime-dependent. Consider adding a regime filter (trade_when) or accept lower allocation weight.",
|
| 152 |
+
verdict=Verdict.ITERATE if metrics.sharpe_os > 1.0 else Verdict.KILL,
|
| 153 |
+
reason=regime_reason,
|
| 154 |
+
)
|
| 155 |
|
| 156 |
+
# 5. Strong alpha β promote immediately
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 157 |
if metrics.sharpe_os >= 1.25:
|
| 158 |
return SurgeonResult(
|
| 159 |
regime_dependent=False,
|
|
|
|
| 165 |
reason=f"Sharpe OS = {metrics.sharpe_os:.2f} β₯ 1.25, no pathologies detected",
|
| 166 |
)
|
| 167 |
|
| 168 |
+
# 6. Borderline case β use LLM for nuanced diagnosis
|
| 169 |
user_prompt = f"""Diagnose this alpha's performance:
|
| 170 |
|
| 171 |
Sharpe (full): {metrics.sharpe_full:.3f}
|