gaurv007 committed on
Commit
ea8b2ff
·
verified ·
1 Parent(s): b32eb19

Upload alpha_factory/personas/performance_surgeon.py with huggingface_hub

Browse files
alpha_factory/personas/performance_surgeon.py CHANGED
@@ -25,6 +25,64 @@ After 3 iterations with the same family → kill the family, don't iterate furth
25
  """
26
 
27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  async def diagnose_performance(
29
  llm: LLMClient,
30
  metrics: BrainMetrics,
@@ -35,14 +93,16 @@ async def diagnose_performance(
35
  ) -> SurgeonResult:
36
  """
37
  Diagnose alpha performance and recommend next action.
 
38
  """
39
  yearly = metrics.yearly_sharpe
 
40
 
41
  # ─── Deterministic checks first (no LLM needed) ───────────────
42
 
43
- # Sign error detector: β‰₯4 years negative
44
- negative_years = sum(1 for s in yearly if s < 0)
45
- if negative_years >= 4:
46
  return SurgeonResult(
47
  regime_dependent=False,
48
  decay_detected=False,
@@ -50,10 +110,10 @@ async def diagnose_performance(
50
  dominant_regime=None,
51
  iteration_suggestion="Likely sign error in β‰₯1 component. Run sign sweep on individual components. If iteration > 2, concentrate on dominant component only.",
52
  verdict=Verdict.ITERATE if iteration < 3 else Verdict.KILL,
53
- reason=f"{negative_years}/{len(yearly)} years negative β€” probable sign error",
54
  )
55
 
56
- # Zero-information detector: flipped signs but Sharpe barely moved
57
  if previous_sharpe is not None and previous_sign_flips >= 2:
58
  sharpe_delta = abs(metrics.sharpe_os - previous_sharpe)
59
  if sharpe_delta < 0.1:
@@ -67,39 +127,33 @@ async def diagnose_performance(
67
  reason=f"Sign-flip moved Sharpe by only {sharpe_delta:.2f} β€” zero-information components",
68
  )
69
 
70
- # Decay detector: monotonic decline in last 3+ years
71
- if len(yearly) >= 3:
72
- last_3 = yearly[-3:]
73
- if all(last_3[i] > last_3[i+1] for i in range(len(last_3)-1)):
74
- return SurgeonResult(
75
- regime_dependent=False,
76
- decay_detected=True,
77
- sign_error_likely=False,
78
- dominant_regime=None,
79
- iteration_suggestion="Signal is decaying. Consider shorter lookback horizon or adding ts_decay_linear. May be a crowded anomaly losing edge.",
80
- verdict=Verdict.ITERATE if metrics.sharpe_os > 0.5 else Verdict.KILL,
81
- reason=f"Monotonic Sharpe decline: {' β†’ '.join(f'{s:.2f}' for s in last_3)}",
82
- )
 
 
 
 
 
 
 
 
 
 
 
 
83
 
84
- # Regime dependency: one year dramatically different
85
- if len(yearly) >= 3:
86
- import numpy as np
87
- mean_s = np.mean(yearly)
88
- std_s = np.std(yearly)
89
- if std_s > 0:
90
- outliers = [i for i, s in enumerate(yearly) if abs(s - mean_s) > 2 * std_s]
91
- if outliers:
92
- return SurgeonResult(
93
- regime_dependent=True,
94
- decay_detected=False,
95
- sign_error_likely=False,
96
- dominant_regime=f"Year {2019 + outliers[0]} is an outlier",
97
- iteration_suggestion="Alpha is regime-dependent. Consider adding a regime filter (trade_when) or accept lower allocation weight.",
98
- verdict=Verdict.ITERATE if metrics.sharpe_os > 1.0 else Verdict.KILL,
99
- reason=f"Regime dependency: year(s) {[2019+i for i in outliers]} are >2Οƒ from mean",
100
- )
101
-
102
- # If we got here and Sharpe is positive, it passed basic checks
103
  if metrics.sharpe_os >= 1.25:
104
  return SurgeonResult(
105
  regime_dependent=False,
@@ -111,7 +165,7 @@ async def diagnose_performance(
111
  reason=f"Sharpe OS = {metrics.sharpe_os:.2f} β‰₯ 1.25, no pathologies detected",
112
  )
113
 
114
- # For borderline cases, use LLM for nuanced diagnosis
115
  user_prompt = f"""Diagnose this alpha's performance:
116
 
117
  Sharpe (full): {metrics.sharpe_full:.3f}
 
25
  """
26
 
27
 
28
+ def _detect_sign_error(yearly: list[float]) -> tuple[bool, str]:
29
+ """Detect probable sign error. Returns (detected, reason)."""
30
+ n = len(yearly)
31
+ if n == 0:
32
+ return False, ""
33
+ negative_years = sum(1 for s in yearly if s < 0)
34
+ # For N=5, β‰₯3 negative is suspicious; for N=10, β‰₯4
35
+ threshold = max(3, n // 2)
36
+ if negative_years >= threshold:
37
+ return True, f"{negative_years}/{n} years negative β€” probable sign error"
38
+ return False, ""
39
+
40
+
41
+ def _detect_decay(yearly: list[float]) -> tuple[bool, str]:
42
+ """Detect monotonic decay in recent years. Returns (detected, reason)."""
43
+ n = len(yearly)
44
+ if n < 3:
45
+ return False, ""
46
+ # Check last 3 years for monotonic decline
47
+ last_3 = yearly[-3:]
48
+ if all(last_3[i] > last_3[i + 1] for i in range(len(last_3) - 1)):
49
+ return True, f"Monotonic Sharpe decline: {' β†’ '.join(f'{s:.2f}' for s in last_3)}"
50
+ # Also check: last year is worst AND significantly worse than mean
51
+ mean_all = sum(yearly) / n
52
+ if yearly[-1] < mean_all * 0.5 and yearly[-1] < yearly[-2]:
53
+ return True, f"Recent decay: last year {yearly[-1]:.2f} << mean {mean_all:.2f}"
54
+ return False, ""
55
+
56
+
57
+ def _detect_regime_dependency(yearly: list[float], start_year: int = 2019) -> tuple[bool, str, str | None]:
58
+ """Detect regime dependency using IQR-based outlier detection (robust for small N)."""
59
+ n = len(yearly)
60
+ if n < 3:
61
+ return False, "", None
62
+
63
+ sorted_yearly = sorted(yearly)
64
+ q1 = sorted_yearly[n // 4] if n >= 4 else sorted_yearly[0]
65
+ q3 = sorted_yearly[(3 * n) // 4] if n >= 4 else sorted_yearly[-1]
66
+ iqr = q3 - q1
67
+
68
+ # Use 1.5 * IQR rule (more robust than 2Οƒ for small N)
69
+ lower_bound = q1 - 1.5 * iqr
70
+ upper_bound = q3 + 1.5 * iqr
71
+
72
+ outliers = []
73
+ for i, s in enumerate(yearly):
74
+ if s < lower_bound or s > upper_bound:
75
+ outliers.append(i)
76
+
77
+ if outliers:
78
+ outlier_years = [start_year + i for i in outliers]
79
+ best_year = start_year + yearly.index(max(yearly))
80
+ worst_year = start_year + yearly.index(min(yearly))
81
+ return True, f"Regime dependency: year(s) {outlier_years} are outliers (IQR method)", f"best={best_year}, worst={worst_year}"
82
+
83
+ return False, "", None
84
+
85
+
86
  async def diagnose_performance(
87
  llm: LLMClient,
88
  metrics: BrainMetrics,
 
93
  ) -> SurgeonResult:
94
  """
95
  Diagnose alpha performance and recommend next action.
96
+ Uses robust heuristics for small sample sizes (N=5 typical).
97
  """
98
  yearly = metrics.yearly_sharpe
99
+ n_years = len(yearly)
100
 
101
  # ─── Deterministic checks first (no LLM needed) ───────────────
102
 
103
+ # 1. Sign error detector
104
+ sign_error, sign_reason = _detect_sign_error(yearly)
105
+ if sign_error:
106
  return SurgeonResult(
107
  regime_dependent=False,
108
  decay_detected=False,
 
110
  dominant_regime=None,
111
  iteration_suggestion="Likely sign error in β‰₯1 component. Run sign sweep on individual components. If iteration > 2, concentrate on dominant component only.",
112
  verdict=Verdict.ITERATE if iteration < 3 else Verdict.KILL,
113
+ reason=sign_reason,
114
  )
115
 
116
+ # 2. Zero-information detector: flipped signs but Sharpe barely moved
117
  if previous_sharpe is not None and previous_sign_flips >= 2:
118
  sharpe_delta = abs(metrics.sharpe_os - previous_sharpe)
119
  if sharpe_delta < 0.1:
 
127
  reason=f"Sign-flip moved Sharpe by only {sharpe_delta:.2f} β€” zero-information components",
128
  )
129
 
130
+ # 3. Decay detector
131
+ decay_detected, decay_reason = _detect_decay(yearly)
132
+ if decay_detected:
133
+ return SurgeonResult(
134
+ regime_dependent=False,
135
+ decay_detected=True,
136
+ sign_error_likely=False,
137
+ dominant_regime=None,
138
+ iteration_suggestion="Signal is decaying. Consider shorter lookback horizon or adding ts_decay_linear. May be a crowded anomaly losing edge.",
139
+ verdict=Verdict.ITERATE if metrics.sharpe_os > 0.5 else Verdict.KILL,
140
+ reason=decay_reason,
141
+ )
142
+
143
+ # 4. Regime dependency (IQR-based, robust for small N)
144
+ regime_dep, regime_reason, dominant_regime = _detect_regime_dependency(yearly)
145
+ if regime_dep:
146
+ return SurgeonResult(
147
+ regime_dependent=True,
148
+ decay_detected=False,
149
+ sign_error_likely=False,
150
+ dominant_regime=dominant_regime,
151
+ iteration_suggestion="Alpha is regime-dependent. Consider adding a regime filter (trade_when) or accept lower allocation weight.",
152
+ verdict=Verdict.ITERATE if metrics.sharpe_os > 1.0 else Verdict.KILL,
153
+ reason=regime_reason,
154
+ )
155
 
156
+ # 5. Strong alpha β€” promote immediately
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
157
  if metrics.sharpe_os >= 1.25:
158
  return SurgeonResult(
159
  regime_dependent=False,
 
165
  reason=f"Sharpe OS = {metrics.sharpe_os:.2f} β‰₯ 1.25, no pathologies detected",
166
  )
167
 
168
+ # 6. Borderline case β€” use LLM for nuanced diagnosis
169
  user_prompt = f"""Diagnose this alpha's performance:
170
 
171
  Sharpe (full): {metrics.sharpe_full:.3f}