# Uploaded via huggingface_hub by gaurv007 (commit 653d726, verified).
"""
Local BRAIN Simulator β€” Layer 4 (the killer feature).
Mimics BRAIN's IS tests locally using free price data.
Rejects obvious losers BEFORE spending BRAIN credits.
Saves 30-50% of submissions.
"""
import numpy as np
from dataclasses import dataclass
from typing import Optional
@dataclass
class LocalSimResult:
    """Verdict and headline metrics from a local (pre-BRAIN) alpha backtest.

    Produced by simulate_alpha_local(); mirrors the stats of a BRAIN IS
    test so obvious losers can be rejected before spending BRAIN credits.
    """
    sharpe: float  # annualized Sharpe of the daily PnL (sqrt(252) scaling)
    turnover: float  # mean daily fraction of the book traded
    returns: float  # cumulative (summed) daily PnL over the backtest
    fitness: float  # BRAIN-style fitness: sharpe * sqrt(|returns| / turnover)
    sub_universe_sharpe_p10: float  # 10th-percentile Sharpe across random stock subsets
    max_drawdown: float  # largest peak-to-trough drop of cumulative PnL
    would_pass_brain: bool  # True when no rejection threshold was tripped
    rejection_reasons: list[str]  # human-readable reason for each failed check
def simulate_alpha_local(
    signal_scores: np.ndarray,
    returns: np.ndarray,
    min_sharpe: float = 1.0,
    min_fitness: float = 0.7,
    max_turnover: float = 0.70,
    min_turnover: float = 0.01,
    seed: Optional[int] = None,
) -> LocalSimResult:
    """
    Run a local quick-and-dirty backtest to triage alphas before BRAIN submission.

    Args:
        signal_scores: (T, N) array — daily signal/score for each stock
        returns: (T, N) array — daily returns for each stock
        min_sharpe: minimum Sharpe to pass local sim
        min_fitness: minimum fitness to pass local sim
        max_turnover: maximum turnover allowed
        min_turnover: minimum turnover (too low = no trading)
        seed: optional RNG seed for the sub-universe sampling, so repeated
            runs are reproducible (default None = non-deterministic, as before)

    Returns:
        LocalSimResult with pass/fail verdict

    Note: BRAIN's actual prices differ from free sources by 5-15% Sharpe.
    This is for TRIAGE only — tells you if you're in the ballpark.
    """
    T, N = signal_scores.shape
    rejection_reasons: list[str] = []

    # Normalize signals to weights (cross-sectional rank -> dollar-neutral).
    # Explicit float dtype: zeros_like would inherit an int dtype from int
    # scores and silently truncate every weight to 0.
    weights = np.zeros(signal_scores.shape, dtype=float)
    for t in range(T):
        row = signal_scores[t]
        valid = ~np.isnan(row)
        # Need a reasonable cross-section for ranking to be meaningful.
        if valid.sum() < 50:
            continue
        ranked = np.zeros(N)
        ranked[valid] = _rank_normalize(row[valid])
        ranked -= ranked.mean()  # dollar-neutral: demean
        abs_sum = np.abs(ranked).sum()
        if abs_sum > 0:
            weights[t] = ranked / abs_sum  # unit gross leverage

    # PnL: trade on today's signal, earn tomorrow's return (1-day lag avoids
    # lookahead). nansum so a missing return contributes 0 instead of
    # poisoning the whole day's PnL (and hence Sharpe) with NaN.
    pnl = np.nansum(weights[:-1] * returns[1:], axis=1)
    if len(pnl) == 0 or np.std(pnl) == 0:
        return LocalSimResult(
            sharpe=0, turnover=0, returns=0, fitness=0,
            sub_universe_sharpe_p10=0, max_drawdown=0,
            would_pass_brain=False,
            rejection_reasons=["No valid PnL computed"]
        )

    # Annualized Sharpe (252 trading days).
    sharpe = np.mean(pnl) / np.std(pnl) * np.sqrt(252)

    # Turnover: mean daily traded fraction of the (gross) book.
    weight_diffs = np.abs(weights[1:] - weights[:-1]).sum(axis=1)
    weight_sums = np.abs(weights[:-1]).sum(axis=1)
    valid_turns = weight_sums > 0
    turnover = np.mean(weight_diffs[valid_turns] / weight_sums[valid_turns]) if valid_turns.any() else 0

    # Cumulative return over the whole backtest.
    total_returns = pnl.sum()

    # Fitness = Sharpe * sqrt(|returns| / turnover); floor the denominator
    # to avoid blowup at near-zero turnover.
    fitness = sharpe * np.sqrt(abs(total_returns) / max(turnover, 0.001)) if turnover > 0 else 0

    # Max drawdown on the cumulative (additive) PnL curve.
    cum_pnl = np.cumsum(pnl)
    running_max = np.maximum.accumulate(cum_pnl)
    drawdowns = running_max - cum_pnl
    max_drawdown = drawdowns.max() if len(drawdowns) > 0 else 0

    # Sub-universe Sharpe (simulate BRAIN's sub-universe robustness check).
    # Local Generator: reproducible when seed is given, and never touches
    # the global np.random state.
    rng = np.random.default_rng(seed)
    sub_sharpes = []
    for _ in range(20):
        idx = rng.choice(N, size=min(1000, N), replace=False)
        sub_pnl = np.nansum(weights[:-1, idx] * returns[1:, idx], axis=1)
        if np.std(sub_pnl) > 0:
            sub_sharpes.append(np.mean(sub_pnl) / np.std(sub_pnl) * np.sqrt(252))
    sub_p10 = np.percentile(sub_sharpes, 10) if sub_sharpes else 0

    # Verdict: collect every reason the alpha would fail BRAIN's IS tests.
    if sharpe < min_sharpe:
        rejection_reasons.append(f"Sharpe {sharpe:.2f} < {min_sharpe}")
    if fitness < min_fitness:
        rejection_reasons.append(f"Fitness {fitness:.2f} < {min_fitness}")
    if turnover > max_turnover:
        rejection_reasons.append(f"Turnover {turnover:.2f} > {max_turnover}")
    if turnover < min_turnover:
        rejection_reasons.append(f"Turnover {turnover:.4f} < {min_turnover} (no trading)")
    if sub_p10 < 0.2:
        rejection_reasons.append(f"Sub-universe Sharpe p10 {sub_p10:.2f} < 0.2")
    would_pass = len(rejection_reasons) == 0

    return LocalSimResult(
        sharpe=round(sharpe, 4),
        turnover=round(turnover, 4),
        returns=round(total_returns, 4),
        fitness=round(fitness, 4),
        sub_universe_sharpe_p10=round(sub_p10, 4),
        max_drawdown=round(max_drawdown, 4),
        would_pass_brain=would_pass,
        rejection_reasons=rejection_reasons,
    )
def correlation_with_returns(signal: np.ndarray, returns: np.ndarray) -> float:
    """
    Layer 5: quick Pearson-correlation check between signal and returns.

    Interpretation:
      |corr| > 0.95 -> momentum mirror (kill).
      |corr| < 0.05 -> orthogonal to price (interesting).

    Returns 0.0 when fewer than 100 jointly non-NaN points are available.
    """
    flat_sig = np.ravel(signal)
    flat_ret = np.ravel(returns)
    # Keep only positions where BOTH series are present.
    mask = ~np.isnan(flat_sig) & ~np.isnan(flat_ret)
    if np.count_nonzero(mask) < 100:
        return 0.0
    corr_matrix = np.corrcoef(flat_sig[mask], flat_ret[mask])
    return float(corr_matrix[0, 1])
def sign_sweep_local(
    signal: np.ndarray,
    returns: np.ndarray,
) -> dict:
    """
    Layer 3: local sign sweep.

    Backtests the alpha in both directions (+signal and -signal) and reports
    which sign performs better, plus the Sharpe gap between the two
    ("info_value"). A large gap means the direction actually matters, i.e.
    the signal carries real information.
    """
    forward = simulate_alpha_local(signal, returns)
    flipped = simulate_alpha_local(-signal, returns)
    gap = abs(forward.sharpe - flipped.sharpe)
    return {
        "pos_sharpe": forward.sharpe,
        "neg_sharpe": flipped.sharpe,
        "info_value": round(gap, 4),
        "verdict": "pos" if forward.sharpe > flipped.sharpe else "neg",
        "has_signal": gap > 0.3,
    }
def _rank_normalize(arr: np.ndarray) -> np.ndarray:
"""Convert values to ranks normalized to [-1, 1].
Uses numpy argsort instead of scipy to avoid the scipy dependency.
"""
n = len(arr)
# Get ranks (1-indexed)
ranks = np.empty(n, dtype=float)
# Handle ties by averaging
sorted_idx = np.argsort(arr)
ranks[sorted_idx] = np.arange(1, n + 1)
# Normalize to [-1, 1]
return 2 * (ranks - 1) / (n - 1) - 1