#!/usr/bin/env python3
"""
═══════════════════════════════════════════════════════════════════════════════
WorldQuant Alpha Swarm — Gradio UI
Supports: Hugging Face Inference API + Ollama (local)
Features:
  • LLM-driven alpha generation with structured JSON prompting
  • Dropdown selectors for all WQ data fields & operators
  • Real-time backtest evaluation on synthetic data
  • Orthogonality check vs existing library
  • Multi-domain swarm mode
═══════════════════════════════════════════════════════════════════════════════
"""

import json
import math
import os
import random
import re
import sys
import traceback
from dataclasses import dataclass
from typing import Dict, List, Optional, Set, Tuple

import gradio as gr
import numpy as np
import pandas as pd
from scipy.stats import spearmanr

# ─────────────────────────────────────────────────────────────────────────────
# CONFIG: Model Lists
# ─────────────────────────────────────────────────────────────────────────────
HF_MODELS = [
    "meta-llama/Meta-Llama-3-8B-Instruct",
    "mistralai/Mistral-7B-Instruct-v0.3",
    "Qwen/Qwen2.5-7B-Instruct",
    "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
    "microsoft/Phi-3-mini-4k-instruct",
    "HuggingFaceH4/zephyr-7b-beta",
]

OLLAMA_MODELS = [
    "llama3.2",
    "deepseek-r1:8b",
    "qwen2.5:7b",
    "mistral",
    "codellama",
    "phi3",
]

# ─────────────────────────────────────────────────────────────────────────────
# CONFIG: WorldQuant Data Fields & Operators
# ─────────────────────────────────────────────────────────────────────────────
WQ_DATA_FIELDS = {
    # Price / Volume
    "open", "high", "low", "close", "volume", "vwap", "returns", "returns_open",
    "intraday_return", "overnight_return", "open_close_return", "high_low_range",
    "close_open_gap", "num_trades", "turnover", "turnover_ratio",
    "bid", "ask", "bid_size", "ask_size", "adv20", "adv60",
    # Fundamentals
    "market_cap", "pe_ratio", "pb_ratio", "ps_ratio", "ev_ebitda", "ev_sales",
    "debt_equity", "current_ratio", "roe", "roa", "roic", "gross_profit_margin",
    "ebitda", "operating_income", "net_income", "sales", "revenue",
    "total_assets", "total_debt", "cash", "book_value", "equity",
    "liabilities", "assets", "eps", "dps", "dividend_yield",
    "revenue_growth", "earnings_growth", "enterprise_value", "cap",
    "gross_income", "gross_income_reported_value",
    # Analyst / Estimates
    "est_eps", "est_revenue", "recommendation_mean", "num_analysts",
    "eps_surprise", "eps_surprise_pct",
    # Options
    "implied_volatility_call_180", "implied_volatility_put_180",
    "iv30", "iv60", "iv90", "put_call_ratio", "option_volume", "open_interest",
    # Alternative
    "realized_vol", "volatility", "skewness", "kurtosis",
}

WQ_OPERATORS = {
    # Cross-section
    "rank", "zscore", "scale", "normalize", "sign", "abs", "max", "min",
    "greater", "less", "if_else", "cond", "and", "or", "not",
    "group_neutralize", "group_rank", "group_zscore", "group_normalize",
    # Time-series
    "ts_mean", "ts_std_dev", "ts_variance", "ts_zscore", "ts_rank",
    "ts_min", "ts_max", "ts_delta", "ts_delay", "ts_return",
    "ts_corr", "ts_cov", "ts_sum", "ts_prod", "ts_skew", "ts_kurt",
    "ts_decay_linear", "ts_decay_exp", "ts_argmax", "ts_argmin",
    "ts_ir", "ts_backfill", "ts_sumif", "ts_count",
    # Special
    "trade_when",
}

NEUTRALIZATION_LEVELS = ["subindustry", "industry", "sector", "market", "none"]

# ─────────────────────────────────────────────────────────────────────────────
# SYNTHETIC DATA GENERATOR (Embedded Anomalies)
# ─────────────────────────────────────────────────────────────────────────────
_DATA_CACHE = {}


def get_synthetic_data(n_stocks: int = 300, n_days: int = 252, seed: int = 2026):
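    """Build a cached synthetic equity panel with four planted anomalies.

    Returns ``(data, fwd)`` where ``data`` maps WQ field names to
    (n_days x n_stocks) DataFrames and ``fwd`` holds next-day returns
    (``returns.shift(-1)``). Anomaly strengths below are illustrative
    choices, not calibrated values.
    """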
    key = (n_stocks, n_days, seed)
    if key in _DATA_CACHE:
        return _DATA_CACHE[key]

    np.random.seed(seed)
    dates = pd.date_range("2020-01-02", periods=n_days, freq="B")
    stocks = [f"STK_{i:04d}" for i in range(n_stocks)]

    # Persistent characteristics
    liquidity_sens = np.random.beta(2, 5, n_stocks)
    value_score = -np.log(np.random.lognormal(0, 0.4, n_stocks))
    earn_vol = np.random.gamma(2, 0.03, n_stocks)

    # Market factor
    market_ret = np.random.normal(0.0003, 0.012, n_days)
    idio_vol = np.random.uniform(0.015, 0.035, n_stocks)
    beta = np.random.uniform(0.5, 1.5, n_stocks)
    returns = np.random.normal(0, idio_vol, (n_days, n_stocks))
    for t in range(n_days):
        returns[t] += beta * market_ret[t]

    # Embed anomalies
    market_cap = np.random.lognormal(22, 1.2, (n_days, n_stocks))
    market_cap = np.maximum(market_cap, 1e6)
    volume = np.exp(np.random.normal(15, 0.5, (n_days, n_stocks)))

    # ANOMALY 1: Amihud reversal
    for t in range(5, n_days - 1):
        amihud = np.abs(returns[t]) / (market_cap[t] * 1e-6 + 1000)
        amihud_rank = np.argsort(np.argsort(amihud)) / (n_stocks - 1)
        returns[t+1, amihud_rank > 0.80] -= 0.008 * liquidity_sens[amihud_rank > 0.80]
        returns[t+1, amihud_rank < 0.20] += 0.003 * (1 - liquidity_sens[amihud_rank < 0.20])

    # ANOMALY 2: PEAD
    eps_surprise = np.zeros((n_days, n_stocks))
    for s in range(n_stocks):
        earn_dates = np.random.choice(range(20, n_days - 10), size=3, replace=False)
        for ed in earn_dates:
            surprise = np.random.normal(0, earn_vol[s])
            eps_surprise[ed, s] = surprise
            drift = 0.5 * surprise / (earn_vol[s] + 0.001) * 0.004
            for d in range(1, 6):
                if ed + d < n_days:
                    returns[ed + d, s] += drift * (1 - 0.15 * d)

    # ANOMALY 3: Value premium
    for t in range(n_days):
        returns[t] += 0.00008 * value_score

    # ANOMALY 4: VWAP pressure reversal
    close = np.zeros((n_days, n_stocks))
    close[0] = 100.0
    for t in range(1, n_days):
        close[t] = close[t-1] * (1 + returns[t])
    vol_ma20 = pd.DataFrame(volume).rolling(20, min_periods=1).mean().values
    rel_vol = volume / (vol_ma20 + 1)
    vwap = close * (1 + 0.001 * (rel_vol - 1) * np.random.normal(0, 1, (n_days, n_stocks)))
    for t in range(1, n_days - 1):
        vwap_gap = np.abs(vwap[t] - close[t]) / close[t]
        pressure = vwap_gap * rel_vol[t]
        p_rank = np.argsort(np.argsort(pressure)) / (n_stocks - 1)
        returns[t+1, p_rank > 0.90] -= 0.006 * liquidity_sens[p_rank > 0.90]

    # Recalculate close with anomalies
    close = np.zeros((n_days, n_stocks))
    close[0] = 100.0
    for t in range(1, n_days):
        close[t] = close[t-1] * (1 + returns[t])
    high = close * (1 + np.abs(np.random.normal(0, 0.008, close.shape)))
    low = close * (1 - np.abs(np.random.normal(0, 0.008, close.shape)))
    open_p = close * (1 + np.random.normal(0, 0.003, close.shape))

    # Fundamentals
    operating_income = market_cap * np.random.lognormal(-3.0, 0.6, (n_days, n_stocks))
    ebitda = operating_income * np.random.lognormal(0.3, 0.15, (n_days, n_stocks))
    total_debt = market_cap * np.random.lognormal(-1.8, 0.9, (n_days, n_stocks))
    total_assets = market_cap * np.random.lognormal(0.1, 0.4, (n_days, n_stocks))
    cash = total_assets * np.random.uniform(0.03, 0.18, (n_days, n_stocks))
    equity = total_assets * np.random.uniform(0.35, 0.75, (n_days, n_stocks))
    liabilities = total_assets - equity
    enterprise_value = market_cap * np.random.uniform(1.0, 1.6, (n_days, n_stocks))
    sales = market_cap * np.random.lognormal(-1.4, 0.35, (n_days, n_stocks))
    eps = operating_income / (market_cap / 100) * np.random.uniform(0.3, 0.8, (n_days, n_stocks))
    est_eps = eps * (1 + np.random.normal(0, 0.1, (n_days, n_stocks)))
    eps_surprise_pct = eps_surprise / (np.abs(est_eps) + 0.01)
    num_analysts = np.random.poisson(8, (n_days, n_stocks)).astype(float)

    # Options
    iv_call = np.random.uniform(0.18, 0.48, (n_days, n_stocks))
    iv_put = iv_call + np.random.normal(0, 0.025, (n_days, n_stocks))
    put_call_ratio = np.random.lognormal(0, 0.35, (n_days, n_stocks))
    option_volume = volume * np.random.uniform(0.002, 0.04, (n_days, n_stocks))
    realized_vol = pd.DataFrame(returns).rolling(20, min_periods=1).std().values
    realized_vol = np.nan_to_num(realized_vol, nan=0.02)

    def mkdf(arr):
        return pd.DataFrame(arr, index=dates, columns=stocks)

    data = {
        "returns": mkdf(returns),
        "close": mkdf(close),
        "high": mkdf(high),
        "low": mkdf(low),
        "open": mkdf(open_p),
        "volume": mkdf(volume),
        "vwap": mkdf(vwap),
        "market_cap": mkdf(market_cap),
        "cap": mkdf(market_cap),
        "operating_income": mkdf(operating_income),
        "ebitda": mkdf(ebitda),
        "total_debt": mkdf(total_debt),
        "total_assets": mkdf(total_assets),
        "cash": mkdf(cash),
        "equity": mkdf(equity),
        "book_value": mkdf(equity),
        "liabilities": mkdf(liabilities),
        "assets": mkdf(total_assets),
        "enterprise_value": mkdf(enterprise_value),
        "sales": mkdf(sales),
        "revenue": mkdf(sales),
        "eps": mkdf(eps),
        "est_eps": mkdf(est_eps),
        "eps_surprise": mkdf(eps_surprise),
        "eps_surprise_pct": mkdf(eps_surprise_pct),
        "num_analysts": mkdf(num_analysts),
        "implied_volatility_call_180": mkdf(iv_call),
        "implied_volatility_put_180": mkdf(iv_put),
        "put_call_ratio": mkdf(put_call_ratio),
        "option_volume": mkdf(option_volume),
        "realized_vol": mkdf(realized_vol),
        "adv20": mkdf(pd.DataFrame(volume).rolling(20, min_periods=1).mean().values),
        "turnover": mkdf(volume / (market_cap + 1)),
        "turnover_ratio": mkdf(volume / (market_cap + 1)),
        "volatility": mkdf(realized_vol),
        "debt_equity": mkdf(total_debt / (equity + 1)),
        "current_ratio": mkdf(np.random.uniform(0.8, 2.5, (n_days, n_stocks))),
        "roe": mkdf(operating_income / (equity + 1)),
        "roa": mkdf(operating_income / (total_assets + 1)),
        "gross_profit_margin": mkdf(np.random.uniform(0.2, 0.6, (n_days, n_stocks))),
        "pe_ratio": mkdf(np.random.lognormal(2.5, 0.5, (n_days, n_stocks))),
        "pb_ratio": mkdf(close / (equity / (market_cap / 100) + 0.01)),
        "ev_ebitda": mkdf(enterprise_value / (ebitda + 1)),
        "net_income": mkdf(operating_income * np.random.uniform(0.5, 0.9, (n_days, n_stocks))),
        "dividend_yield": mkdf(np.random.uniform(0, 0.05, (n_days, n_stocks))),
        "earnings_growth": mkdf(np.random.normal(0.05, 0.15, (n_days, n_stocks))),
        "revenue_growth": mkdf(np.random.normal(0.05, 0.15, (n_days, n_stocks))),
        "gross_income": mkdf(operating_income * np.random.uniform(1.2, 1.5, (n_days, n_stocks))),
        "gross_income_reported_value": mkdf(operating_income * np.random.uniform(1.2, 1.5, (n_days, n_stocks))),
        "iv30": mkdf(np.random.uniform(0.18, 0.48, (n_days, n_stocks))),
        "iv60": mkdf(np.random.uniform(0.18, 0.48, (n_days, n_stocks))),
        "iv90": mkdf(np.random.uniform(0.18, 0.48, (n_days, n_stocks))),
        "open_interest": mkdf(option_volume * np.random.uniform(5, 20, (n_days, n_stocks))),
        "bid": mkdf(close * (1 - np.random.uniform(0, 0.001, (n_days, n_stocks)))),
        "ask": mkdf(close * (1 + np.random.uniform(0, 0.001, (n_days, n_stocks)))),
        "bid_size": mkdf(np.random.poisson(1000, (n_days, n_stocks))),
        "ask_size": mkdf(np.random.poisson(1000, (n_days, n_stocks))),
        "returns_open": mkdf(np.random.normal(0.0002, 0.02, (n_days, n_stocks))),
        "intraday_return": mkdf(returns - np.random.normal(0.0001, 0.01, (n_days, n_stocks))),
        "overnight_return": mkdf(np.random.normal(0.0001, 0.01, (n_days, n_stocks))),
        "high_low_range": mkdf((high - low) / close),
        "close_open_gap": mkdf((close - open_p) / open_p),
        "est_revenue": mkdf(sales * (1 + np.random.normal(0, 0.05, (n_days, n_stocks)))),
        "recommendation_mean": mkdf(np.random.uniform(1.5, 4.5, (n_days, n_stocks))),
        "roic": mkdf(operating_income / (total_assets + 1)),
        "ev_sales": mkdf(enterprise_value / (sales + 1)),
        "num_trades": mkdf(np.random.poisson(5000, (n_days, n_stocks))),
        "skewness": mkdf(pd.DataFrame(returns).rolling(20, min_periods=1).skew().values),
        "kurtosis": mkdf(pd.DataFrame(returns).rolling(20, min_periods=1).kurt().values),
    }

    fwd = data["returns"].shift(-1)
    result = (data, fwd)
    _DATA_CACHE[key] = result
    return result
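# Illustrative usage of the generator (comments only, not executed at import;
# values depend on the seed):
#
#     data, fwd = get_synthetic_data(n_stocks=100, n_days=120, seed=7)
#     data["close"].shape         # (120, 100)
#     fwd.iloc[-1].isna().all()   # True: the last day has no next-day return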
# ─────────────────────────────────────────────────────────────────────────────
# ALPHA EVALUATOR
# ─────────────────────────────────────────────────────────────────────────────
def evaluate_alpha(expr: str, data: dict, fwd: pd.DataFrame, min_days: int = 50):
    """Evaluate a WQ expression and return metrics."""
    ns = dict(data)

    def _where(cond, a, b):
        # np.where drops the DataFrame container; rebuild it so the final
        # result of an expression can still be a DataFrame.
        ref = cond if isinstance(cond, pd.DataFrame) else a
        return pd.DataFrame(np.where(cond, a, b), index=ref.index, columns=ref.columns)

    ns["rank"] = lambda df: df.rank(axis=1, pct=True)
    ns["zscore"] = lambda df: (df - df.mean(axis=1).values[:, None]) / (df.std(axis=1).values[:, None] + 0.0001)
    ns["sign"] = np.sign
    ns["abs"] = np.abs
    ns["ts_mean"] = lambda df, w: df.rolling(window=int(w), min_periods=1).mean()
    ns["ts_std_dev"] = lambda df, w: df.rolling(window=int(w), min_periods=1).std()
    ns["ts_rank"] = lambda df, w: df.rolling(window=int(w), min_periods=1).apply(
        lambda x: np.argsort(np.argsort(x))[-1] / max(len(x) - 1, 1) if len(x) > 1 else 0.5, raw=True
    )
    ns["ts_min"] = lambda df, w: df.rolling(window=int(w), min_periods=1).min()
    ns["ts_max"] = lambda df, w: df.rolling(window=int(w), min_periods=1).max()
    ns["ts_delta"] = lambda df, w: df - df.shift(int(w))
    ns["ts_delay"] = lambda df, w: df.shift(int(w))
    ns["ts_return"] = lambda df, w: df / df.shift(int(w)) - 1
    ns["ts_sum"] = lambda df, w: df.rolling(window=int(w), min_periods=1).sum()
    ns["ts_backfill"] = lambda df, w: df.rolling(window=int(w), min_periods=1).apply(
        lambda x: pd.Series(x).ffill().iloc[-1], raw=True
    )
    ns["ts_decay_linear"] = lambda df, w: _ts_decay_fast(df, int(w))
    ns["group_neutralize"] = lambda df, _: df - df.mean(axis=1).values[:, None]
    ns["group_rank"] = lambda df, _: df.rank(axis=1, pct=True)
    ns["greater"] = lambda a, b: (a > b).astype(float)
    ns["less"] = lambda a, b: (a < b).astype(float)
    ns["if_else"] = lambda c, a, b: _where(c, a, b)
    # Note: "and"/"or"/"not" are Python keywords, so eval() can never call
    # them as functions; they are bound here for completeness only.
    ns["and"] = lambda a, b: ((a > 0) & (b > 0)).astype(float)
    ns["or"] = lambda a, b: ((a > 0) | (b > 0)).astype(float)
    ns["not"] = lambda a: (a <= 0).astype(float)
    ns["max"] = np.maximum
    ns["min"] = np.minimum
    ns["trade_when"] = lambda c, a, b: _where(c > 0, a, b)
    # Group names appear as bare identifiers in expressions, e.g.
    # group_neutralize(x, subindustry); bind them so eval() can resolve them.
    for level in NEUTRALIZATION_LEVELS:
        ns[level] = None

    try:
        result = eval(expr, {"__builtins__": {}}, ns)
        if not isinstance(result, pd.DataFrame):
            return {"valid": False, "error": "Not a DataFrame"}
    except Exception as e:
        return {"valid": False, "error": str(e)[:200]}

    valid_idx = result.index[min_days::5]
    ic_vals = []
    rank_ic_vals = []
    for date in valid_idx:
        a = result.loc[date].dropna()
        f = fwd.loc[date].dropna()
        common = a.index.intersection(f.index)
        if len(common) < 30:
            continue
        a, f = a[common], f[common]
        if a.std() > 0 and f.std() > 0:
            ic_vals.append(np.corrcoef(a, f)[0, 1])
            if len(set(a)) > 1 and len(set(f)) > 1:
                r, _ = spearmanr(a, f)
                if not np.isnan(r):
                    rank_ic_vals.append(r)

    ic = np.nanmean(ic_vals) if ic_vals else 0
    rank_ic = np.nanmean(rank_ic_vals) if rank_ic_vals else 0
    ic_std = np.nanstd(ic_vals) if ic_vals else 0.001
    icir = ic / (ic_std + 0.0001)
    sharpe = min(icir * math.sqrt(252) / 3, 5.0)

    rnk = result.rank(axis=1)
    corr_vals = []
    for i in range(1, min(len(rnk), 100)):
        a1 = rnk.iloc[i-1].dropna()
        a2 = rnk.iloc[i].dropna()
        common = a1.index.intersection(a2.index)
        if len(common) > 20:
            c = np.corrcoef(a1[common], a2[common])[0, 1]
            if not np.isnan(c):
                corr_vals.append(c)
    avg_corr = np.mean(corr_vals) if corr_vals else 0.8
    turnover = max(0, (1 - avg_corr) * 100)
    max_dd = max(2.0, turnover * 0.15)

    return {
        "valid": True,
        "ic": round(ic, 4),
        "rank_ic": round(rank_ic, 4),
        "sharpe": round(sharpe, 3),
        "turnover": round(turnover, 1),
        "max_dd": round(max_dd, 2),
    }
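# Illustrative smoke test (metrics vary with the synthetic seed):
#
#     data, fwd = get_synthetic_data()
#     evaluate_alpha("group_neutralize(rank(-ts_delta(close, 5)), subindustry)", data, fwd)
#     # -> {"valid": True, "ic": ..., "rank_ic": ..., "sharpe": ..., "turnover": ..., "max_dd": ...}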
def _ts_decay_fast(df, window):
    w = window
    weights = np.arange(1, w + 1)
    weights = weights / weights.sum()
    return df.rolling(window=w, min_periods=1).apply(
        lambda x: np.dot(x[-len(weights):], weights[-len(x):]), raw=True
    )


# ─────────────────────────────────────────────────────────────────────────────
# LLM PROMPT ENGINE
# ─────────────────────────────────────────────────────────────────────────────
def build_prompt(fields: List[str], operators: List[str], domain: str,
                 existing_alphas: str, num_alphas: int) -> str:
    fields_str = ", ".join(fields)
    ops_str = ", ".join(operators)

    prompt = f"""You are a senior quantitative researcher at Renaissance Technologies.
Your task is to generate {num_alphas} novel formulaic alphas for a WorldQuant BRAIN competition.

AVAILABLE DATA FIELDS:
{fields_str}

AVAILABLE OPERATORS:
{ops_str}

DOMAIN TO FOCUS ON: {domain}

EXISTING ALPHA LIBRARY (DO NOT REPLICATE):
{existing_alphas[:2000] if existing_alphas else "None — this is the first generation."}

REQUIREMENTS FOR EACH ALPHA:
1. Expression must be a SINGLE valid WorldQuant BRAIN expression (no comments, no semicolons as separators)
2. Use only the listed operators and data fields
3. All division must include a + 0.000001 guard to prevent division by zero
4. Must end with group_neutralize(score, subindustry) or group_neutralize(rank(score), subindustry)
5. Must be dimensionless (no units)
6. At least 2 distinct operations (not just rank(close))
7. Max 5 named parameters per expression
8. Should exploit cross-sectional predictability, not time-series momentum alone

OUTPUT FORMAT — Return ONLY a JSON array with exactly {num_alphas} objects. Each object must have:
{{
  "name": "short descriptive name",
  "description": "one-sentence economic rationale",
  "expression": "the full WQ expression as a single string",
  "domain": "which domain this belongs to",
  "neutralization": "subindustry"
}}

Do not include markdown code fences. Return raw JSON only."""
    return prompt
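# The prompt requests raw JSON shaped like this (illustrative; values invented):
#
#     [{"name": "amihud_reversal",
#       "description": "Illiquid recent losers rebound over the next day.",
#       "expression": "group_neutralize(rank(...), subindustry)",
#       "domain": "Liquidity Shock Reversal",
#       "neutralization": "subindustry"}]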
def call_hf_model(model_name: str, prompt: str, temperature: float = 0.7, max_tokens: int = 2048):
    try:
        from huggingface_hub import InferenceClient
        token = os.environ.get("HF_TOKEN", "")
        client = InferenceClient(token=token if token else None)
        response = client.chat_completion(
            model=model_name,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=max_tokens,
            temperature=temperature,
        )
        return response.choices[0].message.content
    except Exception as e:
        return f"ERROR: {str(e)}"


def call_ollama_model(model_name: str, prompt: str, temperature: float = 0.7):
    try:
        import ollama
        response = ollama.generate(
            model=model_name,
            prompt=prompt,
            format="json",
            options={"temperature": temperature, "num_predict": 2048},
        )
        return response["response"]
    except Exception as e:
        return f"ERROR: {str(e)}"


def parse_alpha_json(raw_text: str) -> List[Dict]:
    text = raw_text.strip()
    # Strip markdown code fences if the model added them anyway
    if text.startswith("```") and "\n" in text:
        text = text.split("\n", 1)[1]
    if text.endswith("```"):
        text = text.rsplit("\n", 1)[0]
    text = text.strip()

    def _as_list(obj):
        # Accept a single JSON object by wrapping it in a one-element list
        return [obj] if isinstance(obj, dict) else obj

    try:
        return _as_list(json.loads(text))
    except json.JSONDecodeError:
        pass

    # Fall back to the first bracketed array in the response
    match = re.search(r'\[.*\]', text, re.DOTALL)
    if match:
        try:
            return _as_list(json.loads(match.group()))
        except json.JSONDecodeError:
            pass

    # Last resort: repair a truncated array by closing it after the last object
    if not text.endswith("]"):
        text = text.rsplit("}", 1)[0] + "}]"
        try:
            return _as_list(json.loads(text))
        except json.JSONDecodeError:
            pass

    return []
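# parse_alpha_json tolerates common LLM wrappers (illustrative):
#
#     parse_alpha_json('```json\n[{"name": "a", "expression": "rank(close)"}]\n```')
#     # -> [{"name": "a", "expression": "rank(close)"}]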
# ─────────────────────────────────────────────────────────────────────────────
# SWARM GENERATION LOGIC
# ─────────────────────────────────────────────────────────────────────────────
DOMAINS = [
    "Liquidity Shock Reversal (Amihud, volume acceleration, VWAP pressure)",
    "Post-Earnings Announcement Drift (eps_surprise, SUE, analyst revisions)",
    "Capital Structure / Distress Quality (debt coverage, interest coverage, cash ratios)",
    "Options Market Flow & Skew (put_call_ratio, IV term structure, option volume)",
    "Nonlinear Factor Interactions (multiplicative combinations of orthogonal signals)",
    "Cross-Sectional Dispersion / Beta Timing (idiosyncratic vol, comovement deviation)",
    "Seasonality & Calendar Effects (intra-month, day-of-week, turn-of-month)",
    "News Sentiment / Text Signals (earnings tone, headline sentiment)",
    "Short Interest / Borrow Cost (utilization, short interest changes)",
    "Institutional Flow (13F ownership changes)",
]

EXAMPLE_ALPHAS = [
    "group_neutralize(rank(ts_mean(abs(returns) / (close * volume + 0.000001), 5) / (ts_mean(abs(returns) / (close * volume + 0.000001), 63) + 0.000001)), subindustry)",
    "group_neutralize(rank(eps_surprise / (abs(est_eps) + 0.000001)), subindustry)",
    "group_neutralize(rank(operating_income / (total_debt + 0.000001)), subindustry)",
    "group_neutralize(rank(-put_call_ratio) * rank(iv30 - iv90), industry)",
    "group_neutralize(rank(zscore(ts_rank(operating_income / (cap + 0.000001), 252))) * rank(zscore(ts_rank(-returns, 20))), subindustry)",
]


def generate_alphas(
    backend: str,
    model_name: str,
    fields: List[str],
    operators: List[str],
    domain: str,
    num_alphas: int,
    temperature: float,
    existing_alphas_text: str,
    progress=gr.Progress(),
):
    progress(0.1, desc="Building prompt...")
    prompt = build_prompt(fields, operators, domain, existing_alphas_text, num_alphas)

    progress(0.2, desc=f"Calling {backend} model: {model_name}...")
    if backend == "Hugging Face":
        raw_response = call_hf_model(model_name, prompt, temperature)
    else:
        raw_response = call_ollama_model(model_name, prompt, temperature)

    if raw_response.startswith("ERROR:"):
        return [], f"❌ {raw_response}", ""

    progress(0.5, desc="Parsing response...")
    alphas = parse_alpha_json(raw_response)
    if not alphas:
        return [], f"❌ Could not parse LLM response. Raw output:\n\n{raw_response[:1000]}", ""

    progress(0.6, desc="Preparing evaluation data...")
    data, fwd = get_synthetic_data()

    results = []
    progress_steps = len(alphas)
    for i, alpha in enumerate(alphas):
        progress(0.6 + 0.35 * (i / progress_steps), desc=f"Evaluating alpha {i+1}/{len(alphas)}...")
        expr = alpha.get("expression", "")
        if not expr:
            continue
        score = evaluate_alpha(expr, data, fwd)
        alpha.update(score)
        alpha["composite"] = (
            0.35 * score.get("sharpe", 0)
            + 0.25 * score.get("ic", 0) * 10
            + 0.20 * score.get("rank_ic", 0) * 10
            - 0.10 * (score.get("turnover", 0) / 100)
            - 0.10 * (score.get("max_dd", 0) / 100)
        ) if score.get("valid") else -999
        results.append(alpha)

    progress(1.0, desc="Done!")
    results.sort(key=lambda x: x.get("composite", -999), reverse=True)

    report_lines = ["# Generated Alpha Report\n"]
    for i, r in enumerate(results, 1):
        status = "✅ VALID" if r.get("valid") else "❌ INVALID"
        report_lines.append(f"\n## Alpha {i}: {r.get('name', 'Unnamed')} {status}")
        report_lines.append(f"**Domain:** {r.get('domain', 'Unknown')}")
        report_lines.append(f"**Description:** {r.get('description', 'N/A')}")
        report_lines.append(f"```\n{r.get('expression', 'N/A')}\n```")
        if r.get("valid"):
            report_lines.append("| Metric | Value |")
            report_lines.append("|--------|-------|")
            report_lines.append(f"| Sharpe | {r.get('sharpe', 'N/A')} |")
            report_lines.append(f"| IC | {r.get('ic', 'N/A')} |")
            report_lines.append(f"| Rank IC | {r.get('rank_ic', 'N/A')} |")
            report_lines.append(f"| Turnover | {r.get('turnover', 'N/A')}% |")
            report_lines.append(f"| Max DD | {r.get('max_dd', 'N/A')}% |")
            report_lines.append(f"| Composite | {round(r.get('composite', 0), 3)} |")
        else:
            report_lines.append(f"**Error:** {r.get('error', 'Unknown')}")

    return results, "\n".join(report_lines), raw_response
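# Illustrative headless run (assumes a local Ollama server is reachable):
#
#     results, report_md, raw = generate_alphas(
#         "Ollama", "llama3.2",
#         ["close", "volume", "returns"], ["rank", "ts_mean", "group_neutralize"],
#         DOMAINS[0], num_alphas=3, temperature=0.7, existing_alphas_text="",
#     )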
# ─────────────────────────────────────────────────────────────────────────────
# GRADIO UI
# ─────────────────────────────────────────────────────────────────────────────
with gr.Blocks(title="WorldQuant Alpha Swarm™", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # 🐟 MicroFish Swarm™ — WorldQuant Alpha Discovery
    ### LLM-Powered Formulaic Alpha Generation with Real-Time Backtesting
    """)

    with gr.Tab("🎯 Generate Alphas"):
        with gr.Row():
            with gr.Column(scale=1):
                backend = gr.Dropdown(
                    choices=["Hugging Face", "Ollama"],
                    value="Hugging Face",
                    label="Backend",
                )
                model_dropdown = gr.Dropdown(
                    choices=HF_MODELS,
                    value=HF_MODELS[0],
                    label="Model",
                )
                temperature = gr.Slider(
                    minimum=0.1, maximum=1.5, value=0.7, step=0.1,
                    label="Temperature",
                )
                num_alphas = gr.Slider(
                    minimum=1, maximum=10, value=3, step=1,
                    label="Number of Alphas to Generate",
                )
                domain_focus = gr.Dropdown(
                    choices=DOMAINS,
                    value=DOMAINS[0],
                    label="Domain Focus",
                )
            with gr.Column(scale=2):
                fields_select = gr.Dropdown(
                    choices=sorted(WQ_DATA_FIELDS),
                    value=sorted(["close", "volume", "returns", "vwap", "market_cap",
                                  "operating_income", "ebitda", "eps_surprise",
                                  "put_call_ratio", "iv30", "iv90", "total_debt"]),
                    multiselect=True,
                    label="Available Data Fields",
                )
                operators_select = gr.Dropdown(
                    choices=sorted(WQ_OPERATORS),
                    value=sorted(["rank", "zscore", "ts_mean", "ts_std_dev", "ts_rank",
                                  "ts_decay_linear", "group_neutralize", "abs", "sign",
                                  "greater", "if_else", "trade_when"]),
                    multiselect=True,
                    label="Available Operators",
                )
                existing_alphas = gr.Textbox(
                    label="Existing Alpha Library (paste expressions to avoid redundancy)",
                    lines=4,
                    value="\n".join(EXAMPLE_ALPHAS),
                )

        def update_models(backend_choice):
            models = HF_MODELS if backend_choice == "Hugging Face" else OLLAMA_MODELS
            return gr.Dropdown(choices=models, value=models[0])

        backend.change(update_models, inputs=backend, outputs=model_dropdown)

        generate_btn = gr.Button("🚀 Generate & Evaluate Alphas", variant="primary", size="lg")

        with gr.Row():
            with gr.Column(scale=1):
                results_json = gr.JSON(label="Structured Results", visible=True)
            with gr.Column(scale=2):
                report_md = gr.Markdown(label="Evaluation Report")
        with gr.Row():
            raw_output = gr.Textbox(label="Raw LLM Response (for debugging)", lines=6)

        generate_btn.click(
            fn=generate_alphas,
            inputs=[backend, model_dropdown, fields_select, operators_select,
                    domain_focus, num_alphas, temperature, existing_alphas],
            outputs=[results_json, report_md, raw_output],
        )

    with gr.Tab("📊 Evaluate Custom Expression"):
        with gr.Row():
            with gr.Column(scale=2):
                custom_expr = gr.Textbox(
                    label="WorldQuant BRAIN Expression",
                    lines=4,
                    value="group_neutralize(rank(ts_decay_linear(rank(abs(returns) / (close * volume + 0.000001)), 3)), subindustry)",
                )
                eval_btn = gr.Button("📈 Evaluate", variant="primary")
            with gr.Column(scale=1):
                eval_result = gr.JSON(label="Metrics")

        def evaluate_custom(expr):
            data, fwd = get_synthetic_data()
            return evaluate_alpha(expr, data, fwd)

        eval_btn.click(fn=evaluate_custom, inputs=custom_expr, outputs=eval_result)

    with gr.Tab("📖 Reference"):
        gr.Markdown("""
        ## WorldQuant BRAIN Operator Reference

        ### Cross-Section Operators
        | Operator | Description |
        |----------|-------------|
        | `rank(x)` | Percentile rank (0-1) across stocks |
        | `zscore(x)` | Demean and scale to std=1 |
        | `scale(x)` | Normalize to unit sum |
        | `sign(x)` | Sign function |
        | `abs(x)` | Absolute value |
        | `max(x,y)` / `min(x,y)` | Element-wise max/min |
        | `greater(x,y)` | 1 if x>y else 0 |
        | `less(x,y)` | 1 if x<y else 0 |

        ### Alpha Design Principles
        5. **Multiplicative** — prefer `a * b` over `a + b` for orthogonal signals
        6. **Cross-sectional** — the signal must differentiate stocks, not predict time
        """)

    with gr.Tab("🔧 Settings"):
        gr.Markdown("""
        ### Hugging Face Setup
        Set your HF token as an environment variable:
        ```bash
        export HF_TOKEN=your_token_here
        ```
        Or pass it when launching:
        ```bash
        HF_TOKEN=xxx python app.py
        ```

        ### Ollama Setup
        1. Install Ollama: https://ollama.com
        2. Pull a model: `ollama pull deepseek-r1:8b`
        3. Ensure Ollama is running locally (default: http://localhost:11434)

        ### Deployment to Hugging Face Spaces
        ```bash
        # Create a Space with the Gradio SDK
        # Push app.py + requirements.txt
        # requirements.txt contents:
        gradio>=4.0
        numpy
        pandas
        scipy
        huggingface_hub
        ollama
        ```
        """)


if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860, share=True)