Upload alpha_factory/deterministic/theme_sampler.py with huggingface_hub
Browse files
alpha_factory/deterministic/theme_sampler.py
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Theme Sampler — deterministic gap analysis.
|
| 3 |
+
Picks under-explored themes from the factor store.
|
| 4 |
+
"""
|
| 5 |
+
import math
|
| 6 |
+
from collections import Counter
|
| 7 |
+
from typing import Optional
|
| 8 |
+
from ..schemas import AnomalyTag
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
# Theme definitions mapped to BRAIN field families.
# Keys are theme names; each value lists the data-field names grouped under
# that theme. Insertion order matters: it is the tie-break order when themes
# are ranked by gap score.
THEME_FIELDS: dict[str, list[str]] = {
    "value_quality": [
        "book_to_price",
        "earnings_yield",
        "roe",
        "roa",
        "debt_to_equity",
        "current_ratio",
    ],
    "momentum": ["close", "returns", "volume", "ts_returns", "high", "low"],
    "reversal": ["close", "returns", "volume", "bid_ask_spread"],
    "volatility": ["volatility", "ivol", "beta", "hv", "atr"],
    "analyst": [
        "analyst_rating",
        "estimate_revision",
        "target_price",
        "recommendation",
    ],
    "sentiment_social": [
        "sentiment",
        "social_volume",
        "social_score",
        "news_sentiment",
    ],
    "option_surface": [
        "iv30",
        "iv60",
        "iv90",
        "iv180",
        "pcr",
        "skew",
        "term_structure",
    ],
    "earnings_event": ["earnings_surprise", "post_earnings_drift", "guidance"],
    "liquidity_micro": [
        "bid_ask_spread",
        "volume",
        "turnover",
        "amihud_illiquidity",
    ],
    "growth": [
        "revenue_growth",
        "earnings_growth",
        "asset_growth",
        "sales_growth",
    ],
    "intraday": ["open", "high", "low", "close", "vwap", "intraday_range"],
    "fundamental_yield": [
        "dividend_yield",
        "buyback_yield",
        "shareholder_yield",
        "fcf_yield",
    ],
}
|
| 26 |
+
|
| 27 |
+
# Known archetypes from the existing 18-alpha library.
# Kept as a list (not a tuple) so existing callers that concatenate or extend
# it keep working.
PROVEN_ARCHETYPES: list[str] = [
    "value_quality_blend",
    "intraday_mr_decay",
    "vol_scaled_shock",
    "pead_revisions",
    "skew_term",
    "social_momentum",
    "multi_horizon_mr",
    "fundamental_yield_composite",
]
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def compute_gap_scores(
    existing_themes: list[str],
    existing_anomaly_tags: list[str],
    dead_themes: Optional[list[str]] = None,
) -> list[tuple[str, float]]:
    """
    Rank themes by how under-explored they are.

    For every theme in ``THEME_FIELDS`` that is not listed in *dead_themes*:

        gap_score = log(1 + field_count)
                    - 2 * log(1 + alphas_in_theme)
                    + novelty_bonus

    ``novelty_bonus`` is 0.5 when the theme's anomaly tag (see
    ``_theme_to_tag``) occurs fewer than two times in
    *existing_anomaly_tags*, and 0 otherwise. Dead themes are removed from
    the ranking altogether rather than penalised (§11.5).

    Higher score = bigger opportunity.

    Args:
        existing_themes: Theme name of each alpha already in the store; one
            entry per alpha, so repeats are expected and counted.
        existing_anomaly_tags: Anomaly tag of each existing alpha. Plain
            strings are counted as-is; objects exposing ``.value`` (e.g. Enum
            members) are counted by that value.
        dead_themes: Themes to exclude from the result. ``None`` means none.

    Returns:
        ``(theme, gap_score)`` pairs sorted by score, highest first. Themes
        with equal scores keep their ``THEME_FIELDS`` insertion order.
    """
    theme_counts = Counter(existing_themes)
    # _theme_to_tag returns plain strings, so Enum members must be reduced to
    # their .value or they would never match and every theme would always
    # collect the novelty bonus. Plain strings have no .value and pass through.
    anomaly_counts = Counter(
        getattr(tag, "value", tag) for tag in (existing_anomaly_tags or ())
    )
    dead_set = set(dead_themes or [])

    scores: list[tuple[str, float]] = []
    for theme, fields in THEME_FIELDS.items():
        if theme in dead_set:
            continue  # Skip dead themes (§11.5)

        # Counter yields 0 for themes that have no alphas yet.
        alpha_count = theme_counts[theme]
        gap = math.log(len(fields) + 1) - 2 * math.log(1 + alpha_count)

        # Bonus if the anomaly tag is under-represented (seen < 2 times).
        if anomaly_counts[_theme_to_tag(theme)] < 2:
            gap += 0.5  # novelty bonus

        scores.append((theme, gap))

    # Descending by score; list.sort is stable (also with reverse=True), so
    # ties stay in THEME_FIELDS order.
    scores.sort(key=lambda pair: pair[1], reverse=True)
    return scores
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
def pick_theme(
    existing_themes: list[str],
    existing_anomaly_tags: list[str],
    dead_themes: Optional[list[str]] = None,
    top_k: int = 3,
) -> str:
    """Pick a theme to explore next from the highest gap scores.

    Themes are ranked with ``compute_gap_scores`` and one of the ``top_k``
    best is chosen uniformly at random, so repeated calls spread across the
    leading candidates instead of always returning the single best one.

    The choice uses the global ``random`` state: seed it beforehand if a
    reproducible pick is required.

    Args:
        existing_themes: Theme name of each alpha already in the store.
        existing_anomaly_tags: Anomaly tag of each existing alpha.
        dead_themes: Themes that must not be picked. ``None`` means none.
        top_k: Size of the shortlist to sample from. Values below 1 are
            treated as 1.

    Returns:
        The chosen theme name. If every theme is dead, a random theme from
        ``THEME_FIELDS`` is returned so callers always receive a name.
    """
    import random

    scores = compute_gap_scores(existing_themes, existing_anomaly_tags, dead_themes)
    # Sample from top-k for diversity. Clamp to >= 1: a zero would empty the
    # shortlist and fall through to the dead-theme-inclusive fallback below,
    # and a negative value would slice from the end of the ranking instead.
    top = scores[: max(top_k, 1)]
    if not top:
        # Only reachable when every theme has been marked dead.
        return random.choice(list(THEME_FIELDS))
    return random.choice(top)[0]
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
def _theme_to_tag(theme: str) -> str:
|
| 96 |
+
"""Map theme name to AnomalyTag value."""
|
| 97 |
+
mapping = {
|
| 98 |
+
"value_quality": "value",
|
| 99 |
+
"momentum": "momentum",
|
| 100 |
+
"reversal": "reversal",
|
| 101 |
+
"volatility": "low_vol",
|
| 102 |
+
"analyst": "analyst",
|
| 103 |
+
"sentiment_social": "sentiment",
|
| 104 |
+
"option_surface": "option_surface",
|
| 105 |
+
"earnings_event": "pead",
|
| 106 |
+
"liquidity_micro": "liquidity",
|
| 107 |
+
"growth": "fundamental",
|
| 108 |
+
"intraday": "technical",
|
| 109 |
+
"fundamental_yield": "fundamental",
|
| 110 |
+
}
|
| 111 |
+
return mapping.get(theme, "other")
|