gaurv007 committed on
Commit
0d00c1c
·
verified ·
1 Parent(s): 46ac2bf

Upload alpha_factory/deterministic/theme_sampler.py with huggingface_hub

Browse files
alpha_factory/deterministic/theme_sampler.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Theme Sampler — deterministic gap analysis.
3
+ Picks under-explored themes from the factor store.
4
+ """
5
+ import math
6
+ from collections import Counter
7
+ from typing import Optional
8
+ from ..schemas import AnomalyTag
9
+
10
+
11
# Theme definitions mapped to BRAIN field families.
# Keys are theme names; each value lists the field names grouped under that
# theme. A field may appear under several themes (e.g. "close", "volume").
# The number of fields per theme feeds the gap score in compute_gap_scores().
THEME_FIELDS: dict[str, list[str]] = {
    "value_quality": [
        "book_to_price",
        "earnings_yield",
        "roe",
        "roa",
        "debt_to_equity",
        "current_ratio",
    ],
    "momentum": ["close", "returns", "volume", "ts_returns", "high", "low"],
    "reversal": ["close", "returns", "volume", "bid_ask_spread"],
    "volatility": ["volatility", "ivol", "beta", "hv", "atr"],
    "analyst": [
        "analyst_rating",
        "estimate_revision",
        "target_price",
        "recommendation",
    ],
    "sentiment_social": [
        "sentiment",
        "social_volume",
        "social_score",
        "news_sentiment",
    ],
    "option_surface": [
        "iv30",
        "iv60",
        "iv90",
        "iv180",
        "pcr",
        "skew",
        "term_structure",
    ],
    "earnings_event": ["earnings_surprise", "post_earnings_drift", "guidance"],
    "liquidity_micro": [
        "bid_ask_spread",
        "volume",
        "turnover",
        "amihud_illiquidity",
    ],
    "growth": [
        "revenue_growth",
        "earnings_growth",
        "asset_growth",
        "sales_growth",
    ],
    "intraday": ["open", "high", "low", "close", "vwap", "intraday_range"],
    "fundamental_yield": [
        "dividend_yield",
        "buyback_yield",
        "shareholder_yield",
        "fcf_yield",
    ],
}
26
+
27
# Known archetypes from the existing 18-alpha library.
# NOTE(review): not referenced by the functions visible in this module;
# presumably consumed by other modules -- confirm before removing.
PROVEN_ARCHETYPES: list[str] = [
    "value_quality_blend",
    "intraday_mr_decay",
    "vol_scaled_shock",
    "pead_revisions",
    "skew_term",
    "social_momentum",
    "multi_horizon_mr",
    "fundamental_yield_composite",
]
38
+
39
+
40
def compute_gap_scores(
    existing_themes: list[str],
    existing_anomaly_tags: list[str],
    dead_themes: Optional[list[str]] = None,
) -> list[tuple[str, float]]:
    """
    Rank themes by how under-explored they are.

    For every theme in ``THEME_FIELDS`` that is not listed in *dead_themes*::

        gap_score = log(1 + field_count) - 2 * log(1 + alphas_in_theme)
                    [+ 0.5 if the theme's anomaly tag occurs fewer than 2 times]

    Dead themes are dropped from the result entirely (no penalty term).
    Higher score = bigger opportunity.

    Args:
        existing_themes: Theme name of each existing alpha; repeated names
            count as multiple alphas in that theme.
        existing_anomaly_tags: Anomaly tag of each existing alpha, compared
            against the tag returned by ``_theme_to_tag`` for each theme.
        dead_themes: Theme names to exclude from the ranking. ``None`` or an
            empty list excludes nothing.

    Returns:
        ``(theme, gap_score)`` pairs sorted by score, highest first. Themes
        with equal scores keep their ``THEME_FIELDS`` definition order.
    """
    theme_counts = Counter(existing_themes)
    anomaly_counts = Counter(existing_anomaly_tags)
    dead_set = set(dead_themes or [])

    scores: list[tuple[str, float]] = []
    for theme, fields in THEME_FIELDS.items():
        if theme in dead_set:
            continue  # Skip dead themes (§11.5)

        field_count = len(fields)
        alpha_count = theme_counts[theme]  # Counter yields 0 for unseen keys

        # More fields raise the score; every existing alpha lowers it twice
        # as strongly (both on a log scale).
        gap = math.log(field_count + 1) - 2 * math.log(1 + alpha_count)

        # Bonus if the anomaly tag is under-represented
        tag = _theme_to_tag(theme)
        if anomaly_counts[tag] < 2:
            gap += 0.5  # novelty bonus

        scores.append((theme, gap))

    # Sort descending; list.sort is stable, so ties keep definition order.
    scores.sort(key=lambda item: item[1], reverse=True)
    return scores
77
+
78
+
79
def pick_theme(
    existing_themes: list[str],
    existing_anomaly_tags: list[str],
    dead_themes: Optional[list[str]] = None,
    top_k: int = 3,
) -> str:
    """
    Pick a theme to explore next from the highest gap scores.

    The themes are ranked with ``compute_gap_scores`` and one of the best
    *top_k* is chosen uniformly at random, so repeated calls spread over
    several promising themes instead of always returning the single best.

    Args:
        existing_themes: Theme name of each existing alpha.
        existing_anomaly_tags: Anomaly tag of each existing alpha.
        dead_themes: Theme names that must not be ranked.
        top_k: How many of the best-ranked themes to sample from. Values
            below 1 are treated as 1 so the ranking is never bypassed.

    Returns:
        The chosen theme name. If every theme in ``THEME_FIELDS`` is dead,
        a random theme from ``THEME_FIELDS`` is returned as a last resort
        (which is then necessarily one of the dead themes).
    """
    import random

    scores = compute_gap_scores(existing_themes, existing_anomaly_tags, dead_themes)

    # Sample from top-k for diversity. Clamp to at least one candidate: an
    # unclamped slice with top_k == 0 is empty (which would trigger the
    # "all dead" fallback below and could return a dead theme), and a
    # negative top_k would drop entries from the tail instead of limiting.
    top = scores[: max(top_k, 1)]
    if not top:
        # Nothing left to rank: every known theme was excluded as dead.
        return random.choice(list(THEME_FIELDS))

    theme, _score = random.choice(top)
    return theme
93
+
94
+
95
def _theme_to_tag(theme: str) -> str:
    """
    Map a theme name to its anomaly tag string.

    NOTE(review): the returned strings presumably correspond to
    ``AnomalyTag`` values from ``..schemas`` -- confirm against that enum.

    Args:
        theme: A theme name, normally a key of ``THEME_FIELDS``.

    Returns:
        The tag associated with *theme*, or ``"other"`` when the theme is
        not in the mapping. Several themes may share one tag ("growth" and
        "fundamental_yield" both map to "fundamental").
    """
    mapping = {
        "value_quality": "value",
        "momentum": "momentum",
        "reversal": "reversal",
        "volatility": "low_vol",
        "analyst": "analyst",
        "sentiment_social": "sentiment",
        "option_surface": "option_surface",
        "earnings_event": "pead",
        "liquidity_micro": "liquidity",
        "growth": "fundamental",
        "intraday": "technical",
        "fundamental_yield": "fundamental",
    }
    return mapping.get(theme, "other")