gaurv007 committed on
Commit
c3afe55
·
verified ·
1 Parent(s): 23cfdf9

fix: upload actual brain_fields.py content (not path string)

Browse files
Files changed (1) hide show
  1. alpha_factory/data/brain_fields.py +140 -1
alpha_factory/data/brain_fields.py CHANGED
@@ -1 +1,140 @@
1
- /app/brain_fields.py
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ BRAIN Field Registry — Canonical reference for the Alpha Factory pipeline.
3
+ Contains the highest-EV fields organized by tier and domain.
4
+
5
+ Source: fields_USA_TOP3000_D1.csv (5,905 total, 3,447 MATRIX candidates)
6
+ Generated: 2026-05-07
7
+
8
+ Usage:
9
+ from alpha_factory.data.brain_fields import GOLDMINE_FIELDS, TIER1_MODEL77_FIELDS, pick_field
10
+ """
11
+ from dataclasses import dataclass
12
+ from enum import Enum
13
+ from typing import Optional
14
+ import random
15
+
16
+
17
class SignConvention(str, Enum):
    """Direction label attached to every ``BrainField``.

    Members are plain strings (the class mixes in ``str``), so they compare
    equal to their value and serialize without extra handling.
    ``get_sign_multiplier`` maps LONG_LOW to -1 and every other member to +1.
    """

    LONG_HIGH = "long_high"    # presumably: long names with high field values
    LONG_LOW = "long_low"      # presumably: long names with low field values
    CONTRARIAN = "contrarian"  # NOTE(review): trading rule not defined in this file
    AMBIGUOUS = "ambiguous"    # NOTE(review): no known direction; treated as +1 downstream
22
+
23
+
24
class DatasetTier(str, Enum):
    """Tier label attached to every ``BrainField``.

    Members are plain strings (the class mixes in ``str``), so they compare
    equal to their value. The registry in this module uses TIER1-TIER3;
    TIER4 is declared but not assigned to any field here.
    """

    TIER1 = "tier1"
    TIER2 = "tier2"
    TIER3 = "tier3"
    TIER4 = "tier4"
29
+
30
+
31
@dataclass
class BrainField:
    """One data field of the registry, with the metadata used to select it.

    Instances are created positionally in the tables below, so the attribute
    order here is part of the interface and must not change.
    """

    id: str                  # field identifier; key of FIELD_INDEX
    dataset: str             # owning dataset name, e.g. "model77"
    coverage: float          # fraction in the tables below (0.56 .. 1.00)
    alpha_count: int         # usage count; pick_field favours low values
    description: str         # human-readable summary of the field
    category: str            # domain label, e.g. "Model", "Analyst", "News"
    sign: SignConvention     # direction label, see get_sign_multiplier
    tier: DatasetTier        # tier label used for filtering in pick_field
    backfill_days: int = 10  # default backfill window; see get_backfill_days
42
+
43
+
44
# model77 fields whose alpha_count is 0.
# Positional order: id, dataset, coverage, alpha_count, description, category,
# sign, tier[, backfill_days].
GOLDMINE_FIELDS: list[BrainField] = [
    BrainField("time_weighted_cash_flow_to_price", "model77", 1.00, 0, "Time-weighted avg cash flows/share for next 2 years divided by price", "Model", SignConvention.LONG_HIGH, DatasetTier.TIER1),
    BrainField("north_america_sales_exposure", "model77", 0.94, 0, "Proportion of company sales from North America", "Model", SignConvention.AMBIGUOUS, DatasetTier.TIER1),
    BrainField("standardized_unexpected_earnings_2", "model77", 0.92, 0, "Standardized Unexpected Earnings", "Model", SignConvention.LONG_HIGH, DatasetTier.TIER1),
    BrainField("mdl177_2_globaldevnorthamerica_v502_liqcoeff", "model77", 0.59, 0, "Slope of regression between monthly turnover and price return", "Model", SignConvention.LONG_LOW, DatasetTier.TIER1, 30),
    BrainField("mdl177_2_globaldevnorthamerica_v502_chgalpha12m", "model77", 0.58, 0, "Six-month nominal change in 12-month alpha", "Model", SignConvention.LONG_HIGH, DatasetTier.TIER1, 30),
]
51
+
52
# Tier-1 model77 fields; every entry has full coverage and alpha_count == 1.
# Positional order: id, dataset, coverage, alpha_count, description, category,
# sign, tier[, backfill_days].
TIER1_MODEL77_FIELDS: list[BrainField] = [
    BrainField("fundamental_growth_module_score", "model77", 1.00, 1, "Fundamental Growth submodule of Momentum Analyst II", "Model", SignConvention.LONG_HIGH, DatasetTier.TIER1),
    BrainField("mdl77_2valuemomemtummodel_earningsqualitymodule", "model77", 1.00, 1, "Earnings Quality Module", "Model", SignConvention.LONG_HIGH, DatasetTier.TIER1),
    BrainField("quarterly_earnings_surprise_stddev", "model77", 1.00, 1, "Most recent quarterly earnings surprise in standardized units", "Model", SignConvention.LONG_HIGH, DatasetTier.TIER1),
    BrainField("quarterly_eps_surprise_change", "model77", 1.00, 1, "Change in EPS surprise between recent periods", "Model", SignConvention.LONG_HIGH, DatasetTier.TIER1),
    BrainField("six_month_eps_revision_fy2", "model77", 1.00, 1, "Avg of prior six-month changes in consensus FY2 earnings forecasts", "Model", SignConvention.LONG_HIGH, DatasetTier.TIER1),
    BrainField("mdl77_ohistoricalgrowthfactor_pctchgqtrast", "model77", 1.00, 1, "1-Yr Change in Total Assets (asset growth anomaly)", "Model", SignConvention.LONG_LOW, DatasetTier.TIER1),
    BrainField("mdl77_valueanalystmodelqva_chginv", "model77", 1.00, 1, "1-year change in trailing 4Q inventory scaled by total assets", "Model", SignConvention.LONG_LOW, DatasetTier.TIER1),
    BrainField("mdl77_valueanalystmodelqva_yoychgdebt", "model77", 1.00, 1, "Change in Debt Issuance Rank", "Model", SignConvention.LONG_LOW, DatasetTier.TIER1),
    BrainField("three_year_change_gross_profit_margin_2", "model77", 1.00, 1, "Three-year change in gross profit margin", "Model", SignConvention.LONG_HIGH, DatasetTier.TIER1),
    BrainField("time_weighted_ebitda_to_enterprise_value_2", "model77", 1.00, 1, "Time-weighted EBITDA/EV for next two years", "Model", SignConvention.LONG_HIGH, DatasetTier.TIER1),
    BrainField("ttm_sales_to_enterprise_value", "model77", 1.00, 1, "TTM sales divided by enterprise value", "Model", SignConvention.LONG_HIGH, DatasetTier.TIER1),
    BrainField("yearly_percentage_change_roe", "model77", 1.00, 1, "Year-over-year percentage change in return on equity", "Model", SignConvention.LONG_HIGH, DatasetTier.TIER1),
    BrainField("mdl77_2liquidityriskfactor_milliq", "model77", 1.00, 1, "Stock Illiquidity (Amihud)", "Model", SignConvention.LONG_LOW, DatasetTier.TIER1),
]
67
+
68
# Tier-3 analyst4 fields (estimates, guidance and reported values).
# Positional order: id, dataset, coverage, alpha_count, description, category,
# sign, tier[, backfill_days].
TIER3_ANALYST_FIELDS: list[BrainField] = [
    BrainField("dividend_estimate_average", "analyst4", 0.62, 5, "Dividend per share - average of estimations", "Analyst", SignConvention.LONG_HIGH, DatasetTier.TIER3, 30),
    BrainField("max_ebitda_guidance", "analyst4", 1.00, 16, "Maximum guidance value for EBITDA (annual)", "Analyst", SignConvention.LONG_HIGH, DatasetTier.TIER3),
    BrainField("cash_flow_operations_min_guidance", "analyst4", 1.00, 17, "Minimum guidance for Cash Flow from Operations", "Analyst", SignConvention.LONG_HIGH, DatasetTier.TIER3),
    BrainField("pretax_income_reported", "analyst4", 0.56, 15, "Reported Pretax income for annual period", "Analyst", SignConvention.LONG_HIGH, DatasetTier.TIER3, 30),
]
74
+
75
# Tier-2 news12 fields. All have alpha_count above pick_field's default
# max_ac of 50, so they are only selected when the caller raises max_ac.
# Positional order: id, dataset, coverage, alpha_count, description, category,
# sign, tier[, backfill_days].
TIER2_NEWS_FIELDS: list[BrainField] = [
    BrainField("news_short_interest", "news12", 0.87, 535, "Ratio of shares sold short to shares outstanding", "News", SignConvention.LONG_LOW, DatasetTier.TIER2),
    BrainField("news_pct_5_min", "news12", 0.77, 353, "Price change in first 5 min after news", "News", SignConvention.LONG_HIGH, DatasetTier.TIER2, 30),
    BrainField("news_vol_stddev", "news12", 0.97, 902, "Z-score of current volume vs 30-day average", "News", SignConvention.CONTRARIAN, DatasetTier.TIER2),
]
80
+
81
# Tier-3 option9 fields; all share coverage 0.70 and a 30-day backfill.
# Positional order: id, dataset, coverage, alpha_count, description, category,
# sign, tier[, backfill_days].
TIER3_OPTION_FIELDS: list[BrainField] = [
    BrainField("pcr_vol_90", "option9", 0.70, 184, "Put/call volume ratio for 90-day options", "Option", SignConvention.CONTRARIAN, DatasetTier.TIER3, 30),
    BrainField("pcr_vol_20", "option9", 0.70, 233, "Put/call volume ratio for 20-day options", "Option", SignConvention.CONTRARIAN, DatasetTier.TIER3, 30),
    BrainField("forward_price_120", "option9", 0.70, 359, "Synthetic forward price at 120 days from ATM options", "Option", SignConvention.LONG_HIGH, DatasetTier.TIER3, 30),
]
86
+
87
# Tier-3 pv13 fields (customer-graph ranks and related-company returns).
# Field ids are kept exactly as given, including the "ustomergraphrank"
# spelling. NOTE(review): confirm that spelling against the upstream field
# list before "correcting" it.
# Positional order: id, dataset, coverage, alpha_count, description, category,
# sign, tier[, backfill_days].
TIER3_SUPPLY_CHAIN_FIELDS: list[BrainField] = [
    BrainField("pv13_ustomergraphrank_auth_rank", "pv13", 0.79, 595, "HITS authority score of customers", "Price Volume", SignConvention.LONG_HIGH, DatasetTier.TIER3, 30),
    BrainField("pv13_ustomergraphrank_page_rank", "pv13", 0.79, 921, "PageRank of customers", "Price Volume", SignConvention.LONG_HIGH, DatasetTier.TIER3, 30),
    BrainField("rel_ret_all", "pv13", 0.96, 2280, "Averaged 1-day return of product-overlapping companies", "Price Volume", SignConvention.LONG_HIGH, DatasetTier.TIER3),
    BrainField("rel_ret_comp", "pv13", 0.82, 3078, "Averaged 1-day return of competing companies", "Price Volume", SignConvention.LONG_HIGH, DatasetTier.TIER3, 30),
    BrainField("pv13_custretsig_retsig", "pv13", 0.93, 2718, "Sign of customer return", "Price Volume", SignConvention.LONG_HIGH, DatasetTier.TIER3),
]
94
+
95
# Tier-3 socialmedia12 sentiment fields; both are marked CONTRARIAN.
# Positional order: id, dataset, coverage, alpha_count, description, category,
# sign, tier[, backfill_days].
TIER3_SOCIAL_FIELDS: list[BrainField] = [
    BrainField("snt_buzz_ret_fast_d1", "socialmedia12", 0.98, 56, "Negative return of relative sentiment volume", "Social Media", SignConvention.CONTRARIAN, DatasetTier.TIER3),
    BrainField("scl12_sentiment_fast_d1", "socialmedia12", 0.98, 134, "Daily sentiment score", "Social Media", SignConvention.CONTRARIAN, DatasetTier.TIER3),
]
99
+
100
# Tier-2 model16 fields; every entry has full coverage and is LONG_HIGH.
# Positional order: id, dataset, coverage, alpha_count, description, category,
# sign, tier[, backfill_days].
TIER2_MODEL16_FIELDS: list[BrainField] = [
    BrainField("multi_factor_static_score_derivative", "model16", 1.00, 98, "Change in static multi-factor score", "Model", SignConvention.LONG_HIGH, DatasetTier.TIER2),
    BrainField("relative_valuation_rank_derivative", "model16", 1.00, 119, "Under/overpriced based on valuation multiples (change)", "Model", SignConvention.LONG_HIGH, DatasetTier.TIER2),
    BrainField("growth_potential_rank_derivative", "model16", 1.00, 152, "Composite growth score change", "Model", SignConvention.LONG_HIGH, DatasetTier.TIER2),
    BrainField("earnings_certainty_rank_derivative", "model16", 1.00, 175, "Earnings quality certainty (change)", "Model", SignConvention.LONG_HIGH, DatasetTier.TIER2),
]
106
+
107
# Flat registry: the concatenation of every table above, in a fixed order.
# pick_field draws from this list by default.
ALL_FIELDS: list[BrainField] = (
    GOLDMINE_FIELDS
    + TIER1_MODEL77_FIELDS
    + TIER3_ANALYST_FIELDS
    + TIER2_NEWS_FIELDS
    + TIER3_OPTION_FIELDS
    + TIER3_SUPPLY_CHAIN_FIELDS
    + TIER3_SOCIAL_FIELDS
    + TIER2_MODEL16_FIELDS
)

# Lookup by field id. If two entries ever share an id, the later one in
# ALL_FIELDS silently wins, so keep ids unique across the tables.
FIELD_INDEX: dict[str, BrainField] = {f.id: f for f in ALL_FIELDS}
114
+
115
+
116
def pick_field(
    tier: Optional["DatasetTier"] = None,
    category: Optional[str] = None,
    max_ac: int = 50,
    min_coverage: float = 0.55,
    exclude_ids: Optional["set[str]"] = None,
    *,
    fields: Optional["list[BrainField]"] = None,
    rng: Optional[random.Random] = None,
) -> Optional["BrainField"]:
    """Randomly pick one field, favouring those with a low ``alpha_count``.

    Args:
        tier: Keep only fields whose ``tier`` equals this value; ``None``
            disables the tier filter.
        category: Keep only fields whose ``category`` equals this value;
            ``None`` disables the category filter.
        max_ac: Largest ``alpha_count`` a candidate may have (inclusive).
        min_coverage: Smallest ``coverage`` a candidate may have (inclusive).
        exclude_ids: Field ids that must not be returned. A set is typical,
            but any iterable of ids is accepted; it is copied into a set.
        fields: Pool to draw from. Defaults to the module-level
            ``ALL_FIELDS`` registry.
        rng: Random source providing ``choices``. Defaults to the shared
            ``random`` module; pass a seeded ``random.Random`` for
            reproducible picks.

    Returns:
        One matching field, or ``None`` when no field passes the filters.
    """
    # Copying gives O(1) membership tests and keeps a one-shot iterator from
    # being consumed by the first few membership checks.
    excluded = set(exclude_ids) if exclude_ids is not None else set()
    pool = ALL_FIELDS if fields is None else fields

    candidates = [
        f
        for f in pool
        if f.alpha_count <= max_ac
        and f.coverage >= min_coverage
        and f.id not in excluded
        and (tier is None or f.tier == tier)
        and (category is None or f.category == category)
    ]
    if not candidates:
        return None

    # Inverse-usage weighting: a field with alpha_count 0 is twice as likely
    # as one with alpha_count 1. random.choices takes relative weights, so
    # no normalisation is needed.
    weights = [1.0 / (f.alpha_count + 1) for f in candidates]
    chooser = random if rng is None else rng
    return chooser.choices(candidates, weights=weights, k=1)[0]
125
+
126
+
127
def get_backfill_days(field: "BrainField") -> int:
    """Return the backfill window (in days) to use for ``field``.

    Sparse fields get a minimum window based on coverage: at least 30 days
    below 0.70 coverage and at least 20 days below 0.85. The field's own
    ``backfill_days`` is honoured whenever it is larger than that minimum, so
    a curated value such as 30 on a 0.77-coverage field is no longer reduced
    to 20.

    Args:
        field: Object exposing ``coverage`` and ``backfill_days``.

    Returns:
        The larger of the coverage-based minimum and ``field.backfill_days``.
    """
    if field.coverage < 0.70:
        coverage_floor = 30
    elif field.coverage < 0.85:
        coverage_floor = 20
    else:
        coverage_floor = 0  # well-covered: use the configured value as is
    return max(field.backfill_days, coverage_floor)
133
+
134
+
135
def get_sign_multiplier(field: BrainField) -> int:
    """Return the multiplier (+1 or -1) matching ``field.sign``.

    Args:
        field: Registry entry whose ``sign`` is inspected.

    Returns:
        -1 for ``SignConvention.LONG_LOW``; +1 for every other convention.
    """
    if field.sign == SignConvention.LONG_LOW:
        return -1
    # LONG_HIGH, CONTRARIAN and AMBIGUOUS all map to +1.
    # NOTE(review): CONTRARIAN returning +1 looks surprising; confirm whether
    # callers invert contrarian signals themselves before changing it here.
    return 1