fix: upload actual brain_fields.py content (not path string)
Browse files
alpha_factory/data/brain_fields.py
CHANGED
|
@@ -1 +1,140 @@
|
|
| 1 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
BRAIN Field Registry — Canonical reference for the Alpha Factory pipeline.
|
| 3 |
+
Contains the highest-EV fields organized by tier and domain.
|
| 4 |
+
|
| 5 |
+
Source: fields_USA_TOP3000_D1.csv (5,905 total, 3,447 MATRIX candidates)
|
| 6 |
+
Generated: 2026-05-07
|
| 7 |
+
|
| 8 |
+
Usage:
|
| 9 |
+
    from alpha_factory.data.brain_fields import GOLDMINE_FIELDS, TIER1_MODEL77_FIELDS, pick_field
|
| 10 |
+
"""
|
| 11 |
+
import random
from collections.abc import Iterable
from dataclasses import dataclass
from enum import Enum
from typing import Optional
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
class SignConvention(str, Enum):
    """Sign convention attached to a field in the registry.

    Members are also plain strings (``str`` mix-in), so they compare equal to
    their lowercase values. ``get_sign_multiplier`` turns ``LONG_LOW`` into
    ``-1`` and every other member into ``1``.
    """

    LONG_HIGH = "long_high"
    LONG_LOW = "long_low"
    CONTRARIAN = "contrarian"
    AMBIGUOUS = "ambiguous"
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
class DatasetTier(str, Enum):
    """Tier label assigned to each registry field.

    Members are also plain strings (``str`` mix-in), so ``DatasetTier.TIER1``
    compares equal to ``"tier1"``.
    """

    TIER1 = "tier1"
    TIER2 = "tier2"
    TIER3 = "tier3"
    TIER4 = "tier4"
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
@dataclass
class BrainField:
    """One entry of the BRAIN field registry."""

    # Field identifier; used as the key of FIELD_INDEX.
    id: str
    # Name of the dataset the field belongs to (e.g. "model77", "news12").
    dataset: str
    # Coverage value; registry entries use numbers between 0 and 1.
    coverage: float
    # Usage counter: pick_field filters on it and weights by 1 / (alpha_count + 1).
    alpha_count: int
    # Human-readable description of the field.
    description: str
    # Category label (e.g. "Model", "Analyst", "News").
    category: str
    # Sign convention consumed by get_sign_multiplier.
    sign: SignConvention
    # Tier label, usable as a pick_field filter.
    tier: DatasetTier
    # Backfill window in days; entries that omit it get 10.
    backfill_days: int = 10
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
# model77 fields whose alpha_count is 0; all are labelled TIER1.
GOLDMINE_FIELDS = [
    BrainField(
        "time_weighted_cash_flow_to_price", "model77", 1.00, 0,
        "Time-weighted avg cash flows/share for next 2 years divided by price",
        "Model", SignConvention.LONG_HIGH, DatasetTier.TIER1,
    ),
    BrainField(
        "north_america_sales_exposure", "model77", 0.94, 0,
        "Proportion of company sales from North America",
        "Model", SignConvention.AMBIGUOUS, DatasetTier.TIER1,
    ),
    BrainField(
        "standardized_unexpected_earnings_2", "model77", 0.92, 0,
        "Standardized Unexpected Earnings",
        "Model", SignConvention.LONG_HIGH, DatasetTier.TIER1,
    ),
    BrainField(
        "mdl177_2_globaldevnorthamerica_v502_liqcoeff", "model77", 0.59, 0,
        "Slope of regression between monthly turnover and price return",
        "Model", SignConvention.LONG_LOW, DatasetTier.TIER1,
        backfill_days=30,
    ),
    BrainField(
        "mdl177_2_globaldevnorthamerica_v502_chgalpha12m", "model77", 0.58, 0,
        "Six-month nominal change in 12-month alpha",
        "Model", SignConvention.LONG_HIGH, DatasetTier.TIER1,
        backfill_days=30,
    ),
]
|
| 51 |
+
|
| 52 |
+
# model77 fields with full coverage (1.00) and an alpha_count of 1.
TIER1_MODEL77_FIELDS = [
    BrainField(
        "fundamental_growth_module_score", "model77", 1.00, 1,
        "Fundamental Growth submodule of Momentum Analyst II",
        "Model", SignConvention.LONG_HIGH, DatasetTier.TIER1,
    ),
    BrainField(
        "mdl77_2valuemomemtummodel_earningsqualitymodule", "model77", 1.00, 1,
        "Earnings Quality Module",
        "Model", SignConvention.LONG_HIGH, DatasetTier.TIER1,
    ),
    BrainField(
        "quarterly_earnings_surprise_stddev", "model77", 1.00, 1,
        "Most recent quarterly earnings surprise in standardized units",
        "Model", SignConvention.LONG_HIGH, DatasetTier.TIER1,
    ),
    BrainField(
        "quarterly_eps_surprise_change", "model77", 1.00, 1,
        "Change in EPS surprise between recent periods",
        "Model", SignConvention.LONG_HIGH, DatasetTier.TIER1,
    ),
    BrainField(
        "six_month_eps_revision_fy2", "model77", 1.00, 1,
        "Avg of prior six-month changes in consensus FY2 earnings forecasts",
        "Model", SignConvention.LONG_HIGH, DatasetTier.TIER1,
    ),
    BrainField(
        "mdl77_ohistoricalgrowthfactor_pctchgqtrast", "model77", 1.00, 1,
        "1-Yr Change in Total Assets (asset growth anomaly)",
        "Model", SignConvention.LONG_LOW, DatasetTier.TIER1,
    ),
    BrainField(
        "mdl77_valueanalystmodelqva_chginv", "model77", 1.00, 1,
        "1-year change in trailing 4Q inventory scaled by total assets",
        "Model", SignConvention.LONG_LOW, DatasetTier.TIER1,
    ),
    BrainField(
        "mdl77_valueanalystmodelqva_yoychgdebt", "model77", 1.00, 1,
        "Change in Debt Issuance Rank",
        "Model", SignConvention.LONG_LOW, DatasetTier.TIER1,
    ),
    BrainField(
        "three_year_change_gross_profit_margin_2", "model77", 1.00, 1,
        "Three-year change in gross profit margin",
        "Model", SignConvention.LONG_HIGH, DatasetTier.TIER1,
    ),
    BrainField(
        "time_weighted_ebitda_to_enterprise_value_2", "model77", 1.00, 1,
        "Time-weighted EBITDA/EV for next two years",
        "Model", SignConvention.LONG_HIGH, DatasetTier.TIER1,
    ),
    BrainField(
        "ttm_sales_to_enterprise_value", "model77", 1.00, 1,
        "TTM sales divided by enterprise value",
        "Model", SignConvention.LONG_HIGH, DatasetTier.TIER1,
    ),
    BrainField(
        "yearly_percentage_change_roe", "model77", 1.00, 1,
        "Year-over-year percentage change in return on equity",
        "Model", SignConvention.LONG_HIGH, DatasetTier.TIER1,
    ),
    BrainField(
        "mdl77_2liquidityriskfactor_milliq", "model77", 1.00, 1,
        "Stock Illiquidity (Amihud)",
        "Model", SignConvention.LONG_LOW, DatasetTier.TIER1,
    ),
]
|
| 67 |
+
|
| 68 |
+
# analyst4 fields, all labelled TIER3.
TIER3_ANALYST_FIELDS = [
    BrainField(
        "dividend_estimate_average", "analyst4", 0.62, 5,
        "Dividend per share - average of estimations",
        "Analyst", SignConvention.LONG_HIGH, DatasetTier.TIER3,
        backfill_days=30,
    ),
    BrainField(
        "max_ebitda_guidance", "analyst4", 1.00, 16,
        "Maximum guidance value for EBITDA (annual)",
        "Analyst", SignConvention.LONG_HIGH, DatasetTier.TIER3,
    ),
    BrainField(
        "cash_flow_operations_min_guidance", "analyst4", 1.00, 17,
        "Minimum guidance for Cash Flow from Operations",
        "Analyst", SignConvention.LONG_HIGH, DatasetTier.TIER3,
    ),
    BrainField(
        "pretax_income_reported", "analyst4", 0.56, 15,
        "Reported Pretax income for annual period",
        "Analyst", SignConvention.LONG_HIGH, DatasetTier.TIER3,
        backfill_days=30,
    ),
]
|
| 74 |
+
|
| 75 |
+
# news12 fields, all labelled TIER2.
TIER2_NEWS_FIELDS = [
    BrainField(
        "news_short_interest", "news12", 0.87, 535,
        "Ratio of shares sold short to shares outstanding",
        "News", SignConvention.LONG_LOW, DatasetTier.TIER2,
    ),
    BrainField(
        "news_pct_5_min", "news12", 0.77, 353,
        "Price change in first 5 min after news",
        "News", SignConvention.LONG_HIGH, DatasetTier.TIER2,
        backfill_days=30,
    ),
    BrainField(
        "news_vol_stddev", "news12", 0.97, 902,
        "Z-score of current volume vs 30-day average",
        "News", SignConvention.CONTRARIAN, DatasetTier.TIER2,
    ),
]
|
| 80 |
+
|
| 81 |
+
# option9 fields, all labelled TIER3 with a 30-day backfill.
TIER3_OPTION_FIELDS = [
    BrainField(
        "pcr_vol_90", "option9", 0.70, 184,
        "Put/call volume ratio for 90-day options",
        "Option", SignConvention.CONTRARIAN, DatasetTier.TIER3,
        backfill_days=30,
    ),
    BrainField(
        "pcr_vol_20", "option9", 0.70, 233,
        "Put/call volume ratio for 20-day options",
        "Option", SignConvention.CONTRARIAN, DatasetTier.TIER3,
        backfill_days=30,
    ),
    BrainField(
        "forward_price_120", "option9", 0.70, 359,
        "Synthetic forward price at 120 days from ATM options",
        "Option", SignConvention.LONG_HIGH, DatasetTier.TIER3,
        backfill_days=30,
    ),
]
|
| 86 |
+
|
| 87 |
+
# pv13 fields, all labelled TIER3.
TIER3_SUPPLY_CHAIN_FIELDS = [
    BrainField(
        "pv13_ustomergraphrank_auth_rank", "pv13", 0.79, 595,
        "HITS authority score of customers",
        "Price Volume", SignConvention.LONG_HIGH, DatasetTier.TIER3,
        backfill_days=30,
    ),
    BrainField(
        "pv13_ustomergraphrank_page_rank", "pv13", 0.79, 921,
        "PageRank of customers",
        "Price Volume", SignConvention.LONG_HIGH, DatasetTier.TIER3,
        backfill_days=30,
    ),
    BrainField(
        "rel_ret_all", "pv13", 0.96, 2280,
        "Averaged 1-day return of product-overlapping companies",
        "Price Volume", SignConvention.LONG_HIGH, DatasetTier.TIER3,
    ),
    BrainField(
        "rel_ret_comp", "pv13", 0.82, 3078,
        "Averaged 1-day return of competing companies",
        "Price Volume", SignConvention.LONG_HIGH, DatasetTier.TIER3,
        backfill_days=30,
    ),
    BrainField(
        "pv13_custretsig_retsig", "pv13", 0.93, 2718,
        "Sign of customer return",
        "Price Volume", SignConvention.LONG_HIGH, DatasetTier.TIER3,
    ),
]
|
| 94 |
+
|
| 95 |
+
# socialmedia12 fields, both CONTRARIAN and labelled TIER3.
TIER3_SOCIAL_FIELDS = [
    BrainField(
        "snt_buzz_ret_fast_d1", "socialmedia12", 0.98, 56,
        "Negative return of relative sentiment volume",
        "Social Media", SignConvention.CONTRARIAN, DatasetTier.TIER3,
    ),
    BrainField(
        "scl12_sentiment_fast_d1", "socialmedia12", 0.98, 134,
        "Daily sentiment score",
        "Social Media", SignConvention.CONTRARIAN, DatasetTier.TIER3,
    ),
]
|
| 99 |
+
|
| 100 |
+
# model16 fields with full coverage (1.00), all LONG_HIGH and labelled TIER2.
TIER2_MODEL16_FIELDS = [
    BrainField(
        "multi_factor_static_score_derivative", "model16", 1.00, 98,
        "Change in static multi-factor score",
        "Model", SignConvention.LONG_HIGH, DatasetTier.TIER2,
    ),
    BrainField(
        "relative_valuation_rank_derivative", "model16", 1.00, 119,
        "Under/overpriced based on valuation multiples (change)",
        "Model", SignConvention.LONG_HIGH, DatasetTier.TIER2,
    ),
    BrainField(
        "growth_potential_rank_derivative", "model16", 1.00, 152,
        "Composite growth score change",
        "Model", SignConvention.LONG_HIGH, DatasetTier.TIER2,
    ),
    BrainField(
        "earnings_certainty_rank_derivative", "model16", 1.00, 175,
        "Earnings quality certainty (change)",
        "Model", SignConvention.LONG_HIGH, DatasetTier.TIER2,
    ),
]
|
| 106 |
+
|
| 107 |
+
# Every registered field in one flat list, in the order the groups are listed here.
ALL_FIELDS: list[BrainField] = [
    *GOLDMINE_FIELDS,
    *TIER1_MODEL77_FIELDS,
    *TIER3_ANALYST_FIELDS,
    *TIER2_NEWS_FIELDS,
    *TIER3_OPTION_FIELDS,
    *TIER3_SUPPLY_CHAIN_FIELDS,
    *TIER3_SOCIAL_FIELDS,
    *TIER2_MODEL16_FIELDS,
]

# Lookup by field id; if two entries shared an id, the later one would win.
FIELD_INDEX: dict[str, BrainField] = {entry.id: entry for entry in ALL_FIELDS}
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
def pick_field(
    tier: Optional[DatasetTier] = None,
    category: Optional[str] = None,
    max_ac: int = 50,
    min_coverage: float = 0.55,
    exclude_ids: Optional[Iterable[str]] = None,
    *,
    fields: Optional[Iterable[BrainField]] = None,
) -> Optional[BrainField]:
    """Randomly pick one field, favouring those with a low ``alpha_count``.

    Args:
        tier: Keep only fields whose ``tier`` equals this value; ``None``
            disables the filter.
        category: Keep only fields whose ``category`` equals this value;
            ``None`` disables the filter.
        max_ac: Largest ``alpha_count`` a candidate may have (inclusive).
        min_coverage: Smallest ``coverage`` a candidate may have (inclusive).
        exclude_ids: Field ids that must not be returned. Any iterable of ids
            is accepted; a single string is treated as one id.
        fields: Pool to draw from. Defaults to ``ALL_FIELDS``.

    Returns:
        One matching field drawn with weight ``1 / (alpha_count + 1)``, or
        ``None`` when nothing passes the filters.
    """
    # Materialise the exclusions once. Testing membership against the raw
    # argument is O(n) per candidate for a list, and a one-shot iterator is
    # consumed by the first lookups, so later ids were silently not excluded.
    if exclude_ids is None:
        excluded = set()
    elif isinstance(exclude_ids, str):
        excluded = {exclude_ids}
    else:
        excluded = set(exclude_ids)

    pool = ALL_FIELDS if fields is None else fields
    candidates = [
        f
        for f in pool
        if f.alpha_count <= max_ac
        and f.coverage >= min_coverage
        and f.id not in excluded
        and (tier is None or f.tier == tier)
        and (category is None or f.category == category)
    ]
    if not candidates:
        return None

    # random.choices takes relative weights, so they need not sum to 1.
    weights = [1.0 / (f.alpha_count + 1) for f in candidates]
    return random.choices(candidates, weights=weights, k=1)[0]
|
| 125 |
+
|
| 126 |
+
|
| 127 |
+
def get_backfill_days(field: BrainField) -> int:
    """Return the backfill window, in days, to use for ``field``.

    Coverage sets a minimum window: 30 days below 0.70 coverage and 20 days
    below 0.85. A larger ``field.backfill_days`` is honoured rather than
    shortened, so an entry that declares 30 days at 0.77 coverage gets 30,
    not 20. At 0.85 coverage or above, ``field.backfill_days`` is returned
    unchanged.

    Args:
        field: Object exposing ``coverage`` and ``backfill_days``.

    Returns:
        The number of backfill days.
    """
    if field.coverage < 0.70:
        coverage_floor = 30
    elif field.coverage < 0.85:
        coverage_floor = 20
    else:
        return field.backfill_days
    # The coverage value is a lower bound; never cut an explicit longer window.
    return max(coverage_floor, field.backfill_days)
|
| 133 |
+
|
| 134 |
+
|
| 135 |
+
def get_sign_multiplier(field: BrainField) -> int:
    """Return the +1 / -1 multiplier implied by ``field.sign``.

    Only ``SignConvention.LONG_LOW`` yields ``-1``. ``LONG_HIGH`` and every
    other value (``CONTRARIAN``, ``AMBIGUOUS``) yield ``1``.
    """
    # NOTE(review): CONTRARIAN maps to +1, same as LONG_HIGH — confirm that is intended.
    return -1 if field.sign == SignConvention.LONG_LOW else 1
|