alpha-factory / alpha_factory /deterministic /proven_templates.py
gaurv007's picture
Fix alpha generation pipeline runnable-output blockers
1f2f2b2 verified
"""
Proven Templates v2 — Deterministic alpha generation without LLMs.
These templates use known-valid BRAIN expression structures with
novel fields swapped into the "value leg" or primary signal slot.
NOTE: rank() depends only on the ordering of its argument, so coefficient
values placed on terms inside rank() (e.g., 0.60, 0.40) are mostly
decorative. The real signal comes from which fields are combined, not
how they're weighted.
"""
from ..data.brain_fields import (
FIELD_INDEX, ALL_FIELDS, GOLDMINE_FIELDS, TIER1_MODEL77_FIELDS,
TIER3_ANALYST_FIELDS, TIER2_NEWS_FIELDS, TIER3_OPTION_FIELDS,
TIER3_SUPPLY_CHAIN_FIELDS, TIER3_SOCIAL_FIELDS, TIER2_MODEL16_FIELDS,
BrainField, SignConvention,
get_backfill_days, get_sign_multiplier,
)
from ..data.brain_groups import get_group_for_expression, PRODUCTION_GROUPS
import random
def _get_sign(field: BrainField) -> str:
    """Return the sign prefix to place in front of a field's term.

    Returns:
        ``"-"`` when the field's convention is ``SignConvention.LONG_LOW``,
        otherwise an empty string.
    """
    return "-" if field.sign == SignConvention.LONG_LOW else ""
def _get_bf(field: BrainField) -> str:
    """Return the expression text used to reference ``field``.

    Fields whose ``coverage`` is below 0.85 are wrapped in
    ``ts_backfill(<id>, <days>)``, with the day count supplied by
    ``get_backfill_days``; all other fields are referenced by their bare id.
    """
    # The backfill window is looked up before the coverage test, matching the
    # original call order.
    backfill_window = get_backfill_days(field)
    needs_backfill = field.coverage < 0.85
    if not needs_backfill:
        return field.id
    return f"ts_backfill({field.id}, {backfill_window})"
def generate_alpha15_variant(field: BrainField, group_key: str = None, decay: int = 5) -> str:
    """Build the "alpha 15" archetype expression around ``field``.

    Emitted structure::

        ts_decay_linear(
            group_neutralize(
                rank(zscore(ts_rank((high + low) / 2 - close, 252))
                     + <sign>zscore(ts_rank(<field>, 252))),
                <group_key>),
            <decay>)

    The first leg is a midpoint-vs-close term; the second is the supplied
    field ranked over 252 days. The two legs are summed with no explicit
    weights.

    Args:
        field: Field substituted into the second leg.
        group_key: Grouping passed to ``group_neutralize``. When ``None`` one
            is obtained from ``get_group_for_expression(prefer_novel=True)``.
        decay: Window passed to ``ts_decay_linear``.

    Returns:
        The expression as a string.
    """
    neutral_group = (
        group_key if group_key is not None
        else get_group_for_expression(prefer_novel=True)
    )
    prefix = _get_sign(field)
    source = _get_bf(field)

    # Assemble inside-out: the two legs, then rank, neutralize and decay.
    intraday_leg = "zscore(ts_rank((high + low) / 2 - close, 252))"
    field_leg = f"{prefix}zscore(ts_rank({source}, 252))"
    combined = f"rank({intraday_leg} + {field_leg})"
    neutralized = f"group_neutralize({combined}, {neutral_group})"
    return f"ts_decay_linear({neutralized}, {decay})"
def generate_alpha6_variant(field: BrainField, group_key: str = None, decay: int = 5) -> str:
    """Build the "alpha 6" archetype expression around ``field``.

    The ranked quantity is the sum of two parts:

    * a multiplicative price/volume score,
      ``rank((vwap - close) / close) * rank(volume / ts_mean(volume, 20))``
    * the field leg, ``<sign>zscore(ts_rank(<field>, 252))``

    The sum is wrapped in ``rank``, ``group_neutralize`` and
    ``ts_decay_linear``.

    Args:
        field: Field substituted into the second part.
        group_key: Grouping passed to ``group_neutralize``. When ``None`` one
            is obtained from ``get_group_for_expression(prefer_novel=True)``.
        decay: Window passed to ``ts_decay_linear``.

    Returns:
        The expression as a string.
    """
    if group_key is None:
        group_key = get_group_for_expression(prefer_novel=True)
    prefix = _get_sign(field)
    source = _get_bf(field)

    micro_score = "rank((vwap - close) / close) * rank(volume / ts_mean(volume, 20))"
    pieces = [
        "ts_decay_linear(group_neutralize(rank(",
        micro_score,
        f" + {prefix}zscore(ts_rank({source}, 252))",
        f"), {group_key}), {decay})",
    ]
    return "".join(pieces)
def generate_pure_field_rank(field: BrainField, horizon: int = 252, group_key: str = None, decay: int = 5) -> str:
    """Build the simplest template: a signed rank of the field's time-series rank.

    Emitted structure::

        ts_decay_linear(
            group_neutralize(<sign>rank(ts_rank(<field>, <horizon>)), <group_key>),
            <decay>)

    Args:
        field: Field to rank.
        horizon: Look-back window passed to ``ts_rank``.
        group_key: Grouping passed to ``group_neutralize``. When ``None`` one
            is obtained from ``get_group_for_expression(prefer_novel=True)``.
        decay: Window passed to ``ts_decay_linear``.

    Returns:
        The expression as a string.
    """
    neutral_group = (
        get_group_for_expression(prefer_novel=True) if group_key is None
        else group_key
    )
    prefix = _get_sign(field)
    source = _get_bf(field)

    ranked = f"{prefix}rank(ts_rank({source}, {horizon}))"
    return f"ts_decay_linear(group_neutralize({ranked}, {neutral_group}), {decay})"
def generate_delta_momentum(field: BrainField, horizon: int = 21, group_key: str = None, decay: int = 5) -> str:
    """Build a change-based template: the signed z-score of the field's delta.

    Emitted structure::

        ts_decay_linear(
            group_neutralize(<sign>zscore(ts_delta(<field>, <horizon>)), <group_key>),
            <decay>)

    Args:
        field: Field whose change over ``horizon`` is scored.
        horizon: Window passed to ``ts_delta``.
        group_key: Grouping passed to ``group_neutralize``. When ``None`` one
            is obtained from ``get_group_for_expression(prefer_novel=True)``.
        decay: Window passed to ``ts_decay_linear``.

    Returns:
        The expression as a string.
    """
    if group_key is None:
        group_key = get_group_for_expression(prefer_novel=True)

    template = (
        "ts_decay_linear(group_neutralize("
        "{sign}zscore(ts_delta({ref}, {window}))"
        ", {group}), {decay})"
    )
    return template.format(
        sign=_get_sign(field),
        ref=_get_bf(field),
        window=horizon,
        group=group_key,
        decay=decay,
    )
def generate_mean_reversion(field: BrainField, horizon: int = 20, group_key: str = None, decay: int = 5) -> str:
    """Build a contrarian template by inverting the field's usual sign.

    The sign here is the opposite of what ``_get_sign`` would give: a
    ``SignConvention.LONG_LOW`` field gets no prefix, and every other
    convention (``LONG_HIGH`` included) gets ``"-"``.

    Emitted structure::

        ts_decay_linear(
            group_neutralize(<sign>zscore(ts_rank(<field>, <horizon>)), <group_key>),
            <decay>)

    Args:
        field: Field to fade.
        horizon: Look-back window passed to ``ts_rank``.
        group_key: Grouping passed to ``group_neutralize``. When ``None`` one
            is obtained from ``get_group_for_expression(prefer_novel=True)``.
        decay: Window passed to ``ts_decay_linear``.

    Returns:
        The expression as a string.
    """
    neutral_group = (
        group_key if group_key is not None
        else get_group_for_expression(prefer_novel=True)
    )

    # Only LONG_LOW keeps a positive sign; everything else is negated.
    flipped = "" if field.sign == SignConvention.LONG_LOW else "-"
    source = _get_bf(field)

    scored = f"{flipped}zscore(ts_rank({source}, {horizon}))"
    neutralized = f"group_neutralize({scored}, {neutral_group})"
    return f"ts_decay_linear({neutralized}, {decay})"
def generate_batch_from_proven_templates(count: int = 5, decay: int = 5) -> list[dict]:
    """Generate a batch of alphas by pairing fields with the fixed templates.

    Fields are drawn from a shuffled pool made of ``GOLDMINE_FIELDS``,
    ``TIER1_MODEL77_FIELDS``, ``TIER2_MODEL16_FIELDS`` and
    ``TIER3_ANALYST_FIELDS``. Each distinct field id is used at most once and
    the templates are applied in rotation (alpha15, alpha6, pure_rank,
    delta_momentum, mean_reversion).

    Args:
        count: Maximum number of alphas to generate. Values <= 0 yield an
            empty list.
        decay: Decay parameter forwarded to each template's
            ``ts_decay_linear`` (default 5).

    Returns:
        A list of dicts, one per alpha, with the keys ``expression``,
        ``field_id``, ``field_ac``, ``template``, ``group_key``, ``theme``,
        ``archetype``, ``sign``, ``decay`` and ``neutralization``. The list is
        shorter than ``count`` only when the pool holds fewer distinct field
        ids than requested.
    """
    # Pool membership follows the tier lists: goldmine, model77, model16, analyst.
    priority_fields = GOLDMINE_FIELDS + TIER1_MODEL77_FIELDS + TIER2_MODEL16_FIELDS + TIER3_ANALYST_FIELDS

    # NOTE(review): a full shuffle mixes the tiers, so the concatenation order
    # above only decides which fields are in the pool, not the order in which
    # they are drawn. Confirm whether per-tier priority was intended.
    available = list(priority_fields)
    random.shuffle(available)

    templates = [
        ("alpha15", generate_alpha15_variant),
        ("alpha6", generate_alpha6_variant),
        ("pure_rank", generate_pure_field_rank),
        ("delta_momentum", generate_delta_momentum),
        ("mean_reversion", generate_mean_reversion),
    ]

    results = []
    used_fields = set()
    # Walk the whole pool rather than only its first `count` entries: a field
    # id repeated across tiers must not consume a slot, otherwise the batch
    # comes back short even though unused fields remain.
    for field in available:
        if len(results) >= count:
            break
        if field.id in used_fields:
            continue
        used_fields.add(field.id)

        # Rotate on the number of alphas produced so a skipped duplicate does
        # not also skip a template.
        template_name, template_fn = templates[len(results) % len(templates)]
        group_key = get_group_for_expression(prefer_novel=True)
        expr = template_fn(field, group_key=group_key, decay=decay)
        results.append({
            "expression": expr,
            "field_id": field.id,
            "field_ac": field.alpha_count,
            "template": template_name,
            "group_key": group_key,
            "theme": "proven_template",
            "archetype": template_name,
            "sign": field.sign.value,
            "decay": decay,
            "neutralization": group_key,
        })
    return results