""" Proven Templates v2 — Deterministic alpha generation without LLMs. These templates use known-valid BRAIN expression structures with novel fields swapped into the "value leg" or primary signal slot. NOTE: Weights inside rank() are monotonic — the coefficient values (e.g., 0.60, 0.40) are mostly decorative. The real signal comes from which fields are combined, not how they're weighted. """ from ..data.brain_fields import ( FIELD_INDEX, ALL_FIELDS, GOLDMINE_FIELDS, TIER1_MODEL77_FIELDS, TIER3_ANALYST_FIELDS, TIER2_NEWS_FIELDS, TIER3_OPTION_FIELDS, TIER3_SUPPLY_CHAIN_FIELDS, TIER3_SOCIAL_FIELDS, TIER2_MODEL16_FIELDS, BrainField, SignConvention, get_backfill_days, get_sign_multiplier, ) from ..data.brain_groups import get_group_for_expression, PRODUCTION_GROUPS import random def _get_sign(field: BrainField) -> str: """Get sign prefix for a field based on convention.""" if field.sign == SignConvention.LONG_LOW: return "-" return "" def _get_bf(field: BrainField) -> str: """Get ts_backfill wrapper if needed.""" bf_days = get_backfill_days(field) if field.coverage < 0.85: return f"ts_backfill({field.id}, {bf_days})" return field.id def generate_alpha15_variant(field: BrainField, group_key: str = None, decay: int = 5) -> str: """ Alpha 15 archetype: intraday position proxy + value/momentum leg. Structure: rank( zscore(ts_rank((high+low)/2 - close, 252)) + zscore(ts_rank(field, 252)) ) NOTE: The 0.60/0.40 coefficients are inside rank() and are effectively decorative — rank() is a monotonic transform. The signal comes from combining two orthogonal sources: (1) intraday position (midpoint vs close) and (2) a fundamental/momentum field ranked over 252 days. """ if group_key is None: group_key = get_group_for_expression(prefer_novel=True) sign = _get_sign(field) field_ref = _get_bf(field) expr = ( f"ts_decay_linear(" f"group_neutralize(" f"rank(" f"zscore(ts_rank((high + low) / 2 - close, 252)) " f"+ {sign}zscore(ts_rank({field_ref}, 252))" f"), {group_key}), {decay})" ) return expr def generate_alpha6_variant(field: BrainField, group_key: str = None, decay: int = 5) -> str: """ Alpha 6 archetype: microstructure score + value leg. Microstructure: rank((vwap - close) / close) * rank(volume / ts_mean(volume, 20)) Value leg: zscore(ts_rank(field, 252)) NOTE: The structure is multiplicative in the micro part and additive with the value leg. The coefficients are decorative inside rank(). """ if group_key is None: group_key = get_group_for_expression(prefer_novel=True) sign = _get_sign(field) field_ref = _get_bf(field) expr = ( f"ts_decay_linear(" f"group_neutralize(" f"rank(" f"rank((vwap - close) / close) * rank(volume / ts_mean(volume, 20)) " f"+ {sign}zscore(ts_rank({field_ref}, 252))" f"), {group_key}), {decay})" ) return expr def generate_pure_field_rank(field: BrainField, horizon: int = 252, group_key: str = None, decay: int = 5) -> str: """ Simplest possible expression — just rank the field cross-sectionally. Often works for high-quality pre-computed model fields (model77). """ if group_key is None: group_key = get_group_for_expression(prefer_novel=True) sign = _get_sign(field) field_ref = _get_bf(field) expr = ( f"ts_decay_linear(" f"group_neutralize(" f"{sign}rank(ts_rank({field_ref}, {horizon}))" f", {group_key}), {decay})" ) return expr def generate_delta_momentum(field: BrainField, horizon: int = 21, group_key: str = None, decay: int = 5) -> str: """ Delta/change momentum — buy stocks where the field is improving. Works well for score-type fields (model16 derivatives, earnings revisions). """ if group_key is None: group_key = get_group_for_expression(prefer_novel=True) sign = _get_sign(field) field_ref = _get_bf(field) expr = ( f"ts_decay_linear(" f"group_neutralize(" f"{sign}zscore(ts_delta({field_ref}, {horizon}))" f", {group_key}), {decay})" ) return expr def generate_mean_reversion(field: BrainField, horizon: int = 20, group_key: str = None, decay: int = 5) -> str: """ Mean reversion on a field — short stocks where field is high, long where low. Effective for contrarian indicators (option PCR, social buzz). """ if group_key is None: group_key = get_group_for_expression(prefer_novel=True) # Mean reversion: flip the natural sign if field.sign == SignConvention.LONG_HIGH: sign = "-" elif field.sign == SignConvention.LONG_LOW: sign = "" else: sign = "-" field_ref = _get_bf(field) expr = ( f"ts_decay_linear(" f"group_neutralize(" f"{sign}zscore(ts_rank({field_ref}, {horizon}))" f", {group_key}), {decay})" ) return expr def generate_batch_from_proven_templates(count: int = 5, decay: int = 5) -> list[dict]: """ Generate a batch of alphas using PROVEN templates with novel fields. This is the PRIMARY generation method — guaranteed valid structure. Args: count: Number of alphas to generate. decay: Decay parameter for ts_decay_linear (default 5). Returns list of dicts with expression, field, template, group_key. """ # Priority: goldmine (AC=0) first, then AC=1, then AC≤5 priority_fields = GOLDMINE_FIELDS + TIER1_MODEL77_FIELDS + TIER2_MODEL16_FIELDS + TIER3_ANALYST_FIELDS # Shuffle for variety but keep priority ordering available = list(priority_fields) random.shuffle(available) templates = [ ("alpha15", generate_alpha15_variant), ("alpha6", generate_alpha6_variant), ("pure_rank", generate_pure_field_rank), ("delta_momentum", generate_delta_momentum), ("mean_reversion", generate_mean_reversion), ] results = [] used_fields = set() for i in range(min(count, len(available))): field = available[i] if field.id in used_fields: continue used_fields.add(field.id) # Cycle through templates template_name, template_fn = templates[i % len(templates)] group_key = get_group_for_expression(prefer_novel=True) expr = template_fn(field, group_key=group_key, decay=decay) results.append({ "expression": expr, "field_id": field.id, "field_ac": field.alpha_count, "template": template_name, "group_key": group_key, "theme": "proven_template", "archetype": template_name, "sign": field.sign.value, "decay": decay, "neutralization": group_key, }) return results