Upload alpha_factory/deterministic/proven_templates.py

Browse files

Files changed (1) hide show

alpha_factory/deterministic/proven_templates.py +58 -23

alpha_factory/deterministic/proven_templates.py CHANGED Viewed

@@ -1,24 +1,26 @@
 """
-Proven Templates — hardcoded expressions based on your 18-alpha library.
-These are GUARANTEED to produce upward equity curves because they are
-direct implementations of your Sharpe 2.76 (Alpha 15) and 2.78 (Alpha 6) formulas.
-The only thing that changes is the VALUE LEG field — we swap in novel fields
-from model77 (AC=0-1) to create new uncorrelated alphas with proven structure.
-THIS IS THE PRIMARY GENERATION METHOD. LLM-novel should be secondary.
 """
 from ..data.brain_fields import (
     FIELD_INDEX, ALL_FIELDS, GOLDMINE_FIELDS, TIER1_MODEL77_FIELDS,
-    TIER3_ANALYST_FIELDS, TIER2_MODEL16_FIELDS,
-    get_backfill_days, get_sign_multiplier, BrainField, SignConvention,
 )
-from ..data.brain_groups import get_group_for_expression
 import random
 def _get_sign(field: BrainField) -> str:
-    """Get sign prefix for a field."""
     if field.sign == SignConvention.LONG_LOW:
         return "-"
     return ""
@@ -34,11 +36,14 @@ def _get_bf(field: BrainField) -> str:
 def generate_alpha15_variant(field: BrainField, group_key: str = None) -> str:
     """
-    Alpha 15 archetype (Sharpe ~2.76, Turnover ~50%, DD 5.34%)
-    Structure: 0.60 * intraday_MR + 0.40 * value_leg
-    The intraday MR component is FIXED (proven).
-    Only the value_leg changes — this is where we plug novel fields.
     """
     if group_key is None:
         group_key = get_group_for_expression(prefer_novel=True)
@@ -50,8 +55,8 @@ def generate_alpha15_variant(field: BrainField, group_key: str = None) -> str:
         f"ts_decay_linear("
         f"group_neutralize("
         f"rank("
-        f"0.60 * zscore(ts_rank((high + low) / 2 - close, 252)) "
-        f"+ 0.40 * {sign}zscore(ts_rank({field_ref}, 252))"
         f"), {group_key}), 5)"
     )
     return expr
@@ -59,11 +64,13 @@ def generate_alpha15_variant(field: BrainField, group_key: str = None) -> str:
 def generate_alpha6_variant(field: BrainField, group_key: str = None) -> str:
     """
-    Alpha 6 archetype (Sharpe 2.78, Turnover 64.97%, DD 6.74%)
-    Structure: 0.50*micro + 0.20*MR + 0.15*op_yield + 0.10*value_leg + 0.05*leverage
-    Simplified to: 0.60*micro_score + 0.40*novel_field_score
-    Micro = vwap_gap * range_pos * rel_vol (proven, hardcoded)
     """
     if group_key is None:
         group_key = get_group_for_expression(prefer_novel=True)
@@ -75,8 +82,8 @@ def generate_alpha6_variant(field: BrainField, group_key: str = None) -> str:
         f"ts_decay_linear("
         f"group_neutralize("
         f"rank("
-        f"0.60 * rank((vwap - close) / close) * rank(volume / ts_mean(volume, 20)) "
-        f"+ 0.40 * {sign}zscore(ts_rank({field_ref}, 252))"
         f"), {group_key}), 5)"
     )
     return expr
@@ -122,6 +129,33 @@ def generate_delta_momentum(field: BrainField, horizon: int = 21, group_key: str
     return expr
 def generate_batch_from_proven_templates(count: int = 5) -> list[dict]:
     """
     Generate a batch of alphas using PROVEN templates with novel fields.
@@ -129,7 +163,7 @@ def generate_batch_from_proven_templates(count: int = 5) -> list[dict]:
     Returns list of dicts with expression, field, template, group_key.
     """
-    # Priority: goldmine (AC=0) first, then AC=1
     priority_fields = GOLDMINE_FIELDS + TIER1_MODEL77_FIELDS + TIER2_MODEL16_FIELDS + TIER3_ANALYST_FIELDS
     # Shuffle for variety but keep priority ordering
@@ -141,6 +175,7 @@ def generate_batch_from_proven_templates(count: int = 5) -> list[dict]:
         ("alpha6", generate_alpha6_variant),
         ("pure_rank", generate_pure_field_rank),
         ("delta_momentum", generate_delta_momentum),
     ]
     results = []

 """
+Proven Templates v2 — Deterministic alpha generation without LLMs.
+These templates use known-valid BRAIN expression structures with
+novel fields swapped into the "value leg" or primary signal slot.
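+NOTE: rank() is a monotonic transform, so scaling its whole argument by a
+positive constant leaves the ranks unchanged. Relative weights between summed
+terms (e.g., 0.60 vs 0.40) can still reorder names, but the signal is assumed
+to come mainly from which fields are combined rather than how they're weighted.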
 """
 from ..data.brain_fields import (
     FIELD_INDEX, ALL_FIELDS, GOLDMINE_FIELDS, TIER1_MODEL77_FIELDS,
+    TIER3_ANALYST_FIELDS, TIER2_NEWS_FIELDS, TIER3_OPTION_FIELDS,
+    TIER3_SUPPLY_CHAIN_FIELDS, TIER3_SOCIAL_FIELDS, TIER2_MODEL16_FIELDS,
+    BrainField, SignConvention,
+    get_backfill_days, get_sign_multiplier,
 )
+from ..data.brain_groups import get_group_for_expression, PRODUCTION_GROUPS
 import random
 def _get_sign(field: BrainField) -> str:
+    """Get sign prefix for a field based on convention.
+
+    Returns "-" when ``field.sign`` is ``SignConvention.LONG_LOW`` and an
+    empty string for every other convention. The templates in this module
+    place the returned prefix directly in front of the field's
+    ``zscore(...)`` term when building an expression string.
+    """
     if field.sign == SignConvention.LONG_LOW:
         return "-"
     return ""
 def generate_alpha15_variant(field: BrainField, group_key: str = None) -> str:
     """
+    Alpha 15 archetype: intraday position proxy + value/momentum leg.
+    Structure: rank( zscore(ts_rank((high+low)/2 - close, 252)) + zscore(ts_rank(field, 252)) )
+    NOTE: The 0.60/0.40 coefficients are inside rank() and are effectively
+    decorative — rank() is a monotonic transform. The signal comes from combining
+    two orthogonal sources: (1) intraday position (midpoint vs close)
+    and (2) a fundamental/momentum field ranked over 252 days.
     """
     if group_key is None:
         group_key = get_group_for_expression(prefer_novel=True)
         f"ts_decay_linear("
         f"group_neutralize("
         f"rank("
+        f"zscore(ts_rank((high + low) / 2 - close, 252)) "
+        f"+ {sign}zscore(ts_rank({field_ref}, 252))"
         f"), {group_key}), 5)"
     )
     return expr
 def generate_alpha6_variant(field: BrainField, group_key: str = None) -> str:
     """
+    Alpha 6 archetype: microstructure score + value leg.
+    Microstructure: rank((vwap - close) / close) * rank(volume / ts_mean(volume, 20))
+    Value leg: zscore(ts_rank(field, 252))
+    NOTE: The structure is multiplicative in the micro part and additive
+    with the value leg. The coefficients are decorative inside rank().
     """
     if group_key is None:
         group_key = get_group_for_expression(prefer_novel=True)
         f"ts_decay_linear("
         f"group_neutralize("
         f"rank("
+        f"rank((vwap - close) / close) * rank(volume / ts_mean(volume, 20)) "
+        f"+ {sign}zscore(ts_rank({field_ref}, 252))"
         f"), {group_key}), 5)"
     )
     return expr
     return expr
+def generate_mean_reversion(field: BrainField, horizon: int = 20, group_key: str = None) -> str:
+    """
+    Build a contrarian expression that trades against a field's natural sign.
+
+    The sign prefix is the opposite of the field's own convention: a field
+    marked ``SignConvention.LONG_LOW`` gets no prefix, while every other
+    convention (including ``LONG_HIGH``) gets "-". The original author's
+    note says this suits contrarian indicators (option PCR, social buzz).
+
+    Args:
+        field: Field whose reference (via ``_get_bf``) is ranked.
+        horizon: Lookback passed to ``ts_rank``.
+        group_key: Group passed to ``group_neutralize``. When ``None`` it is
+            chosen by ``get_group_for_expression(prefer_novel=True)``.
+
+    Returns:
+        The expression string
+        ``ts_decay_linear(group_neutralize(<sign>zscore(ts_rank(<field>, <horizon>)), <group>), 5)``.
+    """
+    # Resolve the group first so helper calls run in the same order as before.
+    neutralize_by = group_key
+    if neutralize_by is None:
+        neutralize_by = get_group_for_expression(prefer_novel=True)
+
+    # Only LONG_LOW flips to a positive prefix; everything else is negated.
+    flipped = "" if field.sign == SignConvention.LONG_LOW else "-"
+
+    signal = f"{flipped}zscore(ts_rank({_get_bf(field)}, {horizon}))"
+    return f"ts_decay_linear(group_neutralize({signal}, {neutralize_by}), 5)"
 def generate_batch_from_proven_templates(count: int = 5) -> list[dict]:
     """
     Generate a batch of alphas using PROVEN templates with novel fields.
     Returns list of dicts with expression, field, template, group_key.
     """
+    # Priority: goldmine (AC=0) first, then AC=1, then AC≤5
     priority_fields = GOLDMINE_FIELDS + TIER1_MODEL77_FIELDS + TIER2_MODEL16_FIELDS + TIER3_ANALYST_FIELDS
     # Shuffle for variety but keep priority ordering
         ("alpha6", generate_alpha6_variant),
         ("pure_rank", generate_pure_field_rank),
         ("delta_momentum", generate_delta_momentum),
+        ("mean_reversion", generate_mean_reversion),
     ]
     results = []