gaurv007 committed on
Commit
515a75a
·
verified ·
1 Parent(s): 8b456b8

Upload alpha_factory/deterministic/proven_templates.py

Browse files
alpha_factory/deterministic/proven_templates.py CHANGED
@@ -1,24 +1,26 @@
1
  """
2
- Proven Templates — hardcoded expressions based on your 18-alpha library.
3
- These are GUARANTEED to produce upward equity curves because they are
4
- direct implementations of your Sharpe 2.76 (Alpha 15) and 2.78 (Alpha 6) formulas.
5
 
6
- The only thing that changes is the VALUE LEG field — we swap in novel fields
7
- from model77 (AC=0-1) to create new uncorrelated alphas with proven structure.
8
 
9
- THIS IS THE PRIMARY GENERATION METHOD. LLM-novel should be secondary.
 
 
10
  """
11
  from ..data.brain_fields import (
12
  FIELD_INDEX, ALL_FIELDS, GOLDMINE_FIELDS, TIER1_MODEL77_FIELDS,
13
- TIER3_ANALYST_FIELDS, TIER2_MODEL16_FIELDS,
14
- get_backfill_days, get_sign_multiplier, BrainField, SignConvention,
 
 
15
  )
16
- from ..data.brain_groups import get_group_for_expression
17
  import random
18
 
19
 
20
  def _get_sign(field: BrainField) -> str:
21
- """Get sign prefix for a field."""
22
  if field.sign == SignConvention.LONG_LOW:
23
  return "-"
24
  return ""
@@ -34,11 +36,14 @@ def _get_bf(field: BrainField) -> str:
34
 
35
  def generate_alpha15_variant(field: BrainField, group_key: str = None) -> str:
36
  """
37
- Alpha 15 archetype (Sharpe ~2.76, Turnover ~50%, DD 5.34%)
38
- Structure: 0.60 * intraday_MR + 0.40 * value_leg
39
 
40
- The intraday MR component is FIXED (proven).
41
- Only the value_leg changes — this is where we plug novel fields.
 
 
 
 
42
  """
43
  if group_key is None:
44
  group_key = get_group_for_expression(prefer_novel=True)
@@ -50,8 +55,8 @@ def generate_alpha15_variant(field: BrainField, group_key: str = None) -> str:
50
  f"ts_decay_linear("
51
  f"group_neutralize("
52
  f"rank("
53
- f"0.60 * zscore(ts_rank((high + low) / 2 - close, 252)) "
54
- f"+ 0.40 * {sign}zscore(ts_rank({field_ref}, 252))"
55
  f"), {group_key}), 5)"
56
  )
57
  return expr
@@ -59,11 +64,13 @@ def generate_alpha15_variant(field: BrainField, group_key: str = None) -> str:
59
 
60
  def generate_alpha6_variant(field: BrainField, group_key: str = None) -> str:
61
  """
62
- Alpha 6 archetype (Sharpe 2.78, Turnover 64.97%, DD 6.74%)
63
- Structure: 0.50*micro + 0.20*MR + 0.15*op_yield + 0.10*value_leg + 0.05*leverage
 
 
64
 
65
- Simplified to: 0.60*micro_score + 0.40*novel_field_score
66
- Micro = vwap_gap * range_pos * rel_vol (proven, hardcoded)
67
  """
68
  if group_key is None:
69
  group_key = get_group_for_expression(prefer_novel=True)
@@ -75,8 +82,8 @@ def generate_alpha6_variant(field: BrainField, group_key: str = None) -> str:
75
  f"ts_decay_linear("
76
  f"group_neutralize("
77
  f"rank("
78
- f"0.60 * rank((vwap - close) / close) * rank(volume / ts_mean(volume, 20)) "
79
- f"+ 0.40 * {sign}zscore(ts_rank({field_ref}, 252))"
80
  f"), {group_key}), 5)"
81
  )
82
  return expr
@@ -122,6 +129,33 @@ def generate_delta_momentum(field: BrainField, horizon: int = 21, group_key: str
122
  return expr
123
 
124
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
  def generate_batch_from_proven_templates(count: int = 5) -> list[dict]:
126
  """
127
  Generate a batch of alphas using PROVEN templates with novel fields.
@@ -129,7 +163,7 @@ def generate_batch_from_proven_templates(count: int = 5) -> list[dict]:
129
 
130
  Returns list of dicts with expression, field, template, group_key.
131
  """
132
- # Priority: goldmine (AC=0) first, then AC=1
133
  priority_fields = GOLDMINE_FIELDS + TIER1_MODEL77_FIELDS + TIER2_MODEL16_FIELDS + TIER3_ANALYST_FIELDS
134
 
135
  # Shuffle for variety but keep priority ordering
@@ -141,6 +175,7 @@ def generate_batch_from_proven_templates(count: int = 5) -> list[dict]:
141
  ("alpha6", generate_alpha6_variant),
142
  ("pure_rank", generate_pure_field_rank),
143
  ("delta_momentum", generate_delta_momentum),
 
144
  ]
145
 
146
  results = []
 
1
  """
2
+ Proven Templates v2 — Deterministic alpha generation without LLMs.
 
 
3
 
4
+ These templates use known-valid BRAIN expression structures with
5
+ novel fields swapped into the "value leg" or primary signal slot.
6
 
7
+ NOTE: rank() is a monotonic transform, so the coefficient values
8
+ (e.g., 0.60, 0.40) are mostly decorative. The real signal comes from
9
+ which fields are combined, not how they're weighted.
10
  """
11
  from ..data.brain_fields import (
12
  FIELD_INDEX, ALL_FIELDS, GOLDMINE_FIELDS, TIER1_MODEL77_FIELDS,
13
+ TIER3_ANALYST_FIELDS, TIER2_NEWS_FIELDS, TIER3_OPTION_FIELDS,
14
+ TIER3_SUPPLY_CHAIN_FIELDS, TIER3_SOCIAL_FIELDS, TIER2_MODEL16_FIELDS,
15
+ BrainField, SignConvention,
16
+ get_backfill_days, get_sign_multiplier,
17
  )
18
+ from ..data.brain_groups import get_group_for_expression, PRODUCTION_GROUPS
19
  import random
20
 
21
 
22
  def _get_sign(field: BrainField) -> str:
23
+ """Get sign prefix for a field based on convention."""
24
  if field.sign == SignConvention.LONG_LOW:
25
  return "-"
26
  return ""
 
36
 
37
  def generate_alpha15_variant(field: BrainField, group_key: str = None) -> str:
38
  """
39
+ Alpha 15 archetype: intraday position proxy + value/momentum leg.
 
40
 
41
+ Structure: rank( zscore(ts_rank((high+low)/2 - close, 252)) + zscore(ts_rank(field, 252)) )
42
+
43
+ NOTE: The 0.60/0.40 coefficients are inside rank() and are effectively
44
+ decorative — rank() is a monotonic transform. The signal comes from combining
45
+ two orthogonal sources: (1) intraday position (midpoint vs close)
46
+ and (2) a fundamental/momentum field ranked over 252 days.
47
  """
48
  if group_key is None:
49
  group_key = get_group_for_expression(prefer_novel=True)
 
55
  f"ts_decay_linear("
56
  f"group_neutralize("
57
  f"rank("
58
+ f"zscore(ts_rank((high + low) / 2 - close, 252)) "
59
+ f"+ {sign}zscore(ts_rank({field_ref}, 252))"
60
  f"), {group_key}), 5)"
61
  )
62
  return expr
 
64
 
65
  def generate_alpha6_variant(field: BrainField, group_key: str = None) -> str:
66
  """
67
+ Alpha 6 archetype: microstructure score + value leg.
68
+
69
+ Microstructure: rank((vwap - close) / close) * rank(volume / ts_mean(volume, 20))
70
+ Value leg: zscore(ts_rank(field, 252))
71
 
72
+ NOTE: The structure is multiplicative in the micro part and additive
73
+ with the value leg. The coefficients are decorative inside rank().
74
  """
75
  if group_key is None:
76
  group_key = get_group_for_expression(prefer_novel=True)
 
82
  f"ts_decay_linear("
83
  f"group_neutralize("
84
  f"rank("
85
+ f"rank((vwap - close) / close) * rank(volume / ts_mean(volume, 20)) "
86
+ f"+ {sign}zscore(ts_rank({field_ref}, 252))"
87
  f"), {group_key}), 5)"
88
  )
89
  return expr
 
129
  return expr
130
 
131
 
132
def generate_mean_reversion(field: BrainField, horizon: int = 20, group_key: str = None) -> str:
    """
    Build a mean-reversion expression that trades against a field's usual sign.

    The field's natural convention is inverted: a ``LONG_LOW`` field is used
    un-negated, while every other convention (``LONG_HIGH`` included) is
    negated. The signed term is ``zscore(ts_rank(<field>, horizon))``, which
    is then wrapped in ``group_neutralize`` over ``group_key`` and a 5-period
    ``ts_decay_linear``.

    Args:
        field: Field to build the signal from; its ``sign`` attribute
            selects the prefix.
        horizon: Lookback window passed to ``ts_rank``.
        group_key: Grouping passed to ``group_neutralize``. When ``None``,
            one is obtained from ``get_group_for_expression(prefer_novel=True)``.

    Returns:
        The expression as a string.
    """
    if group_key is None:
        group_key = get_group_for_expression(prefer_novel=True)

    # Contrarian flip: only LONG_LOW fields keep a positive sign here.
    prefix = "" if field.sign == SignConvention.LONG_LOW else "-"
    source = _get_bf(field)

    signal = f"{prefix}zscore(ts_rank({source}, {horizon}))"
    return f"ts_decay_linear(group_neutralize({signal}, {group_key}), 5)"
157
+
158
+
159
  def generate_batch_from_proven_templates(count: int = 5) -> list[dict]:
160
  """
161
  Generate a batch of alphas using PROVEN templates with novel fields.
 
163
 
164
  Returns list of dicts with expression, field, template, group_key.
165
  """
166
+ # Priority: goldmine (AC=0) first, then AC=1, then AC≤5
167
  priority_fields = GOLDMINE_FIELDS + TIER1_MODEL77_FIELDS + TIER2_MODEL16_FIELDS + TIER3_ANALYST_FIELDS
168
 
169
  # Shuffle for variety but keep priority ordering
 
175
  ("alpha6", generate_alpha6_variant),
176
  ("pure_rank", generate_pure_field_rank),
177
  ("delta_momentum", generate_delta_momentum),
178
+ ("mean_reversion", generate_mean_reversion),
179
  ]
180
 
181
  results = []