gaurv007 committed on
Commit
54f9c99
·
verified ·
1 Parent(s): b7308ec

Fix pipeline: enforce ts_decay_linear on all expressions + flip short-horizon returns to reversal

Browse files
alpha_factory/personas/expression_compiler.py CHANGED
@@ -2,6 +2,11 @@
2
  Expression Compiler — Persona 2 (Hybrid: Jinja + Tinyfish LLM)
3
  Converts Blueprint JSON → valid BRAIN expression string.
4
  95% handled by templates. LLM only for novel structures.
 
 
 
 
 
5
  """
6
  from jinja2 import Environment, BaseLoader
7
  from ..infra.llm_client import LLMClient
@@ -19,7 +24,8 @@ TEMPLATES = {
19
  """,
20
  "multi_horizon_mr": """
21
  {%- set main = bp.components[0] -%}
22
- zscore(ts_rank({{ main.fields[0] }}, {{ main.horizon_days }})) * {{ main.weight }} + zscore(ts_rank({{ bp.components[1].fields[0] if bp.components|length > 1 else main.fields[0] }}, {{ bp.components[1].horizon_days if bp.components|length > 1 else 20 }})) * {{ bp.components[1].weight if bp.components|length > 1 else 0.3 }}
 
23
  """,
24
  "vol_scaled_shock": """
25
  {%- set c = bp.components[0] -%}
@@ -27,7 +33,7 @@ zscore(ts_delta({{ c.fields[0] }}, {{ c.horizon_days }}) / (ts_std({{ c.fields[0
27
  """,
28
  "intraday_mr_decay": """
29
  {%- set c = bp.components[0] -%}
30
- ts_decay_linear(zscore(rank({{ c.fields[0] }})), {{ bp.decay }})
31
  """,
32
  "pead_revisions": """
33
  {%- set c = bp.components[0] -%}
@@ -38,7 +44,7 @@ group_zscore(ts_delta({{ c.fields[0] }}, {{ c.horizon_days }}), {{ bp.neutraliza
38
  {%- for c in bp.components -%}
39
  {%- set _ = comps.append(c.weight|string ~ " * zscore(rank(" ~ c.fields[0] ~ "))") -%}
40
  {%- endfor -%}
41
- ts_decay_linear({{ comps | join(" + ") }}, {{ bp.decay }})
42
  """,
43
  }
44
 
@@ -53,8 +59,9 @@ RULES:
53
  2. Every additive operand MUST be wrapped in zscore(), rank(), or group_zscore().
54
  3. Output ONLY the expression string — no explanation, no markdown.
55
  4. The expression must be syntactically valid (balanced parentheses, correct arity).
56
- 5. Use ts_decay_linear for smoothing if decay > 0.
57
- 6. Use group_zscore or indneutralize for neutralization within the expression if specified.
 
58
 
59
  Available operators: rank, zscore, group_zscore, group_rank, ts_mean, ts_std, ts_sum,
60
  ts_delta, ts_decay_linear, ts_rank, ts_argmax, ts_argmin, ts_correlation,
@@ -63,6 +70,40 @@ if_else, less, greater, filter, trade_when, ts_backfill, indneutralize
63
  """
64
 
65
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  async def compile_expression(
67
  blueprint: Blueprint,
68
  llm: LLMClient,
@@ -70,7 +111,7 @@ async def compile_expression(
70
  """
71
  Convert a Blueprint to a BRAIN expression.
72
  Uses Jinja template if archetype is known; LLM fallback for novel structures.
73
- Uses tier="tinyfish" — ModelManager resolves to user's selected model.
74
  """
75
  # Try template first (95% of cases)
76
  if blueprint.archetype in TEMPLATES:
@@ -84,10 +125,13 @@ async def compile_expression(
84
  fields_used.extend(c.fields)
85
  ops_used.extend(c.operators)
86
 
 
 
 
87
  return Expression(
88
  expression=expr_text,
89
  fields_used=list(set(fields_used)),
90
- operators_used=list(set(ops_used)),
91
  archetype_used=blueprint.archetype,
92
  )
93
 
@@ -99,11 +143,12 @@ Blueprint:
99
  - Components:
100
  {_format_components(blueprint)}
101
  - Neutralization: {blueprint.neutralization.value}
102
- - Decay: {blueprint.decay}
103
  - Target: cross-sectional rank → long/short
104
 
105
- Output a valid BRAIN expression. Wrap all additive operands in zscore() or rank().
106
- Apply ts_decay_linear with decay={blueprint.decay} if decay > 0."""
 
 
107
 
108
  result = await llm.generate_json(
109
  prompt=user_prompt,
@@ -113,6 +158,11 @@ Apply ts_decay_linear with decay={blueprint.decay} if decay > 0."""
113
  system_prompt=COMPILER_SYSTEM_PROMPT,
114
  )
115
 
 
 
 
 
 
116
  return result
117
 
118
 
 
2
  Expression Compiler — Persona 2 (Hybrid: Jinja + Tinyfish LLM)
3
  Converts Blueprint JSON → valid BRAIN expression string.
4
  95% handled by templates. LLM only for novel structures.
5
+
6
+ POST-COMPILE RULES (mandatory, applied to ALL expressions):
7
+ 1. Wrap in ts_decay_linear if decay > 0 (reduces turnover)
8
+ 2. Negate if short-horizon returns theme (cross-sectional reversal)
9
+ 3. Ensure outer wrapper is unit-safe
10
  """
11
  from jinja2 import Environment, BaseLoader
12
  from ..infra.llm_client import LLMClient
 
24
  """,
25
  "multi_horizon_mr": """
26
  {%- set main = bp.components[0] -%}
27
+ {%- set sign = "-" if bp.components[0].sign_direction == "long_low" else "" -%}
28
+ {{ sign }}(zscore(ts_rank({{ main.fields[0] }}, {{ main.horizon_days }})) * {{ main.weight }}{% for c in bp.components[1:] %} + zscore(ts_rank({{ c.fields[0] }}, {{ c.horizon_days }})) * {{ c.weight }}{% endfor %})
29
  """,
30
  "vol_scaled_shock": """
31
  {%- set c = bp.components[0] -%}
 
33
  """,
34
  "intraday_mr_decay": """
35
  {%- set c = bp.components[0] -%}
36
+ zscore(rank({{ c.fields[0] }}))
37
  """,
38
  "pead_revisions": """
39
  {%- set c = bp.components[0] -%}
 
44
  {%- for c in bp.components -%}
45
  {%- set _ = comps.append(c.weight|string ~ " * zscore(rank(" ~ c.fields[0] ~ "))") -%}
46
  {%- endfor -%}
47
+ {{ comps | join(" + ") }}
48
  """,
49
  }
50
 
 
59
  2. Every additive operand MUST be wrapped in zscore(), rank(), or group_zscore().
60
  3. Output ONLY the expression string — no explanation, no markdown.
61
  4. The expression must be syntactically valid (balanced parentheses, correct arity).
62
+ 5. Do NOT include ts_decay_linear — that will be added automatically post-compilation.
63
+ 6. For short-horizon returns (<=20 days), use NEGATIVE sign (cross-sectional reversal).
64
+ 7. Use group_zscore or indneutralize for neutralization within the expression if specified.
65
 
66
  Available operators: rank, zscore, group_zscore, group_rank, ts_mean, ts_std, ts_sum,
67
  ts_delta, ts_decay_linear, ts_rank, ts_argmax, ts_argmin, ts_correlation,
 
70
  """
71
 
72
 
73
+ # ─── Post-compilation rules (mandatory) ─────────────────────────────────
74
+
75
+ def _apply_post_compile_rules(expression: str, blueprint: Blueprint) -> str:
76
+ """
77
+ Mandatory post-compilation transformations:
78
+ 1. Apply ts_decay_linear to reduce turnover (ALWAYS, min decay=5)
79
+ 2. Apply sign flip for short-horizon reversal themes
80
+ 3. Ensure the expression won't exceed 70% turnover
81
+ """
82
+ expr = expression.strip()
83
+
84
+ # Rule 1: Determine if sign should be flipped (short-horizon reversal)
85
+ # In cross-section, short-term returns (<= 20 days) are mean-reverting
86
+ needs_flip = False
87
+ for c in blueprint.components:
88
+ if any(f in ["returns", "close", "ts_returns"] for f in c.fields):
89
+ if c.horizon_days <= 20 and c.sign_direction == "long_high":
90
+ needs_flip = True
91
+ break
92
+
93
+ if needs_flip:
94
+ expr = f"-({expr})"
95
+
96
+ # Rule 2: ALWAYS wrap in ts_decay_linear to control turnover
97
+ # Minimum decay = 5 days, use blueprint.decay if higher
98
+ decay = max(blueprint.decay, 5)
99
+
100
+ # Don't double-wrap if already has ts_decay_linear
101
+ if not expr.startswith("ts_decay_linear("):
102
+ expr = f"ts_decay_linear({expr}, {decay})"
103
+
104
+ return expr
105
+
106
+
107
  async def compile_expression(
108
  blueprint: Blueprint,
109
  llm: LLMClient,
 
111
  """
112
  Convert a Blueprint to a BRAIN expression.
113
  Uses Jinja template if archetype is known; LLM fallback for novel structures.
114
+ ALWAYS applies post-compile rules (decay, sign correction).
115
  """
116
  # Try template first (95% of cases)
117
  if blueprint.archetype in TEMPLATES:
 
125
  fields_used.extend(c.fields)
126
  ops_used.extend(c.operators)
127
 
128
+ # Apply mandatory post-compile rules
129
+ expr_text = _apply_post_compile_rules(expr_text, blueprint)
130
+
131
  return Expression(
132
  expression=expr_text,
133
  fields_used=list(set(fields_used)),
134
+ operators_used=list(set(ops_used)) + ["ts_decay_linear"],
135
  archetype_used=blueprint.archetype,
136
  )
137
 
 
143
  - Components:
144
  {_format_components(blueprint)}
145
  - Neutralization: {blueprint.neutralization.value}
 
146
  - Target: cross-sectional rank → long/short
147
 
148
+ IMPORTANT: Do NOT include ts_decay_linear β€” it will be added automatically.
149
+ For short-horizon returns (<=20 days), use NEGATIVE sign (reversal works better cross-sectionally).
150
+
151
+ Output a valid BRAIN expression. Wrap all additive operands in zscore() or rank()."""
152
 
153
  result = await llm.generate_json(
154
  prompt=user_prompt,
 
158
  system_prompt=COMPILER_SYSTEM_PROMPT,
159
  )
160
 
161
+ # Apply mandatory post-compile rules to LLM output too
162
+ result.expression = _apply_post_compile_rules(result.expression, blueprint)
163
+ if "ts_decay_linear" not in result.operators_used:
164
+ result.operators_used.append("ts_decay_linear")
165
+
166
  return result
167
 
168