Fix pipeline: enforce ts_decay_linear on all expressions + flip short-horizon returns to reversal"
Browse files
alpha_factory/personas/expression_compiler.py
CHANGED
|
@@ -2,6 +2,11 @@
|
|
| 2 |
Expression Compiler β Persona 2 (Hybrid: Jinja + Tinyfish LLM)
|
| 3 |
Converts Blueprint JSON β valid BRAIN expression string.
|
| 4 |
95% handled by templates. LLM only for novel structures.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
"""
|
| 6 |
from jinja2 import Environment, BaseLoader
|
| 7 |
from ..infra.llm_client import LLMClient
|
|
@@ -19,7 +24,8 @@ TEMPLATES = {
|
|
| 19 |
""",
|
| 20 |
"multi_horizon_mr": """
|
| 21 |
{%- set main = bp.components[0] -%}
|
| 22 |
-
|
|
|
|
| 23 |
""",
|
| 24 |
"vol_scaled_shock": """
|
| 25 |
{%- set c = bp.components[0] -%}
|
|
@@ -27,7 +33,7 @@ zscore(ts_delta({{ c.fields[0] }}, {{ c.horizon_days }}) / (ts_std({{ c.fields[0
|
|
| 27 |
""",
|
| 28 |
"intraday_mr_decay": """
|
| 29 |
{%- set c = bp.components[0] -%}
|
| 30 |
-
|
| 31 |
""",
|
| 32 |
"pead_revisions": """
|
| 33 |
{%- set c = bp.components[0] -%}
|
|
@@ -38,7 +44,7 @@ group_zscore(ts_delta({{ c.fields[0] }}, {{ c.horizon_days }}), {{ bp.neutraliza
|
|
| 38 |
{%- for c in bp.components -%}
|
| 39 |
{%- set _ = comps.append(c.weight|string ~ " * zscore(rank(" ~ c.fields[0] ~ "))") -%}
|
| 40 |
{%- endfor -%}
|
| 41 |
-
|
| 42 |
""",
|
| 43 |
}
|
| 44 |
|
|
@@ -53,8 +59,9 @@ RULES:
|
|
| 53 |
2. Every additive operand MUST be wrapped in zscore(), rank(), or group_zscore().
|
| 54 |
3. Output ONLY the expression string β no explanation, no markdown.
|
| 55 |
4. The expression must be syntactically valid (balanced parentheses, correct arity).
|
| 56 |
-
5.
|
| 57 |
-
6.
|
|
|
|
| 58 |
|
| 59 |
Available operators: rank, zscore, group_zscore, group_rank, ts_mean, ts_std, ts_sum,
|
| 60 |
ts_delta, ts_decay_linear, ts_rank, ts_argmax, ts_argmin, ts_correlation,
|
|
@@ -63,6 +70,40 @@ if_else, less, greater, filter, trade_when, ts_backfill, indneutralize
|
|
| 63 |
"""
|
| 64 |
|
| 65 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
async def compile_expression(
|
| 67 |
blueprint: Blueprint,
|
| 68 |
llm: LLMClient,
|
|
@@ -70,7 +111,7 @@ async def compile_expression(
|
|
| 70 |
"""
|
| 71 |
Convert a Blueprint to a BRAIN expression.
|
| 72 |
Uses Jinja template if archetype is known; LLM fallback for novel structures.
|
| 73 |
-
|
| 74 |
"""
|
| 75 |
# Try template first (95% of cases)
|
| 76 |
if blueprint.archetype in TEMPLATES:
|
|
@@ -84,10 +125,13 @@ async def compile_expression(
|
|
| 84 |
fields_used.extend(c.fields)
|
| 85 |
ops_used.extend(c.operators)
|
| 86 |
|
|
|
|
|
|
|
|
|
|
| 87 |
return Expression(
|
| 88 |
expression=expr_text,
|
| 89 |
fields_used=list(set(fields_used)),
|
| 90 |
-
operators_used=list(set(ops_used)),
|
| 91 |
archetype_used=blueprint.archetype,
|
| 92 |
)
|
| 93 |
|
|
@@ -99,11 +143,12 @@ Blueprint:
|
|
| 99 |
- Components:
|
| 100 |
{_format_components(blueprint)}
|
| 101 |
- Neutralization: {blueprint.neutralization.value}
|
| 102 |
-
- Decay: {blueprint.decay}
|
| 103 |
- Target: cross-sectional rank β long/short
|
| 104 |
|
| 105 |
-
|
| 106 |
-
|
|
|
|
|
|
|
| 107 |
|
| 108 |
result = await llm.generate_json(
|
| 109 |
prompt=user_prompt,
|
|
@@ -113,6 +158,11 @@ Apply ts_decay_linear with decay={blueprint.decay} if decay > 0."""
|
|
| 113 |
system_prompt=COMPILER_SYSTEM_PROMPT,
|
| 114 |
)
|
| 115 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
return result
|
| 117 |
|
| 118 |
|
|
|
|
| 2 |
Expression Compiler β Persona 2 (Hybrid: Jinja + Tinyfish LLM)
|
| 3 |
Converts Blueprint JSON β valid BRAIN expression string.
|
| 4 |
95% handled by templates. LLM only for novel structures.
|
| 5 |
+
|
| 6 |
+
POST-COMPILE RULES (mandatory, applied to ALL expressions):
|
| 7 |
+
1. Wrap in ts_decay_linear if decay > 0 (reduces turnover)
|
| 8 |
+
2. Negate if short-horizon returns theme (cross-sectional reversal)
|
| 9 |
+
3. Ensure outer wrapper is unit-safe
|
| 10 |
"""
|
| 11 |
from jinja2 import Environment, BaseLoader
|
| 12 |
from ..infra.llm_client import LLMClient
|
|
|
|
| 24 |
""",
|
| 25 |
"multi_horizon_mr": """
|
| 26 |
{%- set main = bp.components[0] -%}
|
| 27 |
+
{%- set sign = "-" if bp.components[0].sign_direction == "long_low" else "" -%}
|
| 28 |
+
{{ sign }}(zscore(ts_rank({{ main.fields[0] }}, {{ main.horizon_days }})) * {{ main.weight }}{% for c in bp.components[1:] %} + zscore(ts_rank({{ c.fields[0] }}, {{ c.horizon_days }})) * {{ c.weight }}{% endfor %})
|
| 29 |
""",
|
| 30 |
"vol_scaled_shock": """
|
| 31 |
{%- set c = bp.components[0] -%}
|
|
|
|
| 33 |
""",
|
| 34 |
"intraday_mr_decay": """
|
| 35 |
{%- set c = bp.components[0] -%}
|
| 36 |
+
zscore(rank({{ c.fields[0] }}))
|
| 37 |
""",
|
| 38 |
"pead_revisions": """
|
| 39 |
{%- set c = bp.components[0] -%}
|
|
|
|
| 44 |
{%- for c in bp.components -%}
|
| 45 |
{%- set _ = comps.append(c.weight|string ~ " * zscore(rank(" ~ c.fields[0] ~ "))") -%}
|
| 46 |
{%- endfor -%}
|
| 47 |
+
{{ comps | join(" + ") }}
|
| 48 |
""",
|
| 49 |
}
|
| 50 |
|
|
|
|
| 59 |
2. Every additive operand MUST be wrapped in zscore(), rank(), or group_zscore().
|
| 60 |
3. Output ONLY the expression string β no explanation, no markdown.
|
| 61 |
4. The expression must be syntactically valid (balanced parentheses, correct arity).
|
| 62 |
+
5. Do NOT include ts_decay_linear β that will be added automatically post-compilation.
|
| 63 |
+
6. For short-horizon returns (<=20 days), use NEGATIVE sign (cross-sectional reversal).
|
| 64 |
+
7. Use group_zscore or indneutralize for neutralization within the expression if specified.
|
| 65 |
|
| 66 |
Available operators: rank, zscore, group_zscore, group_rank, ts_mean, ts_std, ts_sum,
|
| 67 |
ts_delta, ts_decay_linear, ts_rank, ts_argmax, ts_argmin, ts_correlation,
|
|
|
|
| 70 |
"""
|
| 71 |
|
| 72 |
|
| 73 |
+
# βββ Post-compilation rules (mandatory) βββββββββββββββββββββββββββββββββ
|
| 74 |
+
|
| 75 |
+
def _apply_post_compile_rules(expression: str, blueprint: Blueprint) -> str:
|
| 76 |
+
"""
|
| 77 |
+
Mandatory post-compilation transformations:
|
| 78 |
+
1. Apply ts_decay_linear to reduce turnover (ALWAYS, min decay=5)
|
| 79 |
+
2. Apply sign flip for short-horizon reversal themes
|
| 80 |
+
3. Ensure the expression won't exceed 70% turnover
|
| 81 |
+
"""
|
| 82 |
+
expr = expression.strip()
|
| 83 |
+
|
| 84 |
+
# Rule 1: Determine if sign should be flipped (short-horizon reversal)
|
| 85 |
+
# In cross-section, short-term returns (<= 20 days) are mean-reverting
|
| 86 |
+
needs_flip = False
|
| 87 |
+
for c in blueprint.components:
|
| 88 |
+
if any(f in ["returns", "close", "ts_returns"] for f in c.fields):
|
| 89 |
+
if c.horizon_days <= 20 and c.sign_direction == "long_high":
|
| 90 |
+
needs_flip = True
|
| 91 |
+
break
|
| 92 |
+
|
| 93 |
+
if needs_flip:
|
| 94 |
+
expr = f"-({expr})"
|
| 95 |
+
|
| 96 |
+
# Rule 2: ALWAYS wrap in ts_decay_linear to control turnover
|
| 97 |
+
# Minimum decay = 5 days, use blueprint.decay if higher
|
| 98 |
+
decay = max(blueprint.decay, 5)
|
| 99 |
+
|
| 100 |
+
# Don't double-wrap if already has ts_decay_linear
|
| 101 |
+
if not expr.startswith("ts_decay_linear("):
|
| 102 |
+
expr = f"ts_decay_linear({expr}, {decay})"
|
| 103 |
+
|
| 104 |
+
return expr
|
| 105 |
+
|
| 106 |
+
|
| 107 |
async def compile_expression(
|
| 108 |
blueprint: Blueprint,
|
| 109 |
llm: LLMClient,
|
|
|
|
| 111 |
"""
|
| 112 |
Convert a Blueprint to a BRAIN expression.
|
| 113 |
Uses Jinja template if archetype is known; LLM fallback for novel structures.
|
| 114 |
+
ALWAYS applies post-compile rules (decay, sign correction).
|
| 115 |
"""
|
| 116 |
# Try template first (95% of cases)
|
| 117 |
if blueprint.archetype in TEMPLATES:
|
|
|
|
| 125 |
fields_used.extend(c.fields)
|
| 126 |
ops_used.extend(c.operators)
|
| 127 |
|
| 128 |
+
# Apply mandatory post-compile rules
|
| 129 |
+
expr_text = _apply_post_compile_rules(expr_text, blueprint)
|
| 130 |
+
|
| 131 |
return Expression(
|
| 132 |
expression=expr_text,
|
| 133 |
fields_used=list(set(fields_used)),
|
| 134 |
+
operators_used=list(set(ops_used)) + ["ts_decay_linear"],
|
| 135 |
archetype_used=blueprint.archetype,
|
| 136 |
)
|
| 137 |
|
|
|
|
| 143 |
- Components:
|
| 144 |
{_format_components(blueprint)}
|
| 145 |
- Neutralization: {blueprint.neutralization.value}
|
|
|
|
| 146 |
- Target: cross-sectional rank β long/short
|
| 147 |
|
| 148 |
+
IMPORTANT: Do NOT include ts_decay_linear β it will be added automatically.
|
| 149 |
+
For short-horizon returns (<=20 days), use NEGATIVE sign (reversal works better cross-sectionally).
|
| 150 |
+
|
| 151 |
+
Output a valid BRAIN expression. Wrap all additive operands in zscore() or rank()."""
|
| 152 |
|
| 153 |
result = await llm.generate_json(
|
| 154 |
prompt=user_prompt,
|
|
|
|
| 158 |
system_prompt=COMPILER_SYSTEM_PROMPT,
|
| 159 |
)
|
| 160 |
|
| 161 |
+
# Apply mandatory post-compile rules to LLM output too
|
| 162 |
+
result.expression = _apply_post_compile_rules(result.expression, blueprint)
|
| 163 |
+
if "ts_decay_linear" not in result.operators_used:
|
| 164 |
+
result.operators_used.append("ts_decay_linear")
|
| 165 |
+
|
| 166 |
return result
|
| 167 |
|
| 168 |
|