Upload alpha_factory/personas/expression_compiler.py with huggingface_hub
Browse files
alpha_factory/personas/expression_compiler.py
ADDED
|
@@ -0,0 +1,129 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Expression Compiler — Persona 2 (Hybrid: Jinja + Tinyfish LLM)
|
| 3 |
+
Converts Blueprint JSON → valid BRAIN expression string.
|
| 4 |
+
95% handled by templates. LLM only for novel structures.
|
| 5 |
+
"""
|
| 6 |
+
from jinja2 import Environment, BaseLoader
|
| 7 |
+
from ..infra.llm_client import LLMClient
|
| 8 |
+
from ..schemas import Blueprint, Expression
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
# ─── Jinja Templates for proven archetypes ───────────────────────────────
|
| 12 |
+
# Maps a blueprint archetype name to the Jinja source that renders its BRAIN
# expression. Every template receives the blueprint as ``bp`` and only ever
# reads the FIRST field of a component (``fields[0]``).
TEMPLATES = {
    # Weighted sum with one term per component:
    #   weight * group_zscore(rank(ts_mean(field, horizon)), neutralization)
    "value_quality_blend": """
{%- set comps = [] -%}
{%- for c in bp.components -%}
{%- set _ = comps.append(c.weight|string ~ " * group_zscore(rank(ts_mean(" ~ c.fields[0] ~ ", " ~ c.horizon_days ~ ")), " ~ bp.neutralization.value ~ ")") -%}
{%- endfor -%}
{{ comps | join(" + ") }}
""",
    # Two weighted zscore(ts_rank(field, horizon)) legs. With a single
    # component the second leg reuses the first field with horizon 20 and
    # weight 0.3.
    "multi_horizon_mr": """
{%- set main = bp.components[0] -%}
zscore(ts_rank({{ main.fields[0] }}, {{ main.horizon_days }})) * {{ main.weight }} + zscore(ts_rank({{ bp.components[1].fields[0] if bp.components|length > 1 else main.fields[0] }}, {{ bp.components[1].horizon_days if bp.components|length > 1 else 20 }})) * {{ bp.components[1].weight if bp.components|length > 1 else 0.3 }}
""",
    # ts_delta over the horizon divided by ts_std over 4x the horizon (plus
    # 0.001 in the denominator), then z-scored. First component only.
    "vol_scaled_shock": """
{%- set c = bp.components[0] -%}
zscore(ts_delta({{ c.fields[0] }}, {{ c.horizon_days }}) / (ts_std({{ c.fields[0] }}, {{ c.horizon_days * 4 }}) + 0.001))
""",
    # zscore(rank(field)) smoothed by ts_decay_linear with the blueprint decay.
    "intraday_mr_decay": """
{%- set c = bp.components[0] -%}
ts_decay_linear(zscore(rank({{ c.fields[0] }})), {{ bp.decay }})
""",
    # group_zscore of the field's ts_delta, grouped by the neutralization value.
    "pead_revisions": """
{%- set c = bp.components[0] -%}
group_zscore(ts_delta({{ c.fields[0] }}, {{ c.horizon_days }}), {{ bp.neutralization.value }})
""",
    # Weighted sum of zscore(rank(field)) terms (one per component), wrapped
    # in ts_decay_linear with the blueprint decay.
    "fundamental_yield_composite": """
{%- set comps = [] -%}
{%- for c in bp.components -%}
{%- set _ = comps.append(c.weight|string ~ " * zscore(rank(" ~ c.fields[0] ~ "))") -%}
{%- endfor -%}
ts_decay_linear({{ comps | join(" + ") }}, {{ bp.decay }})
""",
}
|
| 44 |
+
|
| 45 |
+
# Shared Jinja environment. Templates are compiled from in-memory strings via
# ``_env.from_string`` (see ``compile_expression``), so the bare ``BaseLoader``
# is only a placeholder: nothing is ever loaded by template name.
_env = Environment(loader=BaseLoader())
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
# System prompt for the LLM fallback path. It constrains the model to emit a
# bare BRAIN expression built only from the operators listed at the bottom.
COMPILER_SYSTEM_PROMPT = """You are a BRAIN expression compiler. Your ONLY job is to convert
a factor blueprint into a valid WorldQuant BRAIN expression.

RULES:
1. Use ONLY operators from the BRAIN operator catalogue.
2. Every additive operand MUST be wrapped in zscore(), rank(), or group_zscore().
3. Output ONLY the expression string — no explanation, no markdown.
4. The expression must be syntactically valid (balanced parentheses, correct arity).
5. Use ts_decay_linear for smoothing if decay > 0.
6. Use group_zscore or indneutralize for neutralization within the expression if specified.

Available operators: rank, zscore, group_zscore, group_rank, ts_mean, ts_std, ts_sum,
ts_delta, ts_decay_linear, ts_rank, ts_argmax, ts_argmin, ts_correlation,
ts_covariance, ts_regression, winsorize, abs, log, sign, power, sqrt, max, min,
if_else, less, greater, filter, trade_when, ts_backfill, indneutralize
"""
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
async def compile_expression(
    blueprint: Blueprint,
    llm: LLMClient,
    model: str | None = None,
) -> Expression:
    """Convert a Blueprint into a BRAIN expression.

    If ``blueprint.archetype`` is a key of ``TEMPLATES`` the expression is
    rendered with Jinja and the LLM is never called. Any other archetype is
    handed to ``llm.generate_json`` together with ``COMPILER_SYSTEM_PROMPT``.

    Args:
        blueprint: Factor blueprint to compile.
        llm: Client used only on the fallback path.
        model: Model name for the fallback path; when falsy,
            ``llm.config.tinyfish_model`` is used instead.

    Returns:
        On the template path, an ``Expression`` whose ``fields_used`` and
        ``operators_used`` are de-duplicated in first-seen order. On the
        fallback path, whatever ``llm.generate_json`` returns for
        ``schema=Expression``.

    Raises:
        ValueError: If a template archetype is requested for a blueprint
            that has no components.
    """
    # Try template first (95% of cases)
    if blueprint.archetype in TEMPLATES:
        # Without components the templates either render an empty/malformed
        # expression or fail inside Jinja; reject the input explicitly.
        if not blueprint.components:
            raise ValueError(
                f"Blueprint with archetype {blueprint.archetype!r} has no components"
            )

        template = _env.from_string(TEMPLATES[blueprint.archetype])
        expr_text = template.render(bp=blueprint).strip()

        # Metadata: dict.fromkeys de-duplicates while keeping first-seen
        # order, so the result is deterministic (a set round-trip is not).
        fields_used = list(
            dict.fromkeys(f for c in blueprint.components for f in c.fields)
        )
        operators_used = list(
            dict.fromkeys(op for c in blueprint.components for op in c.operators)
        )

        return Expression(
            expression=expr_text,
            fields_used=fields_used,
            operators_used=operators_used,
            archetype_used=blueprint.archetype,
        )

    # LLM fallback for novel archetypes
    user_prompt = f"""Convert this factor blueprint into a valid BRAIN expression:

Blueprint:
- Theme: {blueprint.theme}
- Components:
{_format_components(blueprint)}
- Neutralization: {blueprint.neutralization.value}
- Decay: {blueprint.decay}
- Target: cross-sectional rank → long/short

Output a valid BRAIN expression. Wrap all additive operands in zscore() or rank().
Apply ts_decay_linear with decay={blueprint.decay} if decay > 0."""

    result = await llm.generate_json(
        prompt=user_prompt,
        schema=Expression,
        model=model or llm.config.tinyfish_model,
        temperature=0.1,  # low temperature: near-deterministic compiler output is wanted
        system_prompt=COMPILER_SYSTEM_PROMPT,
    )

    return result
|
| 119 |
+
|
| 120 |
+
|
| 121 |
+
def _format_components(bp: Blueprint) -> str:
    """Format the blueprint's components as a numbered list for the LLM prompt.

    Args:
        bp: Blueprint whose ``components`` are listed. Each component is read
            for ``name``, ``fields``, ``operators``, ``horizon_days``,
            ``weight`` and ``sign_direction``.

    Returns:
        One line per component, numbered from 1 and joined with newlines;
        an empty string when there are no components.
    """
    return "\n".join(
        f" {idx}. {c.name}: fields={c.fields}, operators={c.operators}, "
        f"horizon={c.horizon_days}d, weight={c.weight}, sign={c.sign_direction}"
        for idx, c in enumerate(bp.components, start=1)
    )
|