gaurv007 committed on
Commit
61516cd
·
verified ·
1 Parent(s): 5090758

Upload alpha_factory/personas/expression_compiler.py with huggingface_hub

Browse files
alpha_factory/personas/expression_compiler.py ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Expression Compiler — Persona 2 (Hybrid: Jinja + Tinyfish LLM)
3
+ Converts Blueprint JSON → valid BRAIN expression string.
4
+ 95% handled by templates. LLM only for novel structures.
5
+ """
6
+ from jinja2 import Environment, BaseLoader
7
+ from ..infra.llm_client import LLMClient
8
+ from ..schemas import Blueprint, Expression
9
+
10
+
11
# ─── Jinja templates for proven archetypes ───────────────────────────────
# Maps a blueprint archetype name to the Jinja source that renders its BRAIN
# expression. Each template receives the blueprint as ``bp`` and reads
# ``bp.components[i].fields[0]``, ``.horizon_days`` and ``.weight``, plus
# ``bp.neutralization.value`` and ``bp.decay`` where relevant.
#
# Whitespace control (``{%- ... -%}``) plus the caller's ``.strip()`` keep the
# rendered expression on a single line regardless of template indentation.
TEMPLATES = {
    # Weighted sum of group-neutralised, ranked rolling means (one term per
    # component).
    "value_quality_blend": """
    {%- set comps = [] -%}
    {%- for c in bp.components -%}
    {%- set _ = comps.append(c.weight|string ~ " * group_zscore(rank(ts_mean(" ~ c.fields[0] ~ ", " ~ c.horizon_days ~ ")), " ~ bp.neutralization.value ~ ")") -%}
    {%- endfor -%}
    {{ comps | join(" + ") }}
    """,
    # Two ts_rank legs; when only one component exists the second leg reuses
    # the first field with a 20-day horizon and a 0.3 weight.
    "multi_horizon_mr": """
    {%- set main = bp.components[0] -%}
    zscore(ts_rank({{ main.fields[0] }}, {{ main.horizon_days }})) * {{ main.weight }} + zscore(ts_rank({{ bp.components[1].fields[0] if bp.components|length > 1 else main.fields[0] }}, {{ bp.components[1].horizon_days if bp.components|length > 1 else 20 }})) * {{ bp.components[1].weight if bp.components|length > 1 else 0.3 }}
    """,
    # Change over the horizon divided by the std over 4x the horizon; the
    # 0.001 term keeps the denominator non-zero.
    "vol_scaled_shock": """
    {%- set c = bp.components[0] -%}
    zscore(ts_delta({{ c.fields[0] }}, {{ c.horizon_days }}) / (ts_std({{ c.fields[0] }}, {{ c.horizon_days * 4 }}) + 0.001))
    """,
    # Ranked, z-scored field. Linear decay is applied only when bp.decay is
    # positive, matching rule 5 of the compiler system prompt.
    "intraday_mr_decay": """
    {%- set c = bp.components[0] -%}
    {%- set core = "zscore(rank(" ~ c.fields[0] ~ "))" -%}
    {%- if bp.decay and bp.decay > 0 -%}
    ts_decay_linear({{ core }}, {{ bp.decay }})
    {%- else -%}
    {{ core }}
    {%- endif -%}
    """,
    # Group-neutralised change of the first component's field.
    "pead_revisions": """
    {%- set c = bp.components[0] -%}
    group_zscore(ts_delta({{ c.fields[0] }}, {{ c.horizon_days }}), {{ bp.neutralization.value }})
    """,
    # Weighted sum of ranked, z-scored fields. Linear decay is applied only
    # when bp.decay is positive, matching rule 5 of the compiler system prompt.
    "fundamental_yield_composite": """
    {%- set comps = [] -%}
    {%- for c in bp.components -%}
    {%- set _ = comps.append(c.weight|string ~ " * zscore(rank(" ~ c.fields[0] ~ "))") -%}
    {%- endfor -%}
    {%- set blend = comps | join(" + ") -%}
    {%- if bp.decay and bp.decay > 0 -%}
    ts_decay_linear({{ blend }}, {{ bp.decay }})
    {%- else -%}
    {{ blend }}
    {%- endif -%}
    """,
}
44
+
45
# Shared Jinja environment. BaseLoader is enough here because templates are
# compiled from in-memory strings with ``from_string`` rather than loaded by
# name.
_env: Environment = Environment(loader=BaseLoader())
47
+
48
+
49
# System prompt for the LLM fallback path: it constrains the model to emit a
# bare BRAIN expression using only the operators listed at the bottom.
COMPILER_SYSTEM_PROMPT = """You are a BRAIN expression compiler. Your ONLY job is to convert
a factor blueprint into a valid WorldQuant BRAIN expression.

RULES:
1. Use ONLY operators from the BRAIN operator catalogue.
2. Every additive operand MUST be wrapped in zscore(), rank(), or group_zscore().
3. Output ONLY the expression string — no explanation, no markdown.
4. The expression must be syntactically valid (balanced parentheses, correct arity).
5. Use ts_decay_linear for smoothing if decay > 0.
6. Use group_zscore or indneutralize for neutralization within the expression if specified.

Available operators: rank, zscore, group_zscore, group_rank, ts_mean, ts_std, ts_sum,
ts_delta, ts_decay_linear, ts_rank, ts_argmax, ts_argmin, ts_correlation,
ts_covariance, ts_regression, winsorize, abs, log, sign, power, sqrt, max, min,
if_else, less, greater, filter, trade_when, ts_backfill, indneutralize
"""
65
+
66
+
67
async def compile_expression(
    blueprint: Blueprint,
    llm: LLMClient,
    model: str | None = None,
) -> Expression:
    """Convert a Blueprint into a BRAIN expression.

    Known archetypes (keys of ``TEMPLATES``) are rendered with Jinja and never
    touch the LLM. Any other archetype is sent to the LLM together with
    ``COMPILER_SYSTEM_PROMPT``.

    Args:
        blueprint: Factor blueprint. Its ``archetype``, ``components``,
            ``neutralization``, ``decay`` and ``theme`` attributes are read.
        llm: Client used for the fallback path only.
        model: Model name for the fallback; defaults to
            ``llm.config.tinyfish_model`` when falsy.

    Returns:
        For a templated archetype, an ``Expression`` holding the rendered
        expression, the de-duplicated fields and operators declared by the
        components (in first-seen order), and the archetype name. Otherwise,
        whatever ``llm.generate_json`` returns for ``schema=Expression``
        (presumably an ``Expression`` instance; confirm against LLMClient).

    Raises:
        ValueError: If the blueprint has no components. Neither a template nor
            the LLM can produce a meaningful expression from nothing.
    """
    if not blueprint.components:
        raise ValueError(
            "Cannot compile an expression from a blueprint with no components"
        )

    # Template path: deterministic rendering for proven archetypes.
    if blueprint.archetype in TEMPLATES:
        template = _env.from_string(TEMPLATES[blueprint.archetype])
        expr_text = template.render(bp=blueprint).strip()

        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # metadata is reproducible across runs (set ordering of strings is not).
        fields_used = list(
            dict.fromkeys(f for c in blueprint.components for f in c.fields)
        )
        operators_used = list(
            dict.fromkeys(op for c in blueprint.components for op in c.operators)
        )

        return Expression(
            expression=expr_text,
            fields_used=fields_used,
            operators_used=operators_used,
            archetype_used=blueprint.archetype,
        )

    # LLM fallback for novel archetypes.
    user_prompt = f"""Convert this factor blueprint into a valid BRAIN expression:

Blueprint:
- Theme: {blueprint.theme}
- Components:
{_format_components(blueprint)}
- Neutralization: {blueprint.neutralization.value}
- Decay: {blueprint.decay}
- Target: cross-sectional rank → long/short

Output a valid BRAIN expression. Wrap all additive operands in zscore() or rank().
Apply ts_decay_linear with decay={blueprint.decay} if decay > 0."""

    # Low temperature: the output must be a syntactically valid expression,
    # not creative text.
    return await llm.generate_json(
        prompt=user_prompt,
        schema=Expression,
        model=model or llm.config.tinyfish_model,
        temperature=0.1,
        system_prompt=COMPILER_SYSTEM_PROMPT,
    )
119
+
120
+
121
def _format_components(bp: Blueprint) -> str:
    """Render the blueprint's components as a numbered list for the LLM prompt.

    Produces one line per component, numbered from 1, showing its name,
    fields, operators, horizon in days, weight and sign direction. Lines are
    joined with newlines; an empty component list yields an empty string.
    """
    return "\n".join(
        f" {position}. {comp.name}: fields={comp.fields}, operators={comp.operators}, "
        f"horizon={comp.horizon_days}d, weight={comp.weight}, sign={comp.sign_direction}"
        for position, comp in enumerate(bp.components, start=1)
    )