gaurv007 committed on
Commit
4579313
·
verified ·
1 Parent(s): 15c8c77

feat: expression_mutator.py — generates 5 variants from any alpha (decay, horizon, neutralization, vol-scaling, sign flip)

Browse files
alpha_factory/deterministic/expression_mutator.py ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Expression Mutator — Evolutionary alpha improvement.
3
+ Takes a base alpha expression and generates structural variants from 5 mutation types.
4
+
5
+ Mutations:
6
+ 1. Decay adjustment (tries decay windows 3, 5, 7 and 10)
7
+ 2. Horizon change (different lookback window)
8
+ 3. Neutralization swap (novel group key)
9
+ 4. Vol-scaling wrapper
10
+ 5. Sign flip (test opposite direction)
11
+ """
12
+ import re
13
+ from ..data.brain_groups import ALT_GROUPS, PRODUCTION_GROUPS
14
+
15
+
16
+ def mutate_decay(expression: str, current_decay: int) -> list[dict]:
17
+ """Generate decay variants."""
18
+ variants = []
19
+ for new_decay in [3, 5, 7, 10]:
20
+ if new_decay == current_decay:
21
+ continue
22
+ # Replace the decay value in ts_decay_linear
23
+ new_expr = re.sub(
24
+ r"ts_decay_linear\((.+),\s*\d+\)",
25
+ f"ts_decay_linear(\\1, {new_decay})",
26
+ expression
27
+ )
28
+ if new_expr != expression:
29
+ variants.append({
30
+ "expression": new_expr,
31
+ "mutation": f"decay_{current_decay}_to_{new_decay}",
32
+ "decay": new_decay,
33
+ })
34
+ return variants
35
+
36
+
37
def mutate_horizon(expression: str) -> list[dict]:
    """Produce up to two variants with a different lookback window.

    The first ``ts_rank`` / ``ts_mean`` / ``ts_delta`` call matched by the
    pattern supplies the current window; that same first occurrence is then
    rewritten to each candidate horizon in turn.

    Returns:
        At most two dicts with keys ``expression``, ``mutation`` and ``decay``
        (``decay`` is ``None``), or an empty list when no call matches.
    """
    found = re.search(r"ts_(?:rank|mean|delta)\([^,]+,\s*(\d+)\)", expression)
    if found is None:
        return []

    current = int(found.group(1))
    candidates = [h for h in (21, 42, 63, 126, 252) if h != current]

    variants = []
    for h in candidates:
        rewritten = re.sub(
            r"(ts_(?:rank|mean|delta)\([^,]+,\s*)\d+(\))",
            f"\\g<1>{h}\\2",
            expression,
            count=1,  # touch only the first matching call
        )
        if rewritten == expression:
            continue
        variants.append({
            "expression": rewritten,
            "mutation": f"horizon_{current}_to_{h}",
            "decay": None,  # decay setting is left as-is
        })
        if len(variants) == 2:
            # Cap at two horizon variants.
            break
    return variants
64
+
65
+
66
def _group_key_spans(expression: str) -> "list[tuple[int, int, str]]":
    """Find the group-key argument of every group operator call.

    For each ``group_neutralize`` / ``group_zscore`` / ``group_rank`` call the
    parentheses are balanced explicitly and the last top-level argument is
    taken as the group key, provided it is a bare lowercase identifier.
    Numeric literals (such as the window of a nested ``ts_rank``) are therefore
    never mistaken for a group key.

    Returns:
        ``(start, end, key)`` tuples in call order, where ``start``/``end``
        bound the identifier itself inside *expression*.
    """
    spans = []
    for call in re.finditer(r"\bgroup_(?:neutralize|zscore|rank)\(", expression):
        depth = 0
        last_comma = -1
        for idx in range(call.end() - 1, len(expression)):
            ch = expression[idx]
            if ch == "(":
                depth += 1
            elif ch == ")":
                depth -= 1
                if depth == 0:
                    # Reached the parenthesis that closes this call.
                    if last_comma != -1:
                        key = re.fullmatch(
                            r"\s*([a-z_][a-z0-9_]*)\s*",
                            expression[last_comma + 1:idx],
                        )
                        if key:
                            offset = last_comma + 1
                            spans.append(
                                (offset + key.start(1), offset + key.end(1), key.group(1))
                            )
                    break
            elif ch == "," and depth == 1:
                # Only commas directly inside this call separate its arguments.
                last_comma = idx
    return spans


def mutate_neutralization(expression: str) -> list[dict]:
    """Swap the neutralization group key for other production groups.

    The group key of the first group operator call is taken as the current
    group. Every group operator call using that same key is rewritten; text
    elsewhere in the expression that merely contains the key as a substring
    (field names, longer group names) is left untouched.

    Returns:
        At most two dicts with keys ``expression``, ``mutation`` and ``decay``
        (``decay`` is ``None``), or an empty list when no group key is found.
    """
    spans = _group_key_spans(expression)
    if not spans:
        return []

    current_group = spans[0][2]
    # Nested calls are discovered outer-first, so order the spans by position
    # before stitching the new string together.
    targets = sorted((start, end) for start, end, key in spans if key == current_group)

    # Candidate replacements: first three production groups that differ.
    novel_groups = [g for g in PRODUCTION_GROUPS if g.id != current_group][:3]

    variants = []
    for g in novel_groups:
        pieces = []
        cursor = 0
        for start, end in targets:
            pieces.append(expression[cursor:start])
            pieces.append(g.id)
            cursor = end
        pieces.append(expression[cursor:])
        new_expr = "".join(pieces)
        if new_expr != expression:
            variants.append({
                "expression": new_expr,
                "mutation": f"group_{current_group}_to_{g.id}",
                "decay": None,
            })
    return variants[:2]
87
+
88
+
89
def mutate_vol_scale(expression: str) -> list[dict]:
    """Wrap a raw field in a volatility-normalised form.

    Looks for the first ``ts_rank(<field>, <n>)`` or ``ts_mean(<field>, <n>)``
    whose first argument is a bare field name and replaces the field with
    ``field / (ts_std(field, w) + 0.001)``, where ``w`` is twice the call's
    window capped at 252. Every occurrence of that exact call text in the
    expression is rewritten.

    Returns:
        A single-element list with keys ``expression``, ``mutation`` and
        ``decay`` (``decay`` is ``None``), or an empty list when nothing
        matches.
    """
    hit = re.search(r"(ts_(?:rank|mean)\()([a-z][a-z0-9_]+)(,\s*\d+\))", expression)
    if hit is None:
        return []
    head, field, tail = hit.groups()

    # The window is the integer embedded in the ", <n>)" tail.
    digits = re.search(r"(\d+)", tail)
    if digits is None:
        return []
    vol_window = min(2 * int(digits.group(1)), 252)

    scaled_field = f"{field} / (ts_std({field}, {vol_window}) + 0.001)"
    rewritten = expression.replace(head + field + tail, head + scaled_field + tail)

    if rewritten == expression:
        return []
    return [{"expression": rewritten, "mutation": "vol_scaled", "decay": None}]
115
+
116
+
117
def _flip_scan_parens(text: str, open_idx: int) -> "tuple[int, int] | None":
    """Balance the parenthesis that opens at ``text[open_idx]``.

    Returns:
        ``(close_idx, last_comma_idx)`` where ``close_idx`` is the index of the
        matching ``)`` and ``last_comma_idx`` is the index of the last comma
        directly inside that pair (``-1`` if there is none). ``None`` when the
        parenthesis is never closed.
    """
    depth = 0
    last_comma = -1
    for idx in range(open_idx, len(text)):
        ch = text[idx]
        if ch == "(":
            depth += 1
        elif ch == ")":
            depth -= 1
            if depth == 0:
                return idx, last_comma
        elif ch == "," and depth == 1:
            last_comma = idx
    return None


def mutate_sign_flip(expression: str) -> list[dict]:
    """Flip the sign of the signal inside a ``ts_decay_linear`` wrapper.

    Only expressions that consist of a single outer
    ``ts_decay_linear(<inner>, <int>)`` call are handled; anything else yields
    no variant. If ``<inner>`` is already wrapped as ``-( ... )`` by one
    matching pair of parentheses the negation is removed, otherwise
    ``-( ... )`` is added.

    Parentheses are balanced explicitly, so an inner expression such as
    ``-(a) * (b)`` is negated as a whole instead of having unrelated
    parentheses stripped, and ``ts_decay_linear(a, 3) + ts_decay_linear(b, 5)``
    is not mistaken for a single wrapper.

    Returns:
        A single-element list with keys ``expression``, ``mutation`` and
        ``decay`` (``decay`` is ``None``), or an empty list.
    """
    wrapper = "ts_decay_linear("
    text = expression.strip()
    if not text.startswith(wrapper):
        return []

    layout = _flip_scan_parens(text, len(wrapper) - 1)
    if layout is None:
        return []
    close_idx, comma_idx = layout
    # The wrapper must span the whole expression and have a window argument.
    if close_idx != len(text) - 1 or comma_idx == -1:
        return []

    inner_expr = text[len(wrapper):comma_idx].strip()
    decay = text[comma_idx + 1:close_idx].strip()
    if not inner_expr or not re.fullmatch(r"\d+", decay):
        return []

    # "-(" only counts as a negation wrapper when its parenthesis closes at the
    # very end of the inner expression.
    negation = _flip_scan_parens(inner_expr, 1) if inner_expr.startswith("-(") else None
    if negation is not None and negation[0] == len(inner_expr) - 1:
        flipped = inner_expr[2:-1]
    else:
        flipped = f"-({inner_expr})"

    new_expr = f"ts_decay_linear({flipped}, {decay})"
    return [{"expression": new_expr, "mutation": "sign_flip", "decay": None}]
139
+
140
+
141
def generate_mutations(expression: str, decay: int = 5) -> list[dict]:
    """Collect the variants produced by every mutator for *expression*.

    Mutators run in a fixed order: decay, horizon, neutralization,
    vol-scaling, sign flip. Their results are concatenated in that order.

    Returns:
        A list of dicts, each with an ``expression``, a ``mutation`` label and
        a ``decay`` entry (which may be ``None``).
    """
    # Decay is the only mutator that needs the current decay setting.
    collected = list(mutate_decay(expression, decay))

    expression_only_mutators = (
        mutate_horizon,
        mutate_neutralization,
        mutate_vol_scale,
        mutate_sign_flip,
    )
    for mutator in expression_only_mutators:
        collected += mutator(expression)

    return collected