gaurv007 commited on
Commit
640aecf
·
verified ·
1 Parent(s): 9e49652

Upload alpha_factory/infra/factor_store.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. alpha_factory/infra/factor_store.py +180 -0
alpha_factory/infra/factor_store.py ADDED
@@ -0,0 +1,180 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Factor Store — DuckDB + Parquet persistence for all alphas.
3
+ Single source of truth for every alpha ever submitted.
4
+ """
5
+ import duckdb
6
+ from pathlib import Path
7
+ from datetime import datetime
8
+ from typing import Optional
9
+ from ..schemas import BrainMetrics, Verdict
10
+
11
+
12
+ SCHEMA_SQL = """
13
+ CREATE TABLE IF NOT EXISTS alphas (
14
+ alpha_id VARCHAR PRIMARY KEY,
15
+ submitted_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
16
+ expression TEXT NOT NULL,
17
+ neutralization VARCHAR NOT NULL,
18
+ decay INTEGER NOT NULL,
19
+ universe VARCHAR DEFAULT 'TOP3000',
20
+ region VARCHAR DEFAULT 'USA',
21
+ delay_days INTEGER DEFAULT 1,
22
+ fields_used VARCHAR[],
23
+ operators_used VARCHAR[],
24
+ archetype VARCHAR,
25
+ theme VARCHAR,
26
+ anomaly_tag VARCHAR,
27
+ academic_anchor VARCHAR,
28
+ sharpe_full DOUBLE,
29
+ sharpe_is DOUBLE,
30
+ sharpe_os DOUBLE,
31
+ fitness_brain DOUBLE,
32
+ yearly_sharpe DOUBLE[],
33
+ yearly_returns DOUBLE[],
34
+ turnover DOUBLE,
35
+ max_drawdown DOUBLE,
36
+ returns_total DOUBLE,
37
+ margin_pct DOUBLE,
38
+ fitness_score DOUBLE,
39
+ max_corr_to_library DOUBLE,
40
+ verdict VARCHAR,
41
+ gatekeeper_memo TEXT,
42
+ iteration INTEGER DEFAULT 1,
43
+ family_id VARCHAR,
44
+ created_by VARCHAR DEFAULT 'pipeline'
45
+ );
46
+
47
+ CREATE TABLE IF NOT EXISTS dead_themes (
48
+ theme VARCHAR,
49
+ universe VARCHAR,
50
+ date_killed TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
51
+ last_sharpe DOUBLE,
52
+ reason TEXT,
53
+ cooldown_until TIMESTAMP
54
+ );
55
+ """
56
+
57
+
58
+ class FactorStore:
59
+ """DuckDB-backed factor store for all alpha history."""
60
+
61
+ def __init__(self, db_path: Path):
62
+ self.db_path = db_path
63
+ db_path.parent.mkdir(parents=True, exist_ok=True)
64
+ self.conn = duckdb.connect(str(db_path))
65
+ self.conn.execute(SCHEMA_SQL)
66
+
67
+ def insert_alpha(
68
+ self,
69
+ alpha_id: str,
70
+ expression: str,
71
+ neutralization: str,
72
+ decay: int,
73
+ fields_used: list[str],
74
+ operators_used: list[str],
75
+ archetype: str,
76
+ theme: str,
77
+ anomaly_tag: str,
78
+ academic_anchor: Optional[str] = None,
79
+ family_id: Optional[str] = None,
80
+ iteration: int = 1,
81
+ ):
82
+ """Insert a new alpha candidate (before BRAIN results arrive)."""
83
+ self.conn.execute("""
84
+ INSERT OR REPLACE INTO alphas (alpha_id, expression, neutralization, decay,
85
+ fields_used, operators_used, archetype, theme, anomaly_tag,
86
+ academic_anchor, family_id, iteration)
87
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
88
+ """, [alpha_id, expression, neutralization, decay,
89
+ fields_used, operators_used, archetype, theme, anomaly_tag,
90
+ academic_anchor, family_id, iteration])
91
+
92
+ def update_metrics(self, alpha_id: str, metrics: BrainMetrics, fitness_score: float):
93
+ """Update alpha with BRAIN simulation results."""
94
+ self.conn.execute("""
95
+ UPDATE alphas SET
96
+ sharpe_full = ?, sharpe_is = ?, sharpe_os = ?,
97
+ fitness_brain = ?, turnover = ?, returns_total = ?,
98
+ max_drawdown = ?, yearly_sharpe = ?, yearly_returns = ?,
99
+ margin_pct = ?, fitness_score = ?
100
+ WHERE alpha_id = ?
101
+ """, [metrics.sharpe_full, metrics.sharpe_is, metrics.sharpe_os,
102
+ metrics.fitness, metrics.turnover, metrics.returns,
103
+ metrics.max_drawdown, metrics.yearly_sharpe, metrics.yearly_returns,
104
+ metrics.margin_pct, fitness_score, alpha_id])
105
+
106
+ def update_verdict(self, alpha_id: str, verdict: Verdict, memo: str = ""):
107
+ """Set the final verdict for an alpha."""
108
+ self.conn.execute("""
109
+ UPDATE alphas SET verdict = ?, gatekeeper_memo = ? WHERE alpha_id = ?
110
+ """, [verdict.value, memo, alpha_id])
111
+
112
+ def update_correlation(self, alpha_id: str, max_corr: float):
113
+ """Update max correlation to library."""
114
+ self.conn.execute("""
115
+ UPDATE alphas SET max_corr_to_library = ? WHERE alpha_id = ?
116
+ """, [max_corr, alpha_id])
117
+
118
+ def get_all_themes(self) -> list[str]:
119
+ """Get themes of all alphas in the store."""
120
+ result = self.conn.execute("SELECT theme FROM alphas WHERE theme IS NOT NULL").fetchall()
121
+ return [r[0] for r in result]
122
+
123
+ def get_all_anomaly_tags(self) -> list[str]:
124
+ """Get anomaly tags of all alphas."""
125
+ result = self.conn.execute("SELECT anomaly_tag FROM alphas WHERE anomaly_tag IS NOT NULL").fetchall()
126
+ return [r[0] for r in result]
127
+
128
+ def get_dead_themes(self) -> list[str]:
129
+ """Get themes that are in cooldown."""
130
+ result = self.conn.execute("""
131
+ SELECT theme FROM dead_themes WHERE cooldown_until > CURRENT_TIMESTAMP
132
+ """).fetchall()
133
+ return [r[0] for r in result]
134
+
135
+ def exists(self, alpha_id: str) -> bool:
136
+ """Check if an alpha already exists (dedup)."""
137
+ result = self.conn.execute("SELECT 1 FROM alphas WHERE alpha_id = ?", [alpha_id]).fetchone()
138
+ return result is not None
139
+
140
+ def get_expression_hashes(self) -> set[str]:
141
+ """Get all alpha_ids for dedup."""
142
+ result = self.conn.execute("SELECT alpha_id FROM alphas").fetchall()
143
+ return {r[0] for r in result}
144
+
145
+ def count_consecutive_kills(self) -> int:
146
+ """Count consecutive kills from most recent (for kill switch)."""
147
+ results = self.conn.execute("""
148
+ SELECT verdict FROM alphas ORDER BY submitted_at DESC LIMIT 50
149
+ """).fetchall()
150
+ count = 0
151
+ for r in results:
152
+ if r[0] == "kill":
153
+ count += 1
154
+ else:
155
+ break
156
+ return count
157
+
158
+ def kill_theme(self, theme: str, last_sharpe: float, reason: str, cooldown_days: int = 180):
159
+ """Add a theme to the dead list with cooldown."""
160
+ self.conn.execute("""
161
+ INSERT INTO dead_themes (theme, universe, last_sharpe, reason, cooldown_until)
162
+ VALUES (?, 'TOP3000', ?, ?, CURRENT_TIMESTAMP + INTERVAL ? DAY)
163
+ """, [theme, last_sharpe, reason, cooldown_days])
164
+
165
+ def get_library_stats(self) -> dict:
166
+ """Summary statistics for the factor store."""
167
+ total = self.conn.execute("SELECT COUNT(*) FROM alphas").fetchone()[0]
168
+ promoted = self.conn.execute("SELECT COUNT(*) FROM alphas WHERE verdict = 'promote'").fetchone()[0]
169
+ killed = self.conn.execute("SELECT COUNT(*) FROM alphas WHERE verdict = 'kill'").fetchone()[0]
170
+ avg_sharpe = self.conn.execute("SELECT AVG(sharpe_os) FROM alphas WHERE sharpe_os IS NOT NULL").fetchone()[0]
171
+ return {
172
+ "total_alphas": total,
173
+ "promoted": promoted,
174
+ "killed": killed,
175
+ "pending": total - promoted - killed,
176
+ "avg_sharpe_os": round(avg_sharpe or 0, 3),
177
+ }
178
+
179
+ def close(self):
180
+ self.conn.close()