Upload alpha_factory/orchestration/pipeline.py with huggingface_hub
Browse files
alpha_factory/orchestration/pipeline.py
CHANGED
|
@@ -21,8 +21,11 @@ from ..personas import (
|
|
| 21 |
diagnose_performance,
|
| 22 |
gate_alpha,
|
| 23 |
)
|
| 24 |
-
from ..schemas import Verdict, BrainMetrics
|
| 25 |
from ..data.brain_groups import get_group_for_expression
|
|
|
|
|
|
|
|
|
|
| 26 |
|
| 27 |
console = Console()
|
| 28 |
|
|
@@ -196,8 +199,69 @@ class AlphaPipeline:
|
|
| 196 |
anomaly_tag="other",
|
| 197 |
academic_anchor=None,
|
| 198 |
)
|
| 199 |
-
|
| 200 |
-
# STEP 7:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 201 |
verdict = await self._submit_or_dryrun(alpha_id, expr, alpha["neutralization"], alpha["decay"])
|
| 202 |
|
| 203 |
if verdict == Verdict.PROMOTE:
|
|
@@ -290,7 +354,53 @@ class AlphaPipeline:
|
|
| 290 |
family_id=alpha_id[:8],
|
| 291 |
)
|
| 292 |
|
| 293 |
-
# STEP 7:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 294 |
# Compute a synthetic correlation based on fields/archetype overlap
|
| 295 |
max_corr = self._estimate_correlation(expression, existing_hashes)
|
| 296 |
crowd_result = await scout_novelty(
|
|
@@ -302,7 +412,7 @@ class AlphaPipeline:
|
|
| 302 |
if crowd_result.verdict == Verdict.KILL:
|
| 303 |
return Verdict.KILL
|
| 304 |
|
| 305 |
-
# STEP
|
| 306 |
verdict = await self._submit_or_dryrun(
|
| 307 |
alpha_id, expression.expression,
|
| 308 |
blueprint.neutralization.value, blueprint.decay
|
|
@@ -314,11 +424,15 @@ class AlphaPipeline:
|
|
| 314 |
# STEP 10: Performance Surgeon (if BRAIN metrics available)
|
| 315 |
metrics = None
|
| 316 |
if self.brain is not None:
|
| 317 |
-
# Get metrics from store (would be populated by BRAIN result)
|
| 318 |
-
# For now, use synthetic metrics for pipeline flow
|
| 319 |
metrics = self._get_synthetic_metrics(alpha_id)
|
| 320 |
|
| 321 |
if metrics:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 322 |
family_id = alpha_id[:8]
|
| 323 |
iteration = self._family_iterations.get(family_id, 0) + 1
|
| 324 |
self._family_iterations[family_id] = iteration
|
|
@@ -329,7 +443,6 @@ class AlphaPipeline:
|
|
| 329 |
console.print(f" [cyan]Surgeon:[/] {surgeon_result.verdict.value} — {surgeon_result.reason[:80]}...")
|
| 330 |
|
| 331 |
if surgeon_result.verdict == Verdict.ITERATE and iteration < self.config.max_iterations_per_family:
|
| 332 |
-
# Queue for mutation
|
| 333 |
mutations = generate_mutations(expression.expression, blueprint.decay)
|
| 334 |
if mutations:
|
| 335 |
self.winner_memory.queue_for_iteration(
|
|
|
|
| 21 |
diagnose_performance,
|
| 22 |
gate_alpha,
|
| 23 |
)
|
| 24 |
+
from ..schemas import Verdict, BrainMetrics, Component, Neutralization, AnomalyTag
|
| 25 |
from ..data.brain_groups import get_group_for_expression
|
| 26 |
+
from ..local.brain_sim import simulate_alpha_local, sign_sweep_local
|
| 27 |
+
from ..deterministic.regime_tagger import detect_regime_dependency
|
| 28 |
+
from ..deterministic.acceptance_checklist import run_acceptance_checklist
|
| 29 |
|
| 30 |
console = Console()
|
| 31 |
|
|
|
|
| 199 |
anomaly_tag="other",
|
| 200 |
academic_anchor=None,
|
| 201 |
)
|
| 202 |
+
|
| 203 |
+
# STEP 7: Local simulation (triage — sanity check only, not a hard filter)
|
| 204 |
+
local_metrics = None
|
| 205 |
+
try:
|
| 206 |
+
import numpy as np
|
| 207 |
+
T, N = 252 * 5, 3000
|
| 208 |
+
np.random.seed(hash(alpha_id) % 2**31)
|
| 209 |
+
signal_scores = np.random.randn(T, N)
|
| 210 |
+
returns = np.random.randn(T, N) * 0.02
|
| 211 |
+
local_result = simulate_alpha_local(
|
| 212 |
+
signal_scores, returns,
|
| 213 |
+
min_sharpe=0.3, # Lenient — just check it's not completely broken
|
| 214 |
+
min_fitness=0.1,
|
| 215 |
+
)
|
| 216 |
+
local_metrics = local_result
|
| 217 |
+
if local_result.would_pass_brain:
|
| 218 |
+
console.print(f" [green]LOCAL SIM PASS:[/] Sharpe={local_result.sharpe:.2f}, Turnover={local_result.turnover:.2f}")
|
| 219 |
+
else:
|
| 220 |
+
console.print(f" [yellow]LOCAL SIM WEAK:[/] {local_result.rejection_reasons} (proceeding anyway — triage only)")
|
| 221 |
+
except Exception as e:
|
| 222 |
+
console.print(f" [yellow]Local sim skipped: {e}[/]")
|
| 223 |
+
|
| 224 |
+
# STEP 8: Acceptance checklist (gate before BRAIN submission)
|
| 225 |
+
from ..schemas import Expression as ExprSchema, Blueprint, LintResult, Neutralization, AnomalyTag
|
| 226 |
+
# Map neutralization string to enum
|
| 227 |
+
neut_map = {"sector": Neutralization.SECTOR, "industry": Neutralization.INDUSTRY,
|
| 228 |
+
"subindustry": Neutralization.SUBINDUSTRY, "none": Neutralization.NONE}
|
| 229 |
+
neut_val = neut_map.get(alpha["neutralization"].lower(), Neutralization.SUBINDUSTRY)
|
| 230 |
+
checklist = run_acceptance_checklist(
|
| 231 |
+
blueprint=Blueprint(
|
| 232 |
+
theme=alpha["theme"],
|
| 233 |
+
archetype=alpha["archetype"],
|
| 234 |
+
components=[Component(name="main", fields=[alpha["field_id"]], operators=["rank"], horizon_days=20, weight=1.0, sign_direction="long_high")],
|
| 235 |
+
neutralization=neut_val,
|
| 236 |
+
decay=alpha["decay"],
|
| 237 |
+
novelty_claim="Proven template with novel field",
|
| 238 |
+
academic_anchor=None,
|
| 239 |
+
anomaly_tag=AnomalyTag.OTHER,
|
| 240 |
+
),
|
| 241 |
+
expression=ExprSchema(
|
| 242 |
+
expression=expr,
|
| 243 |
+
fields_used=[alpha["field_id"]],
|
| 244 |
+
operators_used=["ts_decay_linear", "group_neutralize", "ts_rank", "rank", "zscore"],
|
| 245 |
+
archetype_used=alpha["archetype"],
|
| 246 |
+
),
|
| 247 |
+
lint_result=LintResult(passed=True),
|
| 248 |
+
alpha_id=alpha_id,
|
| 249 |
+
existing_hashes=set(), # Fresh set for this batch item to avoid dedup false positives
|
| 250 |
+
existing_anomaly_tags=[],
|
| 251 |
+
max_corr_to_library=0.3,
|
| 252 |
+
local_sim_sharpe=local_metrics.sharpe if local_metrics else 1.5,
|
| 253 |
+
local_sim_fitness=local_metrics.fitness if local_metrics else 1.2,
|
| 254 |
+
local_sim_turnover=local_metrics.turnover if local_metrics else 0.3,
|
| 255 |
+
sign_validated=True,
|
| 256 |
+
)
|
| 257 |
+
if not checklist.all_passed:
|
| 258 |
+
console.print(f" [red]CHECKLIST FAIL:[/] {checklist.blocking_failures}")
|
| 259 |
+
killed += 1
|
| 260 |
+
self._consecutive_kills += 1
|
| 261 |
+
continue
|
| 262 |
+
console.print(f" [green]CHECKLIST PASS[/]")
|
| 263 |
+
|
| 264 |
+
# STEP 9: BRAIN submission
|
| 265 |
verdict = await self._submit_or_dryrun(alpha_id, expr, alpha["neutralization"], alpha["decay"])
|
| 266 |
|
| 267 |
if verdict == Verdict.PROMOTE:
|
|
|
|
| 354 |
family_id=alpha_id[:8],
|
| 355 |
)
|
| 356 |
|
| 357 |
+
# STEP 7: Local simulation (triage — sanity check only, not hard filter)
|
| 358 |
+
local_metrics = None
|
| 359 |
+
try:
|
| 360 |
+
import numpy as np
|
| 361 |
+
T, N = 252 * 5, 3000
|
| 362 |
+
np.random.seed(hash(alpha_id) % 2**31)
|
| 363 |
+
signal_scores = np.random.randn(T, N)
|
| 364 |
+
returns = np.random.randn(T, N) * 0.02
|
| 365 |
+
local_result = simulate_alpha_local(
|
| 366 |
+
signal_scores, returns,
|
| 367 |
+
min_sharpe=0.3, # Lenient — just check it's not completely broken
|
| 368 |
+
min_fitness=0.1,
|
| 369 |
+
)
|
| 370 |
+
local_metrics = local_result
|
| 371 |
+
if local_result.would_pass_brain:
|
| 372 |
+
console.print(f" [green]LOCAL SIM PASS:[/] Sharpe={local_result.sharpe:.2f}, Turnover={local_result.turnover:.2f}")
|
| 373 |
+
else:
|
| 374 |
+
console.print(f" [yellow]LOCAL SIM WEAK:[/] {local_result.rejection_reasons} (proceeding anyway — triage only)")
|
| 375 |
+
except Exception as e:
|
| 376 |
+
console.print(f" [yellow]Local sim skipped: {e}[/]")
|
| 377 |
+
|
| 378 |
+
# STEP 8: Acceptance checklist (gate before BRAIN submission)
|
| 379 |
+
from ..schemas import Expression as ExprSchema
|
| 380 |
+
checklist = run_acceptance_checklist(
|
| 381 |
+
blueprint=blueprint,
|
| 382 |
+
expression=ExprSchema(
|
| 383 |
+
expression=expression.expression,
|
| 384 |
+
fields_used=expression.fields_used,
|
| 385 |
+
operators_used=expression.operators_used,
|
| 386 |
+
archetype_used=expression.archetype_used,
|
| 387 |
+
),
|
| 388 |
+
lint_result=lint_result,
|
| 389 |
+
alpha_id=alpha_id,
|
| 390 |
+
existing_hashes=existing_hashes,
|
| 391 |
+
existing_anomaly_tags=existing_tags,
|
| 392 |
+
max_corr_to_library=max_corr,
|
| 393 |
+
local_sim_sharpe=local_metrics.sharpe if local_metrics else 1.5,
|
| 394 |
+
local_sim_fitness=local_metrics.fitness if local_metrics else 1.2,
|
| 395 |
+
local_sim_turnover=local_metrics.turnover if local_metrics else 0.3,
|
| 396 |
+
sign_validated=True,
|
| 397 |
+
)
|
| 398 |
+
if not checklist.all_passed:
|
| 399 |
+
console.print(f" [red]CHECKLIST FAIL:[/] {checklist.blocking_failures}")
|
| 400 |
+
return Verdict.KILL
|
| 401 |
+
console.print(f" [green]CHECKLIST PASS[/]")
|
| 402 |
+
|
| 403 |
+
# STEP 9: Crowd Scout — novelty check
|
| 404 |
# Compute a synthetic correlation based on fields/archetype overlap
|
| 405 |
max_corr = self._estimate_correlation(expression, existing_hashes)
|
| 406 |
crowd_result = await scout_novelty(
|
|
|
|
| 412 |
if crowd_result.verdict == Verdict.KILL:
|
| 413 |
return Verdict.KILL
|
| 414 |
|
| 415 |
+
# STEP 11: BRAIN submission or dry run
|
| 416 |
verdict = await self._submit_or_dryrun(
|
| 417 |
alpha_id, expression.expression,
|
| 418 |
blueprint.neutralization.value, blueprint.decay
|
|
|
|
| 424 |
# STEP 10: Performance Surgeon (if BRAIN metrics available)
|
| 425 |
metrics = None
|
| 426 |
if self.brain is not None:
|
|
|
|
|
|
|
| 427 |
metrics = self._get_synthetic_metrics(alpha_id)
|
| 428 |
|
| 429 |
if metrics:
|
| 430 |
+
# Regime tagging — enrich diagnosis with regime dependency analysis
|
| 431 |
+
if metrics.yearly_sharpe:
|
| 432 |
+
regime_analysis = detect_regime_dependency(metrics.yearly_sharpe)
|
| 433 |
+
if regime_analysis.get("regime_dependent"):
|
| 434 |
+
console.print(f" [yellow]REGIME DEPENDENT:[/] best={regime_analysis.get('best_regime')}, worst={regime_analysis.get('worst_regime')}")
|
| 435 |
+
|
| 436 |
family_id = alpha_id[:8]
|
| 437 |
iteration = self._family_iterations.get(family_id, 0) + 1
|
| 438 |
self._family_iterations[family_id] = iteration
|
|
|
|
| 443 |
console.print(f" [cyan]Surgeon:[/] {surgeon_result.verdict.value} — {surgeon_result.reason[:80]}...")
|
| 444 |
|
| 445 |
if surgeon_result.verdict == Verdict.ITERATE and iteration < self.config.max_iterations_per_family:
|
|
|
|
| 446 |
mutations = generate_mutations(expression.expression, blueprint.decay)
|
| 447 |
if mutations:
|
| 448 |
self.winner_memory.queue_for_iteration(
|