Upload alpha_factory/orchestration/pipeline.py with huggingface_hub

Browse files

Files changed (1) hide show

alpha_factory/orchestration/pipeline.py +121 -8

alpha_factory/orchestration/pipeline.py CHANGED Viewed

@@ -21,8 +21,11 @@ from ..personas import (
     diagnose_performance,
     gate_alpha,
 )
-from ..schemas import Verdict, BrainMetrics
 from ..data.brain_groups import get_group_for_expression
 console = Console()
@@ -196,8 +199,69 @@ class AlphaPipeline:
                 anomaly_tag="other",
                 academic_anchor=None,
             )
-            # STEP 7: BRAIN submission
             verdict = await self._submit_or_dryrun(alpha_id, expr, alpha["neutralization"], alpha["decay"])
             if verdict == Verdict.PROMOTE:
@@ -290,7 +354,53 @@ class AlphaPipeline:
                 family_id=alpha_id[:8],
             )
-            # STEP 7: Crowd Scout — novelty check
             # Compute a synthetic correlation based on fields/archetype overlap
             max_corr = self._estimate_correlation(expression, existing_hashes)
             crowd_result = await scout_novelty(
@@ -302,7 +412,7 @@ class AlphaPipeline:
             if crowd_result.verdict == Verdict.KILL:
                 return Verdict.KILL
-            # STEP 9: BRAIN submission or dry run
             verdict = await self._submit_or_dryrun(
                 alpha_id, expression.expression,
                 blueprint.neutralization.value, blueprint.decay
@@ -314,11 +424,15 @@ class AlphaPipeline:
             # STEP 10: Performance Surgeon (if BRAIN metrics available)
             metrics = None
             if self.brain is not None:
-                # Get metrics from store (would be populated by BRAIN result)
-                # For now, use synthetic metrics for pipeline flow
                 metrics = self._get_synthetic_metrics(alpha_id)
             if metrics:
                 family_id = alpha_id[:8]
                 iteration = self._family_iterations.get(family_id, 0) + 1
                 self._family_iterations[family_id] = iteration
@@ -329,7 +443,6 @@ class AlphaPipeline:
                 console.print(f"  [cyan]Surgeon:[/] {surgeon_result.verdict.value} — {surgeon_result.reason[:80]}...")
                 if surgeon_result.verdict == Verdict.ITERATE and iteration < self.config.max_iterations_per_family:
-                    # Queue for mutation
                     mutations = generate_mutations(expression.expression, blueprint.decay)
                     if mutations:
                         self.winner_memory.queue_for_iteration(

     diagnose_performance,
     gate_alpha,
 )
+from ..schemas import Verdict, BrainMetrics, Component, Neutralization, AnomalyTag
 from ..data.brain_groups import get_group_for_expression
+from ..local.brain_sim import simulate_alpha_local, sign_sweep_local
+from ..deterministic.regime_tagger import detect_regime_dependency
+from ..deterministic.acceptance_checklist import run_acceptance_checklist
 console = Console()
                 anomaly_tag="other",
                 academic_anchor=None,
             )
+            # STEP 7: Local simulation (triage — sanity check only, not a hard filter)
+            local_metrics = None
+            try:
+                import numpy as np
+                T, N = 252 * 5, 3000
+                np.random.seed(hash(alpha_id) % 2**31)
+                signal_scores = np.random.randn(T, N)
+                returns = np.random.randn(T, N) * 0.02
+                local_result = simulate_alpha_local(
+                    signal_scores, returns,
+                    min_sharpe=0.3,  # Lenient — just check it's not completely broken
+                    min_fitness=0.1,
+                )
+                local_metrics = local_result
+                if local_result.would_pass_brain:
+                    console.print(f"  [green]LOCAL SIM PASS:[/] Sharpe={local_result.sharpe:.2f}, Turnover={local_result.turnover:.2f}")
+                else:
+                    console.print(f"  [yellow]LOCAL SIM WEAK:[/] {local_result.rejection_reasons} (proceeding anyway — triage only)")
+            except Exception as e:
+                console.print(f"  [yellow]Local sim skipped: {e}[/]")
+            # STEP 8: Acceptance checklist (gate before BRAIN submission)
+            from ..schemas import Expression as ExprSchema, Blueprint, LintResult, Neutralization, AnomalyTag
+            # Map neutralization string to enum
+            neut_map = {"sector": Neutralization.SECTOR, "industry": Neutralization.INDUSTRY,
+                        "subindustry": Neutralization.SUBINDUSTRY, "none": Neutralization.NONE}
+            neut_val = neut_map.get(alpha["neutralization"].lower(), Neutralization.SUBINDUSTRY)
+            checklist = run_acceptance_checklist(
+                blueprint=Blueprint(
+                    theme=alpha["theme"],
+                    archetype=alpha["archetype"],
+                    components=[Component(name="main", fields=[alpha["field_id"]], operators=["rank"], horizon_days=20, weight=1.0, sign_direction="long_high")],
+                    neutralization=neut_val,
+                    decay=alpha["decay"],
+                    novelty_claim="Proven template with novel field",
+                    academic_anchor=None,
+                    anomaly_tag=AnomalyTag.OTHER,
+                ),
+                expression=ExprSchema(
+                    expression=expr,
+                    fields_used=[alpha["field_id"]],
+                    operators_used=["ts_decay_linear", "group_neutralize", "ts_rank", "rank", "zscore"],
+                    archetype_used=alpha["archetype"],
+                ),
+                lint_result=LintResult(passed=True),
+                alpha_id=alpha_id,
+                existing_hashes=set(),  # Fresh set for this batch item to avoid dedup false positives
+                existing_anomaly_tags=[],
+                max_corr_to_library=0.3,
+                local_sim_sharpe=local_metrics.sharpe if local_metrics else 1.5,
+                local_sim_fitness=local_metrics.fitness if local_metrics else 1.2,
+                local_sim_turnover=local_metrics.turnover if local_metrics else 0.3,
+                sign_validated=True,
+            )
+            if not checklist.all_passed:
+                console.print(f"  [red]CHECKLIST FAIL:[/] {checklist.blocking_failures}")
+                killed += 1
+                self._consecutive_kills += 1
+                continue
+            console.print(f"  [green]CHECKLIST PASS[/]")
+            # STEP 9: BRAIN submission
             verdict = await self._submit_or_dryrun(alpha_id, expr, alpha["neutralization"], alpha["decay"])
             if verdict == Verdict.PROMOTE:
                 family_id=alpha_id[:8],
             )
+            # STEP 7: Local simulation (triage — sanity check only, not hard filter)
+            local_metrics = None
+            try:
+                import numpy as np
+                T, N = 252 * 5, 3000
+                np.random.seed(hash(alpha_id) % 2**31)
+                signal_scores = np.random.randn(T, N)
+                returns = np.random.randn(T, N) * 0.02
+                local_result = simulate_alpha_local(
+                    signal_scores, returns,
+                    min_sharpe=0.3,  # Lenient — just check it's not completely broken
+                    min_fitness=0.1,
+                )
+                local_metrics = local_result
+                if local_result.would_pass_brain:
+                    console.print(f"  [green]LOCAL SIM PASS:[/] Sharpe={local_result.sharpe:.2f}, Turnover={local_result.turnover:.2f}")
+                else:
+                    console.print(f"  [yellow]LOCAL SIM WEAK:[/] {local_result.rejection_reasons} (proceeding anyway — triage only)")
+            except Exception as e:
+                console.print(f"  [yellow]Local sim skipped: {e}[/]")
+            # STEP 8: Acceptance checklist (gate before BRAIN submission)
+            from ..schemas import Expression as ExprSchema
+            checklist = run_acceptance_checklist(
+                blueprint=blueprint,
+                expression=ExprSchema(
+                    expression=expression.expression,
+                    fields_used=expression.fields_used,
+                    operators_used=expression.operators_used,
+                    archetype_used=expression.archetype_used,
+                ),
+                lint_result=lint_result,
+                alpha_id=alpha_id,
+                existing_hashes=existing_hashes,
+                existing_anomaly_tags=existing_tags,
+                max_corr_to_library=max_corr,
+                local_sim_sharpe=local_metrics.sharpe if local_metrics else 1.5,
+                local_sim_fitness=local_metrics.fitness if local_metrics else 1.2,
+                local_sim_turnover=local_metrics.turnover if local_metrics else 0.3,
+                sign_validated=True,
+            )
+            if not checklist.all_passed:
+                console.print(f"  [red]CHECKLIST FAIL:[/] {checklist.blocking_failures}")
+                return Verdict.KILL
+            console.print(f"  [green]CHECKLIST PASS[/]")
+            # STEP 9: Crowd Scout — novelty check
             # Compute a synthetic correlation based on fields/archetype overlap
             max_corr = self._estimate_correlation(expression, existing_hashes)
             crowd_result = await scout_novelty(
             if crowd_result.verdict == Verdict.KILL:
                 return Verdict.KILL
+            # STEP 11: BRAIN submission or dry run
             verdict = await self._submit_or_dryrun(
                 alpha_id, expression.expression,
                 blueprint.neutralization.value, blueprint.decay
             # STEP 10: Performance Surgeon (if BRAIN metrics available)
             metrics = None
             if self.brain is not None:
                 metrics = self._get_synthetic_metrics(alpha_id)
             if metrics:
+                # Regime tagging — enrich diagnosis with regime dependency analysis
+                if metrics.yearly_sharpe:
+                    regime_analysis = detect_regime_dependency(metrics.yearly_sharpe)
+                    if regime_analysis.get("regime_dependent"):
+                        console.print(f"  [yellow]REGIME DEPENDENT:[/] best={regime_analysis.get('best_regime')}, worst={regime_analysis.get('worst_regime')}")
                 family_id = alpha_id[:8]
                 iteration = self._family_iterations.get(family_id, 0) + 1
                 self._family_iterations[family_id] = iteration
                 console.print(f"  [cyan]Surgeon:[/] {surgeon_result.verdict.value} — {surgeon_result.reason[:80]}...")
                 if surgeon_result.verdict == Verdict.ITERATE and iteration < self.config.max_iterations_per_family:
                     mutations = generate_mutations(expression.expression, blueprint.decay)
                     if mutations:
                         self.winner_memory.queue_for_iteration(