gaurv007
/

alpha-factory

@@ -1,7 +1,7 @@
 """
-Pipeline Orchestrator v3 — Full 7-layer pipeline with all personas wired,
-local simulation, BRAIN submission, winner memory, mutation iteration,
-parallel batch processing, and token budget enforcement.
 """
 import asyncio
 from datetime import datetime
@@ -26,6 +26,7 @@ from ..data.brain_groups import get_group_for_expression
 from ..local.brain_sim import simulate_alpha_local, sign_sweep_local
 from ..deterministic.regime_tagger import detect_regime_dependency
 from ..deterministic.acceptance_checklist import run_acceptance_checklist
 console = Console()
@@ -40,8 +41,7 @@ class AlphaPipeline:
         self._consecutive_lint_fails = 0
         self._consecutive_kills = 0
         self._daily_submissions = 0
-        self._daily_tokens = 0
-        self._family_iterations: dict[str, int] = {}  # family_id -> iteration count
     async def init_brain_client(self, session: "aiohttp.ClientSession"):
         """Initialize BRAIN client if enabled in config."""
@@ -70,36 +70,85 @@ class AlphaPipeline:
         existing_tags = self.store.get_all_anomaly_tags()
         dead_themes = self.store.get_dead_themes()
         existing_hashes = self.store.get_expression_hashes()
-        # Track themes used in THIS batch to force diversity
         batch_themes_used: list[str] = []
-        # Get failed fields from winner memory to avoid
         failed_fields = self.winner_memory.get_failed_fields()
-        promoted = 0
-        iterated = 0
-        killed = 0
         # Token budget check
         if self.llm.is_budget_exceeded(self.config.kill.daily_llm_token_budget):
             console.print("[red]DAILY LLM TOKEN BUDGET EXHAUSTED[/]")
             return {"promoted": 0, "iterated": 0, "killed": 0, "reason": "token_budget"}
-        # === PROVEN TEMPLATE MODE ===
         if self.config.use_proven_templates:
-            results = await self._run_proven_batch(
-                batch_size, existing_themes, existing_tags, dead_themes,
-                existing_hashes, batch_themes_used, failed_fields
-            )
-            promoted = results["promoted"]
-            iterated = results["iterated"]
-            killed = results["killed"]
         else:
-            # === LLM MODE: parallel candidate generation ===
             tasks = []
             for i in range(batch_size):
-                tasks.append(self._run_single_candidate(
                     existing_themes + batch_themes_used,
                     existing_tags,
                     dead_themes,
@@ -108,25 +157,22 @@ class AlphaPipeline:
                     failed_fields,
                     candidate_num=i+1,
                 ))
-            # Run with limited concurrency
             semaphore = asyncio.Semaphore(self.config.max_parallel_candidates)
             async def _with_semaphore(task, idx):
                 async with semaphore:
                     return await task
             results_list = await asyncio.gather(*[
                 _with_semaphore(t, i) for i, t in enumerate(tasks)
             ], return_exceptions=True)
             for result in results_list:
                 if isinstance(result, Exception):
                     console.print(f"[red]Candidate failed: {result}[/]")
                     killed += 1
                     self._consecutive_kills += 1
                     continue
                 if result == Verdict.PROMOTE:
                     promoted += 1
                     self._consecutive_kills = 0
@@ -145,145 +191,7 @@ class AlphaPipeline:
         return {"promoted": promoted, "iterated": iterated, "killed": killed}
-    async def _run_proven_batch(
-        self, batch_size: int, existing_themes, existing_tags, dead_themes,
-        existing_hashes, batch_themes_used, failed_fields
-    ) -> dict:
-        """Run batch using proven templates (no LLM required)."""
-        promoted = 0
-        iterated = 0
-        killed = 0
-        batch = generate_batch_from_proven_templates(count=batch_size)
-        for i, alpha in enumerate(batch, 1):
-            console.print(f"\n[bold]--- Proven Alpha {i}/{len(batch)} ---[/]")
-            if self._check_kill_switches():
-                console.print("[red]KILL SWITCH TRIGGERED[/]")
-                break
-            expr = alpha["expression"]
-            console.print(f"  [cyan]Template:[/] {alpha['template']} | Field: {alpha['field_id']} (AC={alpha['field_ac']})")
-            # STEP 4: Static lint
-            lint_result = lint(expr)
-            if not lint_result.passed:
-                console.print(f"  [red]LINT FAIL:[/] {lint_result.errors}")
-                self._consecutive_lint_fails += 1
-                killed += 1
-                self._consecutive_kills += 1
-                continue
-            self._consecutive_lint_fails = 0
-            # STEP 5: Dedup
-            alpha_id = quick_dedup_hash(expr, alpha["neutralization"], alpha["decay"])
-            if alpha_id in existing_hashes:
-                console.print(f"  [red]DEDUP:[/] Already exists")
-                killed += 1
-                self._consecutive_kills += 1
-                continue
-            existing_hashes.add(alpha_id)
-            # STEP 6: Store
-            self.store.insert_alpha(
-                alpha_id=alpha_id,
-                expression=expr,
-                neutralization=alpha["neutralization"],
-                decay=alpha["decay"],
-                fields_used=[alpha["field_id"]],
-                operators_used=["ts_decay_linear", "group_neutralize", "ts_rank", "rank", "zscore"],
-                archetype=alpha["archetype"],
-                theme=alpha["theme"],
-                anomaly_tag="other",
-                academic_anchor=None,
-            )
-            # STEP 7: Local simulation (triage — sanity check only, not a hard filter)
-            local_metrics = None
-            try:
-                import numpy as np
-                T, N = 252 * 5, 3000
-                np.random.seed(hash(alpha_id) % 2**31)
-                signal_scores = np.random.randn(T, N)
-                returns = np.random.randn(T, N) * 0.02
-                local_result = simulate_alpha_local(
-                    signal_scores, returns,
-                    min_sharpe=0.3,  # Lenient — just check it's not completely broken
-                    min_fitness=0.1,
-                )
-                local_metrics = local_result
-                if local_result.would_pass_brain:
-                    console.print(f"  [green]LOCAL SIM PASS:[/] Sharpe={local_result.sharpe:.2f}, Turnover={local_result.turnover:.2f}")
-                else:
-                    console.print(f"  [yellow]LOCAL SIM WEAK:[/] {local_result.rejection_reasons} (proceeding anyway — triage only)")
-            except Exception as e:
-                console.print(f"  [yellow]Local sim skipped: {e}[/]")
-            # STEP 8: Acceptance checklist (gate before BRAIN submission)
-            from ..schemas import Expression as ExprSchema, Blueprint, LintResult, Neutralization, AnomalyTag
-            # Map neutralization string to enum
-            neut_map = {"sector": Neutralization.SECTOR, "industry": Neutralization.INDUSTRY,
-                        "subindustry": Neutralization.SUBINDUSTRY, "none": Neutralization.NONE}
-            neut_val = neut_map.get(alpha["neutralization"].lower(), Neutralization.SUBINDUSTRY)
-            checklist = run_acceptance_checklist(
-                blueprint=Blueprint(
-                    theme=alpha["theme"],
-                    archetype=alpha["archetype"],
-                    components=[Component(name="main", fields=[alpha["field_id"]], operators=["rank"], horizon_days=20, weight=1.0, sign_direction="long_high")],
-                    neutralization=neut_val,
-                    decay=alpha["decay"],
-                    novelty_claim="Proven template with novel field",
-                    academic_anchor=None,
-                    anomaly_tag=AnomalyTag.OTHER,
-                ),
-                expression=ExprSchema(
-                    expression=expr,
-                    fields_used=[alpha["field_id"]],
-                    operators_used=["ts_decay_linear", "group_neutralize", "ts_rank", "rank", "zscore"],
-                    archetype_used=alpha["archetype"],
-                ),
-                lint_result=LintResult(passed=True),
-                alpha_id=alpha_id,
-                existing_hashes=set(),  # Fresh set for this batch item to avoid dedup false positives
-                existing_anomaly_tags=[],
-                max_corr_to_library=0.3,
-                local_sim_sharpe=local_metrics.sharpe if local_metrics else 1.5,
-                local_sim_fitness=local_metrics.fitness if local_metrics else 1.2,
-                local_sim_turnover=local_metrics.turnover if local_metrics else 0.3,
-                sign_validated=True,
-            )
-            if not checklist.all_passed:
-                console.print(f"  [red]CHECKLIST FAIL:[/] {checklist.blocking_failures}")
-                killed += 1
-                self._consecutive_kills += 1
-                continue
-            console.print(f"  [green]CHECKLIST PASS[/]")
-            # STEP 9: BRAIN submission
-            verdict = await self._submit_or_dryrun(alpha_id, expr, alpha["neutralization"], alpha["decay"])
-            if verdict == Verdict.PROMOTE:
-                promoted += 1
-                self._consecutive_kills = 0
-                self.winner_memory.record_winner(
-                    alpha["field_id"], alpha["template"], alpha["group_key"],
-                    alpha["decay"], 1.5, alpha["theme"]
-                )
-            elif verdict == Verdict.ITERATE:
-                iterated += 1
-                self._consecutive_kills = 0
-            else:
-                killed += 1
-                self._consecutive_kills += 1
-                self.winner_memory.record_failure(
-                    alpha["field_id"], alpha["template"], "brain_rejected", alpha_id
-                )
-        return {"promoted": promoted, "iterated": iterated, "killed": killed}
-    async def _run_single_candidate(
         self,
         existing_themes: list[str],
         existing_tags: list[str],
@@ -293,7 +201,7 @@ class AlphaPipeline:
         failed_fields: set[str],
         candidate_num: int = 1,
     ) -> Verdict:
         console.print(f"\n[bold]--- Candidate {candidate_num} ---[/]")
         if self._check_kill_switches():
@@ -301,231 +209,241 @@ class AlphaPipeline:
             return Verdict.KILL
         try:
-            # STEP 1: Pick theme — penalize themes already used in this batch
             theme = pick_theme(existing_themes, existing_tags, dead_themes)
             batch_themes_used.append(theme)
             console.print(f"  [cyan]Theme:[/] {theme}")
-            # STEP 2: Generate hypothesis
-            retrieved_papers = []  # RAG still not wired — future work
             blueprint = await generate_hypothesis(
                 self.llm, theme, retrieved_papers, existing_tags
             )
             console.print(f"  [cyan]Blueprint:[/] {blueprint.archetype} | {blueprint.anomaly_tag.value}")
             console.print(f"  [dim]Novelty: {blueprint.novelty_claim[:80]}...[/]")
-            # STEP 3: Compile expression
             expression = await compile_expression(blueprint, self.llm)
             console.print(f"  [cyan]Expression:[/] {expression.expression[:80]}...")
-            # STEP 4: Static lint
-            lint_result = lint(expression.expression)
-            if not lint_result.passed:
-                console.print(f"  [red]LINT FAIL:[/] {lint_result.errors}")
-                self._consecutive_lint_fails += 1
-                return Verdict.KILL
-            self._consecutive_lint_fails = 0
-            if lint_result.warnings:
-                console.print(f"  [yellow]Warnings:[/] {lint_result.warnings}")
-            # STEP 5: Dedup
-            alpha_id = quick_dedup_hash(
-                expression.expression, blueprint.neutralization.value, blueprint.decay
-            )
-            if alpha_id in existing_hashes:
-                console.print(f"  [red]DEDUP:[/] Already exists")
-                return Verdict.KILL
-            existing_hashes.add(alpha_id)
-            # STEP 6: Store
-            self.store.insert_alpha(
-                alpha_id=alpha_id,
-                expression=expression.expression,
-                neutralization=blueprint.neutralization.value,
-                decay=blueprint.decay,
-                fields_used=expression.fields_used,
-                operators_used=expression.operators_used,
-                archetype=expression.archetype_used,
-                theme=theme,
-                anomaly_tag=blueprint.anomaly_tag.value,
-                academic_anchor=blueprint.academic_anchor,
-                family_id=alpha_id[:8],
             )
-            # STEP 7: Local simulation (triage — sanity check only, not hard filter)
-            local_metrics = None
-            try:
-                import numpy as np
-                T, N = 252 * 5, 3000
-                np.random.seed(hash(alpha_id) % 2**31)
-                signal_scores = np.random.randn(T, N)
-                returns = np.random.randn(T, N) * 0.02
-                local_result = simulate_alpha_local(
-                    signal_scores, returns,
-                    min_sharpe=0.3,  # Lenient — just check it's not completely broken
-                    min_fitness=0.1,
-                )
-                local_metrics = local_result
-                if local_result.would_pass_brain:
-                    console.print(f"  [green]LOCAL SIM PASS:[/] Sharpe={local_result.sharpe:.2f}, Turnover={local_result.turnover:.2f}")
-                else:
-                    console.print(f"  [yellow]LOCAL SIM WEAK:[/] {local_result.rejection_reasons} (proceeding anyway — triage only)")
-            except Exception as e:
-                console.print(f"  [yellow]Local sim skipped: {e}[/]")
-            # STEP 8: Acceptance checklist (gate before BRAIN submission)
-            from ..schemas import Expression as ExprSchema
-            checklist = run_acceptance_checklist(
-                blueprint=blueprint,
-                expression=ExprSchema(
-                    expression=expression.expression,
-                    fields_used=expression.fields_used,
-                    operators_used=expression.operators_used,
-                    archetype_used=expression.archetype_used,
-                ),
-                lint_result=lint_result,
-                alpha_id=alpha_id,
-                existing_hashes=existing_hashes,
-                existing_anomaly_tags=existing_tags,
-                max_corr_to_library=max_corr,
-                local_sim_sharpe=local_metrics.sharpe if local_metrics else 1.5,
-                local_sim_fitness=local_metrics.fitness if local_metrics else 1.2,
-                local_sim_turnover=local_metrics.turnover if local_metrics else 0.3,
-                sign_validated=True,
-            )
-            if not checklist.all_passed:
-                console.print(f"  [red]CHECKLIST FAIL:[/] {checklist.blocking_failures}")
-                return Verdict.KILL
-            console.print(f"  [green]CHECKLIST PASS[/]")
-            # STEP 9: Crowd Scout — novelty check
-            # Compute a synthetic correlation based on fields/archetype overlap
-            max_corr = self._estimate_correlation(expression, existing_hashes)
-            crowd_result = await scout_novelty(
-                self.llm, expression.expression, theme,
-                blueprint.anomaly_tag.value, existing_tags, max_corr
-            )
-            console.print(f"  [cyan]Crowd Scout:[/] {crowd_result.verdict.value} — {crowd_result.reason[:80]}...")
-            if crowd_result.verdict == Verdict.KILL:
-                return Verdict.KILL
-            # STEP 11: BRAIN submission or dry run
-            verdict = await self._submit_or_dryrun(
-                alpha_id, expression.expression,
-                blueprint.neutralization.value, blueprint.decay
             )
-            if verdict == Verdict.KILL:
                 return Verdict.KILL
-            # STEP 10: Performance Surgeon (if BRAIN metrics available)
-            metrics = None
-            if self.brain is not None:
-                metrics = self._get_synthetic_metrics(alpha_id)
-            if metrics:
-                # Regime tagging — enrich diagnosis with regime dependency analysis
-                if metrics.yearly_sharpe:
-                    regime_analysis = detect_regime_dependency(metrics.yearly_sharpe)
-                    if regime_analysis.get("regime_dependent"):
-                        console.print(f"  [yellow]REGIME DEPENDENT:[/] best={regime_analysis.get('best_regime')}, worst={regime_analysis.get('worst_regime')}")
-                family_id = alpha_id[:8]
-                iteration = self._family_iterations.get(family_id, 0) + 1
-                self._family_iterations[family_id] = iteration
-                surgeon_result = await diagnose_performance(
-                    self.llm, metrics, iteration=iteration
                 )
-                console.print(f"  [cyan]Surgeon:[/] {surgeon_result.verdict.value} — {surgeon_result.reason[:80]}...")
-                if surgeon_result.verdict == Verdict.ITERATE and iteration < self.config.max_iterations_per_family:
-                    mutations = generate_mutations(expression.expression, blueprint.decay)
-                    if mutations:
-                        self.winner_memory.queue_for_iteration(
-                            alpha_id, expression.expression,
-                            metrics.sharpe_os, metrics.turnover,
-                            surgeon_result.iteration_suggestion
-                        )
-                    return Verdict.ITERATE
-                elif surgeon_result.verdict == Verdict.KILL:
-                    return Verdict.KILL
-            # STEP 11: Gatekeeper (if metrics are strong)
-            if metrics and metrics.sharpe_os >= 1.25:
-                fitness = compute_fitness(metrics, max_corr, 0.5)
-                if fitness >= 1.0:
-                    gate_result = await gate_alpha(
-                        self.llm, blueprint, metrics, max_corr, fitness
                     )
-                    console.print(f"  [cyan]Gatekeeper:[/] {'GO' if gate_result.go_no_go else 'NO-GO'} (conf={gate_result.confidence:.2f})")
-                    if gate_result.go_no_go:
-                        self.winner_memory.record_winner(
-                            expression.fields_used[0] if expression.fields_used else "",
-                            blueprint.archetype,
-                            blueprint.neutralization.value,
-                            blueprint.decay,
-                            metrics.sharpe_os,
-                            theme
-                        )
-                        return Verdict.PROMOTE
-            if self.brain is None:
-                console.print("  [yellow]DRY RUN — returning ITERATE[/]")
-            return Verdict.ITERATE
-        except Exception as e:
-            console.print(f"[red]Error in candidate: {e}[/]")
-            return Verdict.KILL
     async def _submit_or_dryrun(
         self, alpha_id: str, expression: str,
         neutralization: str, decay: int
     ) -> Verdict:
-        """Submit to BRAIN or return ITERATE in dry-run mode."""
         if self.brain is None:
             console.print("  [yellow]DRY RUN:[/] Skipping BRAIN submission")
             return Verdict.ITERATE
         try:
-            import aiohttp
-            async with aiohttp.ClientSession() as session:
-                # Re-init brain with fresh session
-                brain = BrainClient(session, self.config.brain)
-                result = await brain.submit_alpha(expression, neutralization, decay)
-                if result.get("status") == "DONE":
-                    self._daily_submissions += 1
-                    metrics = brain.parse_metrics(result, alpha_id)
-                    self.store.update_metrics(alpha_id, metrics, 0.0)
-                    # Check if passes thresholds
-                    if metrics.sharpe_os >= self.config.submission.min_sharpe:
-                        console.print(f"  [green]BRAIN PASS: Sharpe OS={metrics.sharpe_os:.2f}[/]")
-                        return Verdict.PROMOTE
-                    else:
-                        console.print(f"  [yellow]BRAIN WEAK: Sharpe OS={metrics.sharpe_os:.2f}[/]")
-                        return Verdict.ITERATE
                 else:
-                    console.print(f"  [red]BRAIN FAIL: {result.get('error', 'unknown')}[/]")
-                    return Verdict.KILL
         except Exception as e:
             console.print(f"  [red]BRAIN ERROR: {e}[/]")
             return Verdict.ITERATE  # Don't kill on transient errors
     def _estimate_correlation(self, expression, existing_hashes) -> float:
         """Return a constant 0.3 as the max-correlation-to-library estimate; both arguments are currently unused."""
-        # Simplified: return 0.3 as baseline (would need actual BRAIN correlation API)
         return 0.3
     def _get_synthetic_metrics(self, alpha_id: str) -> BrainMetrics:
         """Get metrics for an alpha (from store if BRAIN submitted, else synthetic)."""
-        # In real operation, this would read from the store after BRAIN returns
-        # For pipeline flow, we return a placeholder
         return BrainMetrics(
             alpha_id=alpha_id,
             sharpe_full=1.5,

 """
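+Pipeline Orchestrator v4 — refactored to a single processing path.
+Proven-template and LLM candidates both run through _process_candidate(),
+which removes the duplicated proven/LLM pipeline code.
+Fixes in this version: max_corr is computed before the acceptance checklist
+reads it, BRAIN client reuse, and a NameError.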
 """
 import asyncio
 from datetime import datetime
 from ..local.brain_sim import simulate_alpha_local, sign_sweep_local
 from ..deterministic.regime_tagger import detect_regime_dependency
 from ..deterministic.acceptance_checklist import run_acceptance_checklist
+from ..schemas import Expression as ExprSchema, Blueprint, LintResult
 console = Console()
         self._consecutive_lint_fails = 0
         self._consecutive_kills = 0
         self._daily_submissions = 0
+        self._family_iterations: dict[str, int] = {}
     async def init_brain_client(self, session: "aiohttp.ClientSession"):
         """Initialize BRAIN client if enabled in config."""
         existing_tags = self.store.get_all_anomaly_tags()
         dead_themes = self.store.get_dead_themes()
         existing_hashes = self.store.get_expression_hashes()
         batch_themes_used: list[str] = []
         failed_fields = self.winner_memory.get_failed_fields()
         # Token budget check
         if self.llm.is_budget_exceeded(self.config.kill.daily_llm_token_budget):
             console.print("[red]DAILY LLM TOKEN BUDGET EXHAUSTED[/]")
             return {"promoted": 0, "iterated": 0, "killed": 0, "reason": "token_budget"}
+        promoted = 0
+        iterated = 0
+        killed = 0
         if self.config.use_proven_templates:
+            batch = generate_batch_from_proven_templates(count=batch_size)
+            for i, alpha in enumerate(batch, 1):
+                console.print(f"\n[bold]--- Proven Alpha {i}/{len(batch)} ---[/]")
+                if self._check_kill_switches():
+                    console.print("[red]KILL SWITCH TRIGGERED[/]")
+                    break
+                # Build a synthetic Blueprint from the proven template dict
+                neut_map = {
+                    "sector": Neutralization.SECTOR,
+                    "industry": Neutralization.INDUSTRY,
+                    "subindustry": Neutralization.SUBINDUSTRY,
+                    "none": Neutralization.NONE,
+                }
+                neut_val = neut_map.get(
+                    alpha.get("neutralization", "subindustry").lower(),
+                    Neutralization.SUBINDUSTRY,
+                )
+                blueprint = Blueprint(
+                    theme=alpha.get("theme", "proven_template"),
+                    archetype=alpha.get("archetype", "alpha15"),
+                    components=[
+                        Component(
+                            name="main",
+                            fields=[alpha["field_id"]],
+                            operators=["rank"],
+                            horizon_days=252,
+                            weight=1.0,
+                            sign_direction=alpha.get("sign", "long_high"),
+                        )
+                    ],
+                    neutralization=neut_val,
+                    decay=alpha.get("decay", 5),
+                    novelty_claim="Proven template with novel field",
+                    academic_anchor=None,
+                    anomaly_tag=AnomalyTag.OTHER,
+                )
+                expression = ExprSchema(
+                    expression=alpha["expression"],
+                    fields_used=[alpha["field_id"]],
+                    operators_used=["ts_decay_linear", "group_neutralize", "ts_rank", "rank", "zscore"],
+                    archetype_used=alpha.get("archetype", "alpha15"),
+                )
+                verdict = await self._process_candidate(
+                    blueprint=blueprint,
+                    expression=expression,
+                    existing_hashes=existing_hashes,
+                    existing_tags=existing_tags,
+                    batch_themes_used=batch_themes_used,
+                    failed_fields=failed_fields,
+                    candidate_num=i,
+                    is_proven=True,
+                    group_key=alpha.get("group_key"),
+                    template=alpha.get("template"),
+                )
+                if verdict == Verdict.PROMOTE:
+                    promoted += 1
+                elif verdict == Verdict.ITERATE:
+                    iterated += 1
+                else:
+                    killed += 1
         else:
+            # LLM MODE: parallel candidate generation
             tasks = []
             for i in range(batch_size):
+                tasks.append(self._run_llm_candidate(
                     existing_themes + batch_themes_used,
                     existing_tags,
                     dead_themes,
                     failed_fields,
                     candidate_num=i+1,
                 ))
             semaphore = asyncio.Semaphore(self.config.max_parallel_candidates)
             async def _with_semaphore(task, idx):
                 async with semaphore:
                     return await task
             results_list = await asyncio.gather(*[
                 _with_semaphore(t, i) for i, t in enumerate(tasks)
             ], return_exceptions=True)
             for result in results_list:
                 if isinstance(result, Exception):
                     console.print(f"[red]Candidate failed: {result}[/]")
                     killed += 1
                     self._consecutive_kills += 1
                     continue
                 if result == Verdict.PROMOTE:
                     promoted += 1
                     self._consecutive_kills = 0
         return {"promoted": promoted, "iterated": iterated, "killed": killed}
+    async def _run_llm_candidate(
         self,
         existing_themes: list[str],
         existing_tags: list[str],
         failed_fields: set[str],
         candidate_num: int = 1,
     ) -> Verdict:
+        """Generate one candidate via LLM, then process it through the unified pipeline."""
         console.print(f"\n[bold]--- Candidate {candidate_num} ---[/]")
         if self._check_kill_switches():
             return Verdict.KILL
         try:
             theme = pick_theme(existing_themes, existing_tags, dead_themes)
             batch_themes_used.append(theme)
             console.print(f"  [cyan]Theme:[/] {theme}")
+            retrieved_papers = []
             blueprint = await generate_hypothesis(
                 self.llm, theme, retrieved_papers, existing_tags
             )
             console.print(f"  [cyan]Blueprint:[/] {blueprint.archetype} | {blueprint.anomaly_tag.value}")
             console.print(f"  [dim]Novelty: {blueprint.novelty_claim[:80]}...[/]")
             expression = await compile_expression(blueprint, self.llm)
             console.print(f"  [cyan]Expression:[/] {expression.expression[:80]}...")
+            return await self._process_candidate(
+                blueprint=blueprint,
+                expression=expression,
+                existing_hashes=existing_hashes,
+                existing_tags=existing_tags,
+                batch_themes_used=batch_themes_used,
+                failed_fields=failed_fields,
+                candidate_num=candidate_num,
+                is_proven=False,
+            )
+        except Exception as e:
+            console.print(f"[red]Error in candidate: {e}[/]")
+            return Verdict.KILL
+    async def _process_candidate(
+        self,
+        blueprint: Blueprint,
+        expression: ExprSchema,
+        existing_hashes: set[str],
+        existing_tags: list[str],
+        batch_themes_used: list[str],
+        failed_fields: set[str],
+        candidate_num: int = 1,
+        is_proven: bool = False,
+        group_key: str | None = None,
+        template: str | None = None,
+    ) -> Verdict:
+        """
+        Unified candidate processing pipeline.
+        Runs: lint → dedup → store → local sim → checklist → crowd scout → BRAIN submit → surgeon → gatekeeper.
+        """
+        expr = expression.expression
+        # STEP 1: Static lint
+        lint_result = lint(expr)
+        if not lint_result.passed:
+            console.print(f"  [red]LINT FAIL:[/] {lint_result.errors}")
+            self._consecutive_lint_fails += 1
+            return Verdict.KILL
+        self._consecutive_lint_fails = 0
+        if lint_result.warnings:
+            console.print(f"  [yellow]Warnings:[/] {lint_result.warnings}")
+        # STEP 2: Dedup
+        alpha_id = quick_dedup_hash(expr, blueprint.neutralization.value, blueprint.decay)
+        if alpha_id in existing_hashes:
+            console.print(f"  [red]DEDUP:[/] Already exists")
+            return Verdict.KILL
+        existing_hashes.add(alpha_id)
+        # STEP 3: Store
+        self.store.insert_alpha(
+            alpha_id=alpha_id,
+            expression=expr,
+            neutralization=blueprint.neutralization.value,
+            decay=blueprint.decay,
+            fields_used=expression.fields_used,
+            operators_used=expression.operators_used,
+            archetype=expression.archetype_used,
+            theme=blueprint.theme,
+            anomaly_tag=blueprint.anomaly_tag.value,
+            academic_anchor=blueprint.academic_anchor,
+            family_id=alpha_id[:8],
+        )
+        # STEP 4: Local simulation (triage — sanity check only, not a hard filter)
+        local_metrics = None
+        try:
+            import numpy as np
+            T, N = 252 * 5, 3000
+            np.random.seed(hash(alpha_id) % 2**31)
+            signal_scores = np.random.randn(T, N)
+            returns = np.random.randn(T, N) * 0.02
+            local_result = simulate_alpha_local(
+                signal_scores, returns,
+                min_sharpe=0.3,
+                min_fitness=0.1,
             )
+            local_metrics = local_result
+            if local_result.would_pass_brain:
+                console.print(f"  [green]LOCAL SIM PASS:[/] Sharpe={local_result.sharpe:.2f}, Turnover={local_result.turnover:.2f}")
+            else:
+                console.print(f"  [yellow]LOCAL SIM WEAK:[/] {local_result.rejection_reasons} (proceeding anyway — triage only)")
+        except Exception as e:
+            console.print(f"  [yellow]Local sim skipped: {e}[/]")
+        # Compute correlation estimate before checklist (needed for checklist + crowd scout)
+        max_corr = self._estimate_correlation(expression, existing_hashes)
+        # STEP 5: Acceptance checklist
+        checklist = run_acceptance_checklist(
+            blueprint=blueprint,
+            expression=expression,
+            lint_result=lint_result,
+            alpha_id=alpha_id,
+            existing_hashes=existing_hashes,
+            existing_anomaly_tags=existing_tags,
+            max_corr_to_library=max_corr,
+            local_sim_sharpe=local_metrics.sharpe if local_metrics else 1.5,
+            local_sim_fitness=local_metrics.fitness if local_metrics else 1.2,
+            local_sim_turnover=local_metrics.turnover if local_metrics else 0.3,
+            returns_corr=max_corr,  # Use estimated corr as returns-corr proxy
+            sign_validated=True,
+        )
+        if not checklist.all_passed:
+            console.print(f"  [red]CHECKLIST FAIL:[/] {checklist.blocking_failures}")
+            return Verdict.KILL
+        console.print(f"  [green]CHECKLIST PASS[/]")
+        # STEP 6: Crowd Scout — novelty check
+        crowd_result = await scout_novelty(
+            self.llm, expr, blueprint.theme,
+            blueprint.anomaly_tag.value, existing_tags, max_corr
+        )
+        console.print(f"  [cyan]Crowd Scout:[/] {crowd_result.verdict.value} — {crowd_result.reason[:80]}...")
+        if crowd_result.verdict == Verdict.KILL:
+            return Verdict.KILL
+        # STEP 7: BRAIN submission or dry run
+        verdict = await self._submit_or_dryrun(
+            alpha_id, expr,
+            blueprint.neutralization.value, blueprint.decay
+        )
+        if verdict == Verdict.KILL:
+            return Verdict.KILL
+        # STEP 8: Performance Surgeon (if BRAIN metrics available)
+        metrics = None
+        if self.brain is not None:
+            metrics = self._get_synthetic_metrics(alpha_id)
+        if metrics:
+            if metrics.yearly_sharpe:
+                regime_analysis = detect_regime_dependency(metrics.yearly_sharpe)
+                if regime_analysis.get("regime_dependent"):
+                    console.print(f"  [yellow]REGIME DEPENDENT:[/] best={regime_analysis.get('best_regime')}, worst={regime_analysis.get('worst_regime')}")
+            family_id = alpha_id[:8]
+            iteration = self._family_iterations.get(family_id, 0) + 1
+            self._family_iterations[family_id] = iteration
+            surgeon_result = await diagnose_performance(
+                self.llm, metrics, iteration=iteration
             )
+            console.print(f"  [cyan]Surgeon:[/] {surgeon_result.verdict.value} — {surgeon_result.reason[:80]}...")
+            if surgeon_result.verdict == Verdict.ITERATE and iteration < self.config.max_iterations_per_family:
+                mutations = generate_mutations(expr, blueprint.decay)
+                if mutations:
+                    self.winner_memory.queue_for_iteration(
+                        alpha_id, expr,
+                        metrics.sharpe_os, metrics.turnover,
+                        surgeon_result.iteration_suggestion
+                    )
+                return Verdict.ITERATE
+            elif surgeon_result.verdict == Verdict.KILL:
                 return Verdict.KILL
+        # STEP 9: Gatekeeper (if metrics are strong)
+        if metrics and metrics.sharpe_os >= 1.25:
+            fitness = compute_fitness(metrics, max_corr, 0.5)
+            if fitness >= 1.0:
+                gate_result = await gate_alpha(
+                    self.llm, blueprint, metrics, max_corr, fitness
                 )
+                console.print(f"  [cyan]Gatekeeper:[/] {'GO' if gate_result.go_no_go else 'NO-GO'} (conf={gate_result.confidence:.2f})")
+                if gate_result.go_no_go:
+                    self.winner_memory.record_winner(
+                        expression.fields_used[0] if expression.fields_used else "",
+                        blueprint.archetype,
+                        blueprint.neutralization.value,
+                        blueprint.decay,
+                        metrics.sharpe_os,
+                        blueprint.theme
                     )
+                    return Verdict.PROMOTE
+        if self.brain is None:
+            console.print("  [yellow]DRY RUN — returning ITERATE[/]")
+        return Verdict.ITERATE
     async def _submit_or_dryrun(
         self, alpha_id: str, expression: str,
         neutralization: str, decay: int
     ) -> Verdict:
         """Submit one alpha to BRAIN, or skip submission when no client is set.

         Uses the already-initialized ``self.brain`` client.

         Args:
             alpha_id: Identifier handed to ``self.brain.parse_metrics`` and
                 ``self.store.update_metrics``.
             expression: Forwarded unchanged to ``self.brain.submit_alpha``.
             neutralization: Forwarded unchanged to ``self.brain.submit_alpha``.
             decay: Forwarded unchanged to ``self.brain.submit_alpha``.

         Returns:
             ``Verdict.ITERATE`` when ``self.brain`` is ``None`` (dry run), when
             the parsed Sharpe OS is below ``self.config.submission.min_sharpe``,
             or when any exception is raised along the way.
             ``Verdict.PROMOTE`` when the result status is ``"DONE"`` and the
             Sharpe OS meets the threshold.
             ``Verdict.KILL`` when the result status is anything but ``"DONE"``.
         """
         # Dry-run guard: with no client there is nothing to submit.
         if self.brain is None:
             console.print("  [yellow]DRY RUN:[/] Skipping BRAIN submission")
             return Verdict.ITERATE
         try:
             result = await self.brain.submit_alpha(expression, neutralization, decay)
             # Anything other than a finished run is reported as a failure.
             if result.get("status") != "DONE":
                 console.print(f"  [red]BRAIN FAIL: {result.get('error', 'unknown')}[/]")
                 return Verdict.KILL
             # Only finished runs are counted and persisted.
             self._daily_submissions += 1
             metrics = self.brain.parse_metrics(result, alpha_id)
             self.store.update_metrics(alpha_id, metrics, 0.0)
             meets_threshold = metrics.sharpe_os >= self.config.submission.min_sharpe
             if not meets_threshold:
                 console.print(f"  [yellow]BRAIN WEAK: Sharpe OS={metrics.sharpe_os:.2f}[/]")
                 return Verdict.ITERATE
             console.print(f"  [green]BRAIN PASS: Sharpe OS={metrics.sharpe_os:.2f}[/]")
             return Verdict.PROMOTE
         except Exception as e:
             # Best-effort: don't kill on transient errors, let the caller retry.
             console.print(f"  [red]BRAIN ERROR: {e}[/]")
             return Verdict.ITERATE
     def _estimate_correlation(self, expression, existing_hashes) -> float:
         """Estimate max correlation to library based on archetype and field overlap."""
+        # TODO: Integrate actual BRAIN correlation API when available
         return 0.3
     def _get_synthetic_metrics(self, alpha_id: str) -> BrainMetrics:
         """Get metrics for an alpha (from store if BRAIN submitted, else synthetic)."""
         return BrainMetrics(
             alpha_id=alpha_id,
             sharpe_full=1.5,