gaurv007 committed on
Commit
e07ed35
·
verified ·
1 Parent(s): 55687f7

Upload alpha_factory/orchestration/pipeline.py with huggingface_hub

Browse files
alpha_factory/orchestration/pipeline.py ADDED
@@ -0,0 +1,188 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""
Pipeline Orchestrator — the DAG that connects all agents.
Runs the full alpha generation → evaluation → promotion loop.
"""
5
+ import asyncio
6
+ from datetime import datetime
7
+ from rich.console import Console
8
+ from rich.panel import Panel
9
+
10
+ from ..config import Config, load_config
11
+ from ..infra import LLMClient, FactorStore, BrainClient
12
+ from ..deterministic import lint, quick_dedup_hash, pick_theme, compute_fitness
13
+ from ..personas import (
14
+ generate_hypothesis,
15
+ compile_expression,
16
+ scout_novelty,
17
+ diagnose_performance,
18
+ gate_alpha,
19
+ )
20
+ from ..schemas import Verdict
21
+
22
+ console = Console()
23
+
24
+
25
class AlphaPipeline:
    """
    End-to-end alpha generation pipeline.

    A single run pushes a batch of N candidate alphas through every
    stage: theme selection, hypothesis generation, expression
    compilation, static lint, dedup, storage, and (when a BRAIN client
    is attached) submission.
    """

    def __init__(self, config: Config):
        self.config = config
        self.llm = LLMClient(config.llm)
        self.store = FactorStore(config.paths.factor_store / "alphas.duckdb")
        # A real BrainClient is attached in run(); None means dry-run mode.
        self.brain: BrainClient | None = None

        # Rolling counters that feed the kill switches.
        self._consecutive_lint_fails = 0
        self._consecutive_kills = 0
        self._daily_submissions = 0

    async def run_batch(self, batch_size: int | None = None):
        """Run one batch of alpha generation + evaluation.

        Returns a dict with the counts of promoted / iterated / killed
        candidates for the batch.
        """
        batch_size = batch_size or self.config.batch_size

        console.print(Panel(
            f"[bold green]Alpha Factory[/] — Batch of {batch_size} candidates\n"
            f"[dim]{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}[/]",
            title="🏭 Pipeline Start"
        ))

        # Snapshot the current library state once, up front.
        themes = self.store.get_all_themes()
        tags = self.store.get_all_anomaly_tags()
        dead = self.store.get_dead_themes()
        hashes = self.store.get_expression_hashes()

        tally = {"promoted": 0, "iterated": 0, "killed": 0}

        for idx in range(batch_size):
            console.print(f"\n[bold]━━━ Candidate {idx + 1}/{batch_size} ━━━[/]")

            # Abort the whole batch as soon as any kill switch fires.
            if self._check_kill_switches():
                console.print("[red]⚠️ KILL SWITCH TRIGGERED — stopping batch[/]")
                break

            try:
                verdict = await self._run_single_candidate(themes, tags, dead, hashes)
            except Exception as e:
                console.print(f"[red]Error: {e}[/]")
                tally["killed"] += 1
                continue

            if verdict == Verdict.PROMOTE:
                tally["promoted"] += 1
            elif verdict == Verdict.ITERATE:
                tally["iterated"] += 1
            else:
                tally["killed"] += 1
                self._consecutive_kills += 1

            # Any verdict other than an explicit KILL breaks the kill streak.
            if verdict != Verdict.KILL:
                self._consecutive_kills = 0

        console.print(Panel(
            f"[green]Promoted:[/] {tally['promoted']} [yellow]Iterate:[/] {tally['iterated']} [red]Killed:[/] {tally['killed']}\n"
            f"Tokens used: {self.llm.tokens_used:,} | BRAIN submissions: {self._daily_submissions}",
            title="📊 Batch Complete"
        ))

        return tally

    async def _run_single_candidate(
        self,
        existing_themes: list[str],
        existing_tags: list[str],
        dead_themes: list[str],
        existing_hashes: set[str],
    ) -> Verdict:
        """Push one candidate through every pre-submission pipeline stage."""

        # Step 1 — deterministic theme selection.
        theme = pick_theme(existing_themes, existing_tags, dead_themes)
        console.print(f" [cyan]Theme:[/] {theme}")

        # Step 2 — hypothesis generation (LLM persona).
        # TODO: RAG retrieval from ChromaDB would go here
        papers: list = []  # placeholder until ChromaDB retrieval lands

        bp = await generate_hypothesis(self.llm, theme, papers, existing_tags)
        console.print(f" [cyan]Blueprint:[/] {bp.archetype} | {bp.anomaly_tag.value}")
        console.print(f" [dim]Novelty: {bp.novelty_claim[:80]}...[/]")

        # Step 3 — compile the blueprint into a concrete expression.
        expr = await compile_expression(bp, self.llm)
        console.print(f" [cyan]Expression:[/] {expr.expression[:80]}...")

        # Step 4 — deterministic static lint; a failure is an immediate KILL.
        report = lint(expr.expression)
        if not report.passed:
            console.print(f" [red]LINT FAIL:[/] {report.errors}")
            self._consecutive_lint_fails += 1
            return Verdict.KILL

        self._consecutive_lint_fails = 0
        if report.warnings:
            console.print(f" [yellow]Warnings:[/] {report.warnings}")

        # Step 5 — cheap dedup hash against the existing factor store.
        alpha_id = quick_dedup_hash(
            expr.expression, bp.neutralization.value, bp.decay
        )
        if alpha_id in existing_hashes:
            console.print(" [red]DEDUP:[/] Already exists in factor store")
            return Verdict.KILL

        # Step 6 — persist the candidate.
        self.store.insert_alpha(
            alpha_id=alpha_id,
            expression=expr.expression,
            neutralization=bp.neutralization.value,
            decay=bp.decay,
            fields_used=expr.fields_used,
            operators_used=expr.operators_used,
            archetype=expr.archetype_used,
            theme=theme,
            anomaly_tag=bp.anomaly_tag.value,
            academic_anchor=bp.academic_anchor,
        )

        # Step 7 — BRAIN submission; skipped entirely in dry-run mode.
        if self.brain is None:
            console.print(" [yellow]DRY RUN:[/] Skipping BRAIN submission (no client configured)")
            console.print(f" [green]✓ Candidate {alpha_id} passed all pre-submission checks[/]")
            return Verdict.ITERATE  # real metrics are required before PROMOTE

        # TODO: Submit to BRAIN, poll results, harvest metrics
        # metrics = await self._submit_and_harvest(alpha_id, expression, blueprint)
        # ... crowd scout, surgeon, gatekeeper follow

        return Verdict.ITERATE

    def _check_kill_switches(self) -> bool:
        """Return True when any configured safety limit has been hit."""
        limits = self.config.kill

        if self._consecutive_lint_fails >= limits.consecutive_lint_fail_max:
            console.print("[red]Kill switch: too many consecutive lint failures[/]")
            return True
        if self._consecutive_kills >= limits.consecutive_kill_verdict_max:
            console.print("[red]Kill switch: too many consecutive kill verdicts[/]")
            return True
        if self._daily_submissions >= limits.daily_brain_submissions_max:
            console.print("[red]Kill switch: daily submission limit reached[/]")
            return True
        return False

    def close(self):
        """Release held resources (the factor store connection)."""
        self.store.close()