Spaces:

TheLinconX
/

contextforge-demo

Sleeping

File size: 10,930 Bytes

"""Pipeline orchestrator v3.0 - wired to ContextForge registry."""
import asyncio
import logging
import time
from typing import Any, Optional

from agents.demo_agents import create_agents

from apohara_context_forge.dedup.faiss_index import FAISSContextIndex
from apohara_context_forge.dedup.lsh_engine import LSHTokenMatcher
from apohara_context_forge.metrics.vram_monitor import VRAMMonitor
from apohara_context_forge.pipeline_config import PipelineConfig
from apohara_context_forge.registry.context_registry import ContextRegistry
from apohara_context_forge.registry.vram_aware_cache import VRAMAwareCache

# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)


class Pipeline:
    """
    Orchestrates the demo agent pipeline with the ContextForge v3.0 registry.

    Agents come from ``create_agents()`` and are run sequentially; each
    agent's output is added to the input of the agents that follow it.

    Uses LSHTokenMatcher + FAISSContextIndex + VRAMAwareCache for:
    - Token-level SimHash deduplication (LSH)
    - O(log n) ANN semantic search (FAISS)
    - VRAM-pressure-responsive eviction (VRAMAwareCache)

    ``self.metrics`` is created once in ``__init__`` and only ever
    incremented, so the ``summary`` returned by ``run()`` is cumulative over
    every ``run()`` call made on the same instance.

    Usage:
        config = PipelineConfig(model_id="Qwen/Qwen3-235B-A22B")
        pipeline = Pipeline(config=config)
        await pipeline.start()
        result = await pipeline.run("What is machine learning?")
        await pipeline.stop()
    """

    def __init__(
        self,
        config: Optional[PipelineConfig] = None,
        enable_contextforge: bool = True,
    ):
        """Validate the configuration, create the agents and zero the metrics.

        Args:
            config: Pipeline settings; ``PipelineConfig()`` is used when omitted.
            enable_contextforge: When False, ``start()`` does nothing and
                ``run()`` skips every registry interaction.
        """
        self._config = config or PipelineConfig()
        self._config.validate()
        self.enable_contextforge = enable_contextforge

        # Both are created lazily by start() and cleared again by stop().
        self._registry: Optional[ContextRegistry] = None
        self._vram_monitor: Optional[VRAMMonitor] = None

        # Create demo agents
        self.agents = create_agents()

        # Metrics collection (cumulative across run() calls)
        self.metrics = {
            "total_tokens_before": 0,
            "total_tokens_after": 0,
            "agent_ttft_ms": [],
            "strategies_used": {},
            "cache_hits": 0,
            "cache_misses": 0,
            "lsh_matches": 0,
        }

    async def start(self) -> None:
        """Start the VRAM monitor and the ContextForge registry.

        Does nothing when ContextForge is disabled. If building or starting
        the registry fails, the already-started VRAM monitor is stopped again
        before the error is re-raised, so a failed start leaves nothing running.
        """
        if not self.enable_contextforge:
            return

        # Initialize VRAM monitor
        monitor = VRAMMonitor()
        await monitor.start()
        self._vram_monitor = monitor

        # Initialize registry with wired components
        try:
            registry = ContextRegistry(
                lsh_matcher=LSHTokenMatcher(
                    block_size=self._config.block_size,
                    hamming_threshold=self._config.hamming_threshold,
                ),
                vram_cache=VRAMAwareCache(
                    max_token_budget=self._config.vram_budget_tokens,
                ),
                faiss_index=FAISSContextIndex(dim=self._config.faiss_dim),
                vram_budget_tokens=self._config.vram_budget_tokens,
                block_size=self._config.block_size,
                hamming_threshold=self._config.hamming_threshold,
                faiss_nlist=self._config.faiss_nlist,
            )
            await registry.start()
        except BaseException:
            # Also covers cancellation: never leave the monitor running
            # behind a pipeline that failed to start.
            self._vram_monitor = None
            await monitor.stop()
            raise
        self._registry = registry

        logger.info(
            "Pipeline started with ContextForge v3.0 (model=%s)",
            self._config.model_id,
        )

    async def stop(self) -> None:
        """Stop the registry and the VRAM monitor.

        The monitor is stopped even when stopping the registry raises. Both
        references are cleared before their ``stop()`` is awaited, so calling
        this again afterwards is harmless.
        """
        try:
            if self._registry is not None:
                registry, self._registry = self._registry, None
                await registry.stop()
        finally:
            if self._vram_monitor is not None:
                monitor, self._vram_monitor = self._vram_monitor, None
                await monitor.stop()
        logger.info("Pipeline stopped")

    async def run(self, query: str) -> dict[str, Any]:
        """Run every agent in order for *query* and collect metrics.

        Args:
            query: The user query; it is passed to each agent under the
                ``"query"`` key together with the outputs of earlier agents.

        Returns:
            A dict with the query, the ``responder`` agent's output (empty
            string if there is none), the total duration in milliseconds,
            the per-agent outputs/metrics of this run, a ``summary`` built
            from the cumulative ``self.metrics``, and a ``contextforge``
            section (``None`` when ContextForge is disabled).
        """
        logger.info("Starting pipeline for query: %s...", query[:50])

        input_data: dict[str, Any] = {"query": query}
        pipeline_output: dict[str, Any] = {}
        # perf_counter is monotonic, so durations cannot go negative or jump
        # when the system clock is adjusted.
        start_time = time.perf_counter()

        for agent in self.agents:
            agent_start = time.perf_counter()

            # `is not None` rather than truthiness: a registry object that
            # happens to be falsy (e.g. empty) must still be used.
            if self.enable_contextforge and self._registry is not None:
                await self._register_with_contextforge(agent)

            # Process agent
            result = await agent.process(input_data)
            # Covers registry registration plus agent processing.
            agent_duration = (time.perf_counter() - agent_start) * 1000

            pipeline_output[f"{agent.agent_id}_output"] = result["result"]
            pipeline_output[f"{agent.agent_id}_metrics"] = {
                "ttft_ms": agent_duration,
                "strategy": result["strategy"],
                "tokens_before": result["tokens_before"],
                "tokens_after": result["tokens_after"],
            }

            self.metrics["total_tokens_before"] += result["tokens_before"]
            self.metrics["total_tokens_after"] += result["tokens_after"]
            self.metrics["agent_ttft_ms"].append(agent_duration)
            strategies = self.metrics["strategies_used"]
            strategies[result["strategy"]] = strategies.get(result["strategy"], 0) + 1

            # Make this agent's output visible to the agents that follow.
            input_data[f"{agent.agent_id}_output"] = result["result"]

        total_duration = (time.perf_counter() - start_time) * 1000

        tokens_before = self.metrics["total_tokens_before"]
        tokens_after = self.metrics["total_tokens_after"]
        ttft_samples = self.metrics["agent_ttft_ms"]
        # Guard both ratios: with no agents (or no tokens) there is nothing
        # to divide by, and the summary should report 0 instead of raising.
        avg_ttft_ms = sum(ttft_samples) / len(ttft_samples) if ttft_samples else 0
        token_savings_pct = (
            (tokens_before - tokens_after) / tokens_before * 100
            if tokens_before > 0 else 0
        )

        contextforge_stats = None
        if self.enable_contextforge:
            monitor = self._vram_monitor
            registry = self._registry
            contextforge_stats = {
                "vram_pressure": monitor.get_pressure() if monitor is not None else 0.0,
                "eviction_mode": registry.get_vram_mode() if registry is not None else "unknown",
                "registry_size": registry.registry_size if registry is not None else 0,
            }

        return {
            "query": query,
            "final_output": pipeline_output.get("responder_output", ""),
            "pipeline_duration_ms": total_duration,
            "agent_metrics": pipeline_output,
            "summary": {
                "total_tokens_before": tokens_before,
                "total_tokens_after": tokens_after,
                "avg_ttft_ms": avg_ttft_ms,
                "strategies": self.metrics["strategies_used"],
                "token_savings_pct": token_savings_pct,
                "cache_hits": self.metrics["cache_hits"],
                "cache_misses": self.metrics["cache_misses"],
                "lsh_matches": self.metrics["lsh_matches"],
            },
            "contextforge": contextforge_stats,
        }

    async def _register_with_contextforge(self, agent) -> None:
        """Register *agent* with the registry and record shared-context metrics.

        Best-effort: any exception is logged as a warning and swallowed so a
        registry problem never prevents the agent itself from running.
        Callers must ensure ``self._registry`` is set.
        """
        try:
            system_prompt = self._get_system_prompt()
            role_prompt = self._build_role_prompt(agent)

            await self._registry.register_agent(
                agent.agent_id,
                system_prompt,
                role_prompt,
            )

            # Shared context is only queried once at least two agents exist;
            # below that neither a hit nor a miss is counted.
            all_agents = await self._registry.get_all_agents()
            if len(all_agents) < 2:
                return

            shared_results = await self._registry.get_shared_context(
                all_agents,
                target_agent_id=agent.agent_id,
            )
            if shared_results:
                self.metrics["lsh_matches"] += 1
                self.metrics["cache_hits"] += 1
            else:
                self.metrics["cache_misses"] += 1
        except Exception as e:
            logger.warning("ContextForge error for %s: %s", agent.agent_id, e)

    def _build_shared_context(self, input_data: dict, agent) -> str:
        """Build the shared context string for an agent.

        Combines the query, the value stored under ``"<agent_id>_output"`` in
        *input_data* (empty string if absent) and the agent's role.

        NOTE(review): ``run()`` does not use this string. The "Previous"
        field reads the agent's *own* output key, which ``run()`` only sets
        after that agent has been processed -- confirm the intended source.
        """
        prev_output = input_data.get(f"{agent.agent_id}_output", "")
        return f"Query: {input_data.get('query', '')}\nPrevious: {prev_output}\nRole: {agent.role}"

    def _get_system_prompt(self) -> str:
        """Get the canonical system prompt (shared across all agents)."""
        return (
            "You are a helpful AI assistant. "
            "Provide accurate, detailed, and thoughtful responses. "
            "Use chain-of-thought reasoning when appropriate."
        )

    def _build_role_prompt(self, agent) -> str:
        """Build the agent-specific role prompt from its role and id."""
        return f"You are a {agent.role}. {agent.agent_id}"

    @property
    def registry(self) -> Optional[ContextRegistry]:
        """Direct access to ContextRegistry (``None`` unless started)."""
        return self._registry


async def run_pipeline_dry():
    """Print the agent plan and ContextForge wiring without running anything.

    The agents are created via ``create_agents()`` only so they can be
    listed; no agent is processed and no registry is started.
    """
    planned_agents = create_agents()

    print("\n=== ContextForge v3.0 Pipeline - Dry Run ===")
    print(f"Total agents: {len(planned_agents)}\n")

    # One numbered line per agent, counting from 1.
    position = 0
    for planned in planned_agents:
        position += 1
        print(f"{position}. {planned.agent_id.upper()} ({planned.role})")

    # Fixed description of the flow and wiring, emitted line by line.
    static_lines = (
        "\nPipeline flow:",
        "  Query -> Retriever -> Reranker -> Summarizer -> Critic -> Responder",
        "\nContextForge v3.0 wiring:",
        "  - LSHTokenMatcher: SimHash on Qwen3 token IDs",
        "  - FAISSContextIndex: O(log n) ANN search",
        "  - VRAMAwareCache: 5-mode VRAM-pressure eviction",
        "\nEach agent will:",
        "  1. Register context with ContextForge (LSH + VRAM cache)",
        "  2. Query shared context via FAISS ANN + LSH validation",
        "  3. Return result with metrics\n",
    )
    for text in static_lines:
        print(text)


if __name__ == "__main__":
    # Command-line entry point: either print the dry-run plan or execute the
    # pipeline once for --query and print a short summary of the result.
    import argparse

    parser = argparse.ArgumentParser(description="ContextForge v3.0 Pipeline")
    parser.add_argument("--dry-run", action="store_true", help="Print plan without running")
    parser.add_argument("--query", default="What is machine learning?", help="Query to process")
    parser.add_argument(
        "--no-contextforge",
        action="store_true",
        help="Disable ContextForge (use raw pipeline)",
    )
    args = parser.parse_args()

    if args.dry_run:
        asyncio.run(run_pipeline_dry())
    else:
        config = PipelineConfig()
        pipeline = Pipeline(config=config, enable_contextforge=not args.no_contextforge)

        async def main():
            """Start the pipeline, run the query, and always stop it again."""
            await pipeline.start()
            try:
                return await pipeline.run(args.query)
            finally:
                # Stop the registry and VRAM monitor even when run() raises,
                # so a failed query does not leave them running.
                await pipeline.stop()

        result = asyncio.run(main())
        print("\n=== Pipeline Result ===")
        print(f"Token savings: {result['summary']['token_savings_pct']:.1f}%")
        print(f"Avg TTFT: {result['summary']['avg_ttft_ms']:.1f}ms")
        print(f"Strategies: {result['summary']['strategies']}")
        # The "contextforge" section is None when ContextForge is disabled.
        if result.get("contextforge"):
            print(f"VRAM pressure: {result['contextforge']['vram_pressure']:.2%}")
            print(f"Eviction mode: {result['contextforge']['eviction_mode']}")
            print(f"Registry size: {result['contextforge']['registry_size']}")