"""
routing.py — SLM-native LLM call router with cost homeostasis.
Routes tasks to the smallest capable model. Local-first by default.
Enforces cost, latency, and token budgets as hard constraints.
Complexity classification:
simple → single SLM call (summarize, answer simple Q)
moderate → sequential chain (plan → execute)
complex → parallel specialists (research + code + review)
critical → specialists + critic ensemble + optional HITL
Router decisions are logged and reproducible.
"""
from __future__ import annotations
import logging
import time
from dataclasses import dataclass, field
from enum import Enum
from typing import Any
from purpose_agent.llm_backend import LLMBackend
# Module-level logger named after this module (standard logging convention).
logger = logging.getLogger(__name__)
class TaskComplexity(str, Enum):
    """Task complexity tiers that drive model/strategy selection.

    Ordered from cheapest (SIMPLE: a single SLM call) to most involved
    (CRITICAL: specialist + critic ensemble, optionally human-in-the-loop),
    per the module docstring. The str mixin lets values compare/serialize
    as plain strings.
    """
    SIMPLE = "simple"
    MODERATE = "moderate"
    COMPLEX = "complex"
    CRITICAL = "critical"
@dataclass
class RoutingPolicy:
    """Policy governing model selection and cost control.

    Budgets are intended as hard constraints on routing; model spec
    strings are backend-qualified, e.g. "ollama:<model>" for local and
    "openrouter:<model>" for cloud.
    """
    prefer_local: bool = True  # prefer local models when any are registered
    max_cost_per_task_usd: float = 0.10  # hard USD budget per task
    max_latency_per_call_s: float = 30.0  # per-call latency ceiling, seconds
    max_tokens_per_task: int = 10000  # token budget per task
    allow_cloud_fallback: bool = True  # permit cloud models at all
    fallback_model: str = ""  # NOTE(review): unused in this file — confirm before relying on it
    local_model: str = "ollama:qwen3:1.7b"  # default local model spec
    cloud_model: str = "openrouter:meta-llama/llama-3.3-70b-instruct"  # default cloud model spec
@dataclass
class ModelOption:
    """A model available for routing."""
    spec: str  # backend-qualified spec, e.g. "ollama:qwen3:1.7b"
    is_local: bool = True  # whether the model runs locally
    cost_per_1k_tokens: float = 0.0  # $0 for local
    avg_latency_s: float = 1.0  # typical per-call latency, seconds
    max_context: int = 32768  # context window size in tokens
    capabilities: list[str] = field(default_factory=list)  # ["code","reasoning","general"]
@dataclass
class RoutingDecision:
    """Recorded decision from the router (appended to an audit log)."""
    task_summary: str  # truncated task text (first 80 chars)
    complexity: TaskComplexity  # tier assigned by the classifier
    selected_model: str  # chosen model spec string
    reason: str  # human-readable selection rationale
    timestamp: float = field(default_factory=time.time)  # wall-clock creation time
    estimated_cost: float = 0.0  # NOTE(review): never populated by route() in this file; stays 0.0 — confirm
# Keyword-based complexity heuristics
# Matched against the lowercased purpose text by TaskComplexityClassifier.
_COMPLEX_KEYWORDS = {"research", "analyze", "compare", "design", "architect", "security", "audit"}
_CRITICAL_KEYWORDS = {"deploy", "production", "delete", "admin", "payment", "credential", "secret"}
# NOTE: "what is" is a multi-word phrase — any matching strategy must handle
# phrases, not only single whitespace-split tokens.
_SIMPLE_KEYWORDS = {"summarize", "translate", "hello", "what is", "define", "explain"}
class TaskComplexityClassifier:
    """Classifies task complexity from the purpose description.

    Keyword-driven: critical keywords win over complex, which win over
    simple. Otherwise, long or code-related purposes are MODERATE and
    everything else defaults to SIMPLE.
    """

    def classify(self, purpose: str) -> TaskComplexity:
        """Return the complexity tier for *purpose*.

        Fix: the original checked keywords against ``set(purpose.split())``,
        so the multi-word keyword "what is" in _SIMPLE_KEYWORDS could never
        match. Single-word keywords still match whole words only; keywords
        containing a space are matched as phrases within the text.
        """
        text = purpose.lower()
        words = set(text.split())

        def _hit(keywords: set[str]) -> bool:
            # Phrase (substring) match when the keyword contains a space,
            # whole-word match otherwise.
            return any((kw in text) if " " in kw else (kw in words) for kw in keywords)

        if _hit(_CRITICAL_KEYWORDS):
            return TaskComplexity.CRITICAL
        if _hit(_COMPLEX_KEYWORDS):
            return TaskComplexity.COMPLEX
        if _hit(_SIMPLE_KEYWORDS):
            return TaskComplexity.SIMPLE
        # Default: moderate for long or code-related purposes.
        if len(purpose) > 100 or "code" in text or "function" in text:
            return TaskComplexity.MODERATE
        return TaskComplexity.SIMPLE
class ModelSelector:
    """
    Selects the best model for a task given complexity and policy.
    Rules:
    1. Local-first (if policy.prefer_local and local model available)
    2. Smallest capable model (don't use 70B for "say hello")
    3. Respect cost/latency budgets
    4. Fallback to cloud only when policy allows and local fails
    """

    def __init__(self, models: list[ModelOption] | None = None, policy: RoutingPolicy | None = None):
        self.models = models or []
        self.policy = policy or RoutingPolicy()

    def select(self, complexity: TaskComplexity) -> str:
        """Select the best model spec for given complexity.

        Falls back to ``policy.local_model`` when no registered candidate
        fits (and for MODERATE tasks, which have no special handling).
        """
        # Rule 3: enforce the per-call latency budget. (The original
        # documented this rule but never applied it; task-level cost
        # budgeting remains the router's job.)
        candidates = [m for m in self.models if m.avg_latency_s <= self.policy.max_latency_per_call_s]
        # Rule 1: local-first when requested and local candidates exist.
        if self.policy.prefer_local:
            local = [m for m in candidates if m.is_local]
            if local:
                candidates = local
        # Rule 2: simple tasks get the cheapest surviving candidate.
        if complexity == TaskComplexity.SIMPLE and candidates:
            return min(candidates, key=lambda m: m.cost_per_1k_tokens).spec
        if complexity in (TaskComplexity.COMPLEX, TaskComplexity.CRITICAL):
            # Prefer a capable registered candidate first; per rule 4 the
            # cloud model is a *fallback*, not the first choice. (The
            # original returned the cloud model before even checking
            # capable local candidates, contradicting rules 1 and 4.)
            capable = [m for m in candidates if "reasoning" in m.capabilities or "code" in m.capabilities]
            if capable:
                return capable[0].spec
            if self.policy.allow_cloud_fallback:
                return self.policy.cloud_model
        # Default: the policy's local model.
        return self.policy.local_model
class LLMCallRouter:
    """
    Main router: classifies task → selects model → logs decision.
    Usage:
        router = LLMCallRouter(policy=RoutingPolicy(prefer_local=True))
        model_spec = router.route("Write a fibonacci function")
        # -> "ollama:qwen3:1.7b" (local, code task, moderate complexity)
        model_spec = router.route("Audit production deployment for security vulnerabilities")
        # -> cloud model (critical task, needs strong reasoning)
    """

    def __init__(self, policy: RoutingPolicy | None = None, models: list[ModelOption] | None = None):
        self.policy = policy or RoutingPolicy()
        self.classifier = TaskComplexityClassifier()
        self.selector = ModelSelector(models or [], self.policy)
        self._decisions: list[RoutingDecision] = []  # append-only audit log
        self._total_cost = 0.0  # USD accumulated via record_cost()

    def route(self, task: str) -> str:
        """Route a task to the best model. Returns model spec string.

        Every call appends a RoutingDecision to ``self.decisions`` so
        routing is auditable and reproducible.
        """
        complexity = self.classifier.classify(task)
        # Budget check. NOTE(review): _total_cost accumulates across *all*
        # routed tasks (until reset_budget()), yet it is compared against
        # the *per-task* cap — confirm cumulative semantics is intended.
        if self._total_cost >= self.policy.max_cost_per_task_usd:
            # Over budget: skip selection entirely and force the local model.
            # (The original ran the selector first and discarded the result.)
            selected = self.policy.local_model
            reason = "budget_exceeded: forced local"
        else:
            selected = self.selector.select(complexity)
            reason = f"complexity={complexity.value}"
        decision = RoutingDecision(
            task_summary=task[:80],  # truncate to keep the log compact
            complexity=complexity,
            selected_model=selected,
            reason=reason,
        )
        self._decisions.append(decision)
        # Lazy %-args: rendered only if INFO is enabled. Also repairs the
        # mojibake arrow ("β") in the original message.
        logger.info("Router: %s -> %s (%s)", complexity.value, selected, reason)
        return selected

    def record_cost(self, cost_usd: float) -> None:
        """Record cost of a completed call for budget tracking."""
        self._total_cost += cost_usd

    @property
    def total_cost(self) -> float:
        """Total USD recorded since construction or the last reset_budget()."""
        return self._total_cost

    @property
    def decisions(self) -> list[RoutingDecision]:
        """All routing decisions made so far, in order."""
        return self._decisions

    def reset_budget(self) -> None:
        """Zero the accumulated cost; the decision log is kept."""
        self._total_cost = 0.0