narcolepticchicken
/

agent-cost-optimizer

Safetensors

Model card Files Files and versions

xet

Community

narcolepticchicken committed about 16 hours ago

Commit

e6100b5

verified ·

1 Parent(s): 1b0e9a1

Upload aco/router.py with huggingface_hub

Browse files

Files changed (1) hide show

aco/router.py +132 -231

aco/router.py CHANGED Viewed

@@ -1,245 +1,146 @@
-"""Model Cascade Router - Module 3.
-Routes agent requests through a cascade of models:
-tiny local → cheap small cloud → medium → frontier → specialist.
-Supports:
-A. always frontier
-B. static routing
-C. prompt-only router
-D. trained cost-aware router
-E. trained router + verifier fallback
-"""
-import random
-from typing import Dict, List, Optional, Tuple
 from dataclasses import dataclass
-from .trace_schema import TaskType, Outcome
-from .config import ACOConfig, ModelConfig
-from .classifier import TaskPrediction
 @dataclass
 class RoutingDecision:
     model_id: str
-    provider: str
     tier: int
     confidence: float
     reasoning: str
-    fallback_model_id: Optional[str] = None
-    use_verifier: bool = False
-    max_tokens: int = 4096
-    temperature: float = 0.7
 class ModelCascadeRouter:
-    """Routes tasks to the cheapest acceptable model."""
-    TIER_ORDER = [1, 2, 3, 4, 5]  # tiny → cheap → medium → frontier → specialist
-    def __init__(self, config: ACOConfig):
-        self.config = config
-        self.models_by_tier: Dict[int, List[ModelConfig]] = {t: [] for t in self.TIER_ORDER}
-        self._build_tier_index()
-        self.routing_stats: Dict[str, Dict] = {}
-        self.decision_history: List[Dict] = []
-    def _build_tier_index(self):
-        for name, mc in self.config.models.items():
-            self.models_by_tier.setdefault(mc.strength_tier, []).append(mc)
-    def route(self, task_prediction: TaskPrediction, routing_mode: str = "cascade") -> RoutingDecision:
-        """Select model based on task prediction and routing policy."""
-        if routing_mode == "always_frontier":
-            return self._route_always_frontier(task_prediction)
-        elif routing_mode == "static":
-            return self._route_static(task_prediction)
-        elif routing_mode == "prompt_only":
-            return self._route_prompt_only(task_prediction)
-        elif routing_mode == "learned":
-            return self._route_learned(task_prediction)
-        elif routing_mode == "learned_verifier":
-            return self._route_learned(task_prediction, verifier_fallback=True)
-        else:
-            return self._route_cascade(task_prediction)
-    def _route_always_frontier(self, prediction: TaskPrediction) -> RoutingDecision:
-        frontier = self.models_by_tier.get(4, [])
-        if not frontier:
-            frontier = self.models_by_tier.get(5, [])
-        if not frontier:
-            frontier = self.models_by_tier.get(3, [])
-        model = frontier[0] if frontier else list(self.config.models.values())[0]
-        return RoutingDecision(
-            model_id=model.model_id,
-            provider=model.provider,
-            tier=4,
-            confidence=1.0,
-            reasoning="Always frontier policy",
-            max_tokens=min(prediction.expected_cost * 50000, model.max_context),
-        )
-    def _route_static(self, prediction: TaskPrediction) -> RoutingDecision:
-        # Static mapping: task type -> tier
-        static_map = {
-            TaskType.QUICK_ANSWER: 1,
-            TaskType.UNKNOWN_AMBIGUOUS: 2,
-            TaskType.TOOL_HEAVY: 2,
-            TaskType.RETRIEVAL_HEAVY: 2,
-            TaskType.DOCUMENT_DRAFTING: 3,
-            TaskType.CODING: 3,
-            TaskType.RESEARCH: 4,
-            TaskType.LONG_HORIZON: 4,
-            TaskType.LEGAL_REGULATED: 5,
         }
-        tier = static_map.get(prediction.task_type, 3)
-        models = self.models_by_tier.get(tier, self.models_by_tier[3])
-        model = models[0] if models else list(self.config.models.values())[0]
-        return RoutingDecision(
-            model_id=model.model_id,
-            provider=model.provider,
-            tier=tier,
-            confidence=0.6,
-            reasoning=f"Static routing: {prediction.task_type.value} -> tier {tier}",
-            fallback_model_id=self._next_tier_model(tier).model_id if tier < 5 else None,
-        )
-    def _route_prompt_only(self, prediction: TaskPrediction) -> RoutingDecision:
-        """Use prompt heuristics to decide model tier."""
-        # Heuristic: if risk > 0.7 or expected tier >= 4, use frontier
-        if prediction.risk_of_failure > 0.7 or prediction.expected_model_tier >= 4:
-            tier = 4
-        elif prediction.expected_model_tier <= 2:
-            tier = max(prediction.expected_model_tier, 1)
-        else:
-            # Start cheap, escalate on low confidence
-            tier = max(prediction.expected_model_tier - 1, 1)
-        models = self.models_by_tier.get(tier, self.models_by_tier[3])
-        model = models[0] if models else list(self.config.models.values())[0]
-        fallback = None
-        if tier < 5 and prediction.risk_of_failure > 0.5:
-            fallback = self._next_tier_model(tier)
-            fallback_id = fallback.model_id if fallback else None
-        else:
-            fallback_id = None
         return RoutingDecision(
-            model_id=model.model_id,
-            provider=model.provider,
             tier=tier,
-            confidence=1.0 - prediction.risk_of_failure,
-            reasoning=f"Prompt heuristic: risk={prediction.risk_of_failure:.2f}, expected_tier={prediction.expected_model_tier}",
-            fallback_model_id=fallback_id,
-            use_verifier=prediction.verifier_required,
-        )
-    def _route_learned(self, prediction: TaskPrediction, verifier_fallback: bool = False) -> RoutingDecision:
-        """Learned router with cost-quality tradeoff.
-        In a full implementation, this would load a trained classifier.
-        Here we use a heuristic calibrated from routing_stats.
-        """
-        # Check historical success rate per tier for this task type
-        task_key = prediction.task_type.value
-        best_tier = None
-        best_score = -float("inf")
-        for tier in self.TIER_ORDER:
-            stats = self.routing_stats.get(f"{task_key}_tier_{tier}", {})
-            success_rate = stats.get("success_rate", 0.5)
-            avg_cost = stats.get("avg_cost", 0.01 * tier)
-            # Score = success_weight * success_rate - cost_weight * cost
-            score = 10 * success_rate - 100 * avg_cost
-            # Penalize tiers below expected if risk is high
-            if tier < prediction.expected_model_tier and prediction.risk_of_failure > 0.5:
-                score -= 5
-            if score > best_score:
-                best_score = score
-                best_tier = tier
-        # Default to expected tier if no history
-        if best_tier is None:
-            best_tier = prediction.expected_model_tier
-        models = self.models_by_tier.get(best_tier, self.models_by_tier[3])
-        model = models[0] if models else list(self.config.models.values())[0]
-        # Verifier fallback on uncertain predictions
-        use_verifier = verifier_fallback and prediction.risk_of_failure > 0.5
-        return RoutingDecision(
-            model_id=model.model_id,
-            provider=model.provider,
-            tier=best_tier,
-            confidence=min(best_score / 10 + 0.5, 1.0),
-            reasoning=f"Learned router: tier {best_tier} scored {best_score:.3f} for {task_key}",
-            fallback_model_id=self._next_tier_model(best_tier).model_id if best_tier < 5 else None,
-            use_verifier=use_verifier,
-        )
-    def _route_cascade(self, prediction: TaskPrediction) -> RoutingDecision:
-        """FrugalGPT-style cascade: try cheap first, escalate on low confidence."""
-        start_tier = max(1, prediction.expected_model_tier - 2)
-        # Don't start below tier 2 for risky tasks
-        if prediction.risk_of_failure > 0.6:
-            start_tier = max(start_tier, 2)
-        models = self.models_by_tier.get(start_tier, [])
-        if not models:
-            models = self.models_by_tier.get(1, [])
-        if not models:
-            models = list(self.config.models.values())
-        model = models[0]
-        # Determine if we should pre-escalate (for critical tasks)
-        pre_escalate = prediction.task_type == TaskType.LEGAL_REGULATED
-        fallback = None
-        if not pre_escalate and start_tier < prediction.expected_model_tier:
-            fallback = self._next_tier_model(start_tier)
-        return RoutingDecision(
-            model_id=model.model_id,
-            provider=model.provider,
-            tier=start_tier,
-            confidence=1.0 - prediction.risk_of_failure,
-            reasoning=f"Cascade start at tier {start_tier}, expected tier {prediction.expected_model_tier}, risk={prediction.risk_of_failure:.2f}",
-            fallback_model_id=fallback.model_id if fallback else None,
-            use_verifier=prediction.verifier_required,
         )
-    def _next_tier_model(self, current_tier: int) -> Optional[ModelConfig]:
-        for tier in range(current_tier + 1, 6):
-            models = self.models_by_tier.get(tier)
-            if models:
-                return models[0]
-        return None
-    def update_stats(self, task_type: TaskType, tier: int, cost: float, success: bool) -> None:
-        key = f"{task_type.value}_tier_{tier}"
-        stats = self.routing_stats.setdefault(key, {"count": 0, "successes": 0, "total_cost": 0.0})
-        stats["count"] += 1
-        if success:
-            stats["successes"] += 1
-        stats["total_cost"] += cost
-        stats["success_rate"] = stats["successes"] / stats["count"]
-        stats["avg_cost"] = stats["total_cost"] / stats["count"]
-    def should_escalate(self, decision: RoutingDecision, step_outcome: Outcome, confidence: float) -> bool:
-        """Decide whether to escalate to a stronger model after a step."""
-        if decision.tier >= 5:
-            return False
-        if step_outcome == Outcome.FAILURE and confidence < 0.5:
-            return True
-        if step_outcome == Outcome.PARTIAL_SUCCESS and decision.tier < 4:
-            return True
-        return False

+"""Model Cascade Router: Dynamic difficulty + ML confirmation + safety floors."""
+import numpy as np
+import pickle, os, json
+from typing import Dict, Tuple, Optional
 from dataclasses import dataclass
 @dataclass
 class RoutingDecision:
+    """Outcome of one ModelCascadeRouter.route() call."""
+    # Identifier of the chosen model; route() copies it from TIER_MODELS[tier]["model_id"].
     model_id: str
+    # Tier finally selected, after any escalation or downgrade.
     tier: int
+    # Success-probability estimate route() obtained for the selected tier.
     confidence: float
+    # "; "-joined trace of how the tier was reached (base tier, escalation, downgrade).
     reasoning: str
+    # Value looked up in the router's tier_costs table for the selected tier.
+    cost_estimate: float
+    # Difficulty used for routing: the caller-supplied value or the router's own estimate.
+    dynamic_difficulty: int
+    # True when the safety check moved the request one tier up.
+    escalated: bool = False
+    # True when the cost-saver step moved the request one tier down.
+    downgraded: bool = False
+# Keyword tables. The router tests each entry with `in` against the lower-cased
+# request, i.e. as a plain substring: "test" also matches "latest", "now" also
+# matches "know". NOTE(review): confirm substring matching is intended.
+# CODE/LEGAL/RESEARCH/TOOL/LONG/MATH feed the has_* / n_* classifier features.
+CODE_KW = ["python","javascript","code","function","bug","debug","refactor","implement","test",
+           "compile","runtime","segfault","thread","async","class","module"]
+LEGAL_KW = ["contract","legal","compliance","gdpr","privacy","policy","regulatory","liability","indemnification","clause"]
+RESEARCH_KW = ["research","find sources","literature","investigate","compare","analyze","survey","paper","arxiv"]
+TOOL_KW = ["search","fetch","retrieve","query","api","database","scrape","aggregate"]
+LONG_KW = ["plan","project","roadmap","orchestrate","multi-step","migrate","pipeline","deploy","architecture"]
+MATH_KW = ["calculate","compute","solve","equation","formula","optimize","probability","integral"]
+# Any CRITICAL_KW hit raises the estimated difficulty by one (capped at 5).
+CRITICAL_KW = ["critical","production","urgent","now","emergency","live","deployed","safety","security"]
+# Any SIMPLE_KW hit lowers the estimated difficulty by one (floored at 1).
+SIMPLE_KW = ["typo","simple","quick","brief","briefly","just","minor","small","easy","trivial","clarification"]
+# Task-type name -> integer index; supplies the "tt_idx" feature and the set of
+# one-hot "tt_<name>" features. Unknown task types fall back to index 8.
+TT2IDX = {"quick_answer":0,"coding":1,"research":2,"document_drafting":3,
+           "legal_regulated":4,"tool_heavy":5,"retrieval_heavy":6,"long_horizon":7,"unknown_ambiguous":8}
+# Tier number -> model descriptor. The router only reads "model_id" from these
+# entries; "provider" and "cost_per_1k" are not consumed by the code shown here.
+TIER_MODELS = {
+    1: {"model_id": "tiny-local-3b", "provider": "local", "cost_per_1k": 0.0},
+    2: {"model_id": "cheap-cloud-8b", "provider": "cloud", "cost_per_1k": 0.05},
+    3: {"model_id": "medium-70b", "provider": "cloud", "cost_per_1k": 0.30},
+    4: {"model_id": "frontier-latest", "provider": "cloud", "cost_per_1k": 1.00},
+    5: {"model_id": "specialist-expert", "provider": "cloud", "cost_per_1k": 1.50},
+}
 class ModelCascadeRouter:
+    """Choose a model tier (1-5) for a request.
+
+    The tier starts from an estimated difficulty, is clamped to a per-task-type
+    floor, and is then adjusted by at most one step: up when the estimated
+    success probability is below ``safety_threshold``, or down when the next
+    cheaper tier already reaches ``downgrade_threshold``. Probabilities come
+    from an optional pickled per-tier classifier bundle; without one a fixed
+    heuristic is used.
+    """
+    def __init__(self, model_path: Optional[str] = None, safety_threshold: float = 0.30,
+                 downgrade_threshold: float = 0.90,
+                 task_floor: Optional[Dict[str,int]] = None,
+                 tier_costs: Optional[Dict[int,float]] = None):
+        """Configure thresholds, floors and costs, then try to load the model bundle.
+
+        Args:
+            model_path: Path to a pickled bundle; ignored when falsy or missing on disk.
+            safety_threshold: Escalate one tier when P(success) is below this value.
+            downgrade_threshold: Downgrade one tier when the cheaper tier reaches this value.
+            task_floor: Minimum tier per task type; a falsy value selects the built-in table.
+            tier_costs: Cost reported per tier; a falsy value selects the built-in table.
+        """
+        self.safety_threshold = safety_threshold
+        self.downgrade_threshold = downgrade_threshold
+        self.task_floor = task_floor or {
+            "legal_regulated":4,"long_horizon":3,"research":3,"coding":3,
+            "unknown_ambiguous":3,"quick_answer":1,"document_drafting":2,
+            "tool_heavy":2,"retrieval_heavy":2,
         }
+        # NOTE(review): these defaults differ from TIER_MODELS[*]["cost_per_1k"];
+        # presumably a different unit or a relative cost - confirm.
+        self.tier_costs = tier_costs or {1:0.05,2:0.15,3:0.75,4:1.0,5:1.5}
+        self.tier_clfs = None
+        self.tier_calibs = None
+        self.feat_keys = None
+        self._load_model(model_path)
+    def _load_model(self, model_path: Optional[str] = None):
+        """Best-effort load of the pickled bundle at ``model_path``.
+
+        Reads the keys "tier_clfs", "tier_calibrators" and "feat_keys". On any
+        failure a warning is printed and the router keeps its heuristic fallback.
+        """
+        if not model_path or not os.path.exists(model_path):
+            return
+        try:
+            # SECURITY: unpickling runs arbitrary code; only load bundles from a trusted source.
+            with open(model_path, "rb") as fh:  # context manager closes the handle even on error
+                bundle = pickle.load(fh)
+            tier_clfs = {int(k):v for k,v in bundle.get("tier_clfs",{}).items()}
+            tier_calibs = {int(k):v for k,v in bundle.get("tier_calibrators",{}).items()}
+            feat_keys = bundle.get("feat_keys", None)
+        except Exception as e:
+            print(f"[ACO] Warning: Could not load router model: {e}")
+            return
+        # Assign only after the whole bundle parsed, so the router never holds a half-loaded model.
+        self.tier_clfs = tier_clfs
+        self.tier_calibs = tier_calibs
+        self.feat_keys = feat_keys
+    def estimate_difficulty(self, request: str, task_type: str) -> int:
+        """Return a 1-5 difficulty from the task type, nudged by keyword hits.
+
+        Unknown task types start at 3. A CRITICAL_KW hit adds one (max 5); a
+        SIMPLE_KW hit then subtracts one (min 1).
+        """
+        r = request.lower()
+        base = {"quick_answer":1,"document_drafting":2,"tool_heavy":2,"retrieval_heavy":2,
+                "research":3,"coding":3,"unknown_ambiguous":3,"long_horizon":4,"legal_regulated":5}.get(task_type,3)
+        # NOTE(review): substring tests - "now" fires on "know", "live" on "deliver",
+        # "just" on "adjust". Left unchanged because a trained classifier may depend
+        # on difficulties computed this way; confirm before tightening to whole words.
+        if any(k in r for k in CRITICAL_KW):
+            base = min(base + 1, 5)
+        if any(k in r for k in SIMPLE_KW):
+            base = max(base - 1, 1)
+        return base
+    def _extract_features(self, request: str, task_type: str, difficulty: int) -> np.ndarray:
+        """Build the (1, n) float32 feature row for the per-tier classifiers.
+
+        Columns follow ``self.feat_keys``; names missing from the computed
+        features become 0.0. Without ``feat_keys`` a (1, 23) all-zero row is
+        returned.
+        """
+        r = request.lower()
+        feats = {
+            "req_len": len(request), "num_words": len(request.split()),
+            "has_code": int(any(k in r for k in CODE_KW)),
+            "n_code": sum(1 for k in CODE_KW if k in r),
+            "has_legal": int(any(k in r for k in LEGAL_KW)),
+            "n_legal": sum(1 for k in LEGAL_KW if k in r),
+            "has_research": int(any(k in r for k in RESEARCH_KW)),
+            "n_research": sum(1 for k in RESEARCH_KW if k in r),
+            "has_tool": int(any(k in r for k in TOOL_KW)),
+            "n_tool": sum(1 for k in TOOL_KW if k in r),
+            "has_long": int(any(k in r for k in LONG_KW)),
+            "has_math": int(any(k in r for k in MATH_KW)),
+            "tt_idx": TT2IDX.get(task_type, 8),
+            "difficulty": difficulty,
+        }
+        for tt in TT2IDX:
+            feats[f"tt_{tt}"] = int(task_type == tt)
+        if self.feat_keys:
+            return np.array([float(feats.get(k, 0.0)) for k in self.feat_keys], dtype=np.float32).reshape(1, -1)
+        # NOTE(review): the computed features are discarded here, so the heuristic
+        # fallback cannot see the real difficulty - confirm this is intended.
+        return np.zeros((1, 23), dtype=np.float32)
+    def _get_psuccess(self, x: np.ndarray, tier: int) -> float:
+        """Estimate the probability that ``tier`` succeeds on feature row ``x``.
+
+        Uses the tier's classifier (``predict_proba``) and calibrator
+        (``transform``) when both are loaded; otherwise, or if they raise,
+        falls back to ``strength[tier] ** (0.6 * difficulty)``.
+        """
+        if self.tier_clfs and tier in self.tier_clfs and self.tier_calibs and tier in self.tier_calibs:
+            try:
+                p_raw = self.tier_clfs[tier].predict_proba(x)[0, 1]
+                return float(self.tier_calibs[tier].transform([p_raw])[0])
+            except Exception:
+                # Deliberate best effort: a failing model must not break routing.
+                # Narrowed from a bare except so Ctrl-C / SystemExit still propagate.
+                pass
+        # Fallback heuristic probability
+        strengths = {1:0.35,2:0.55,3:0.80,4:0.93,5:0.97}
+        # Difficulty is read back out of the feature row; 3 is assumed when it is not available.
+        diff_feat = float(x[0, self.feat_keys.index("difficulty")]) if self.feat_keys and "difficulty" in self.feat_keys else 3
+        return strengths.get(tier, 0.80) ** (diff_feat * 0.6)
+    def route(self, request: str, task_type: str, difficulty: Optional[int] = None,
+              prediction: Optional[dict] = None) -> RoutingDecision:
+        """Select a tier for ``request`` and describe how it was chosen.
+
+        Args:
+            request: Raw request text.
+            task_type: Task-type name; unknown names get a floor of 2.
+            difficulty: Optional 1-5 override; estimated from the request when None.
+            prediction: Accepted for interface compatibility; not read by this method.
+
+        Returns:
+            A RoutingDecision for the selected tier.
+        """
+        if difficulty is None:
+            difficulty = self.estimate_difficulty(request, task_type)
+        floor = self.task_floor.get(task_type, 2)
+        # Start one tier above the difficulty (capped at 5), never below the task floor.
+        base = max(min(difficulty + 1, 5), floor)
+        x = self._extract_features(request, task_type, difficulty)
+        tier = base
+        ps = self._get_psuccess(x, tier)
+        escalated = False
+        downgraded = False
+        cheaper = None  # bound up front so the reasoning string can never hit an unbound name
+        if ps < self.safety_threshold and tier < 5:
+            # Safety net: a single step up when the base tier looks too weak.
+            tier += 1
+            ps = self._get_psuccess(x, tier)
+            escalated = True
+        elif tier > floor:
+            # Cost saver: only tried when not escalated; a single step down, never below the floor.
+            cheaper = tier - 1
+            pc = self._get_psuccess(x, cheaper)
+            if pc >= self.downgrade_threshold and cheaper >= floor:
+                tier = cheaper
+                ps = pc
+                downgraded = True
+        # Tiers outside TIER_MODELS fall back to the tier-4 entry.
+        model_info = TIER_MODELS.get(tier, TIER_MODELS[4])
+        reasoning_parts = [f"base_tier={base}"]
+        if escalated:
+            reasoning_parts.append(f"escalated(P(success@{base})<{self.safety_threshold})")
+        if downgraded:
+            reasoning_parts.append(f"downgraded(P(success@{cheaper})>={self.downgrade_threshold})")
         return RoutingDecision(
+            model_id=model_info["model_id"],
             tier=tier,
+            confidence=ps,
+            reasoning="; ".join(reasoning_parts),
+            cost_estimate=self.tier_costs.get(tier, 1.0),
+            dynamic_difficulty=difficulty,
+            escalated=escalated,
+            downgraded=downgraded,
         )