# --- Snapshot provenance (page-scrape residue, preserved as comments) ----
# Hugging Face Space status: Runtime error
# Author: Ashira Pitchayapakayakul
# Commit: feat: migrate $HOME/.claude/* to $HOME/.surrogate/*
#         (clean separation from Claude Code)
"""OpenRouter client — free-first then paid tiers.

Tiers (per Ashira 2026-04-19):
  FREE:    qwen, gpt-oss, llama, nemotron, glm
  CHEAP:   deepseek-v3.2, grok-4.1-fast
  PREMIUM: gpt-5.4, claude-haiku-4.5, claude-sonnet-4.6, grok-4.20,
           claude-opus-4.7

Per-model cooldown tracked in ~/.surrogate/yolo/or-cooldowns.json to avoid
hammering rate-limited free models.
"""
from __future__ import annotations

import json
import os
import time
import urllib.error
import urllib.request
from dataclasses import dataclass, field
from pathlib import Path
from typing import Optional

# OpenRouter chat-completions endpoint (OpenAI-compatible request schema).
OR_URL = "https://openrouter.ai/api/v1/chat/completions"
# Persisted map of model id -> unix timestamp until which the model cools down.
COOLDOWN_PATH = Path.home() / ".surrogate" / "yolo" / "or-cooldowns.json"

# Tier lists, in preference order within each tier.
FREE_MODELS = [
    "qwen/qwen3-coder:free",
    "openai/gpt-oss-120b:free",
    "meta-llama/llama-3.3-70b-instruct:free",
    "nvidia/nemotron-3-super-120b-a12b:free",
    "z-ai/glm-4.5-air:free",
]
CHEAP_MODELS = [
    "deepseek/deepseek-v3.2",
    "x-ai/grok-4.1-fast",
]
PREMIUM_MODELS = [
    "openai/gpt-5.4",
    "anthropic/claude-haiku-4.5",
    "anthropic/claude-sonnet-4.6",
    "x-ai/grok-4.20",
    "anthropic/claude-opus-4.7",
]

DEFAULT_COOLDOWN_SECONDS = 60  # after 429, wait 60s before retrying this model
class ORUnavailable(Exception):
    """Raised when an OpenRouter call fails (HTTP error, bad payload, no key).

    Carries the requested model, the HTTP status code (0 for non-HTTP
    failures), and the raw response body for callers that want details.
    """

    def __init__(self, model: str, code: int, body: str):
        message = f"OR {model}: {code} {body[:200]}"
        super().__init__(message)
        self.model = model
        self.code = code
        self.body = body
@dataclass
class ORResponse:
    """Result of a successful OpenRouter call.

    Fix: the original declared only class-level annotations without
    @dataclass, so `ORResponse(content=..., ...)` in call_openrouter would
    raise TypeError (plain classes get no field-accepting __init__).
    """

    content: str          # assistant message text
    model_requested: str  # model id we asked for
    model_served: str     # model id OpenRouter actually routed to
    input_tokens: int = 0   # usage.prompt_tokens from the API (0 if absent)
    output_tokens: int = 0  # usage.completion_tokens from the API (0 if absent)
def _load_cooldowns() -> dict[str, float]:
    """Read the cooldown map from disk.

    Any failure (missing file, unreadable file, corrupt JSON) degrades to an
    empty map — cooldowns are best-effort state, never fatal.
    """
    try:
        raw = COOLDOWN_PATH.read_text()
    except OSError:
        # Covers the missing-file case too (FileNotFoundError is an OSError).
        return {}
    try:
        return json.loads(raw)
    except json.JSONDecodeError:
        return {}
def _save_cooldowns(c: dict[str, float]) -> None:
    """Persist the cooldown map, creating parent directories as needed."""
    target = COOLDOWN_PATH
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_text(json.dumps(c))
def is_on_cooldown(model: str) -> bool:
    """True if *model* was rate-limited recently and should be skipped."""
    expiry = _load_cooldowns().get(model, 0)
    return expiry > time.time()
def mark_cooldown(model: str, seconds: int = DEFAULT_COOLDOWN_SECONDS) -> None:
    """Put *model* on cooldown for *seconds*, pruning expired entries.

    Fix: sample time.time() once so the new entry's expiry and the prune
    predicate agree on the same "now" (the original called it twice).
    """
    now = time.time()
    cooldowns = _load_cooldowns()
    cooldowns[model] = now + seconds
    # Prune expired entries so the file does not grow without bound.
    _save_cooldowns({k: v for k, v in cooldowns.items() if v > now})
def _read_api_key_from_env_file() -> str:
    """Best-effort read of OPENROUTER_API_KEY from ~/.surrogate/.env.

    Accepts both `KEY=val` and `export KEY=val` lines; quotes around the
    value are stripped. Returns "" when the file or the key is absent.
    """
    env_file = Path.home() / ".surrogate" / ".env"
    if not env_file.exists():
        return ""
    for line in env_file.read_text().splitlines():
        s = line.strip()
        if s.startswith("export "):
            s = s[len("export "):].lstrip()
        if s.startswith("OPENROUTER_API_KEY="):
            return s.split("=", 1)[1].strip().strip('"').strip("'")
    return ""


def call_openrouter(
    model: str,
    messages: list[dict],
    max_tokens: int = 4000,
    system: Optional[str] = None,
    timeout: int = 120,
) -> ORResponse:
    """Call OpenRouter directly. Raises ORUnavailable on error.

    The API key comes from $OPENROUTER_API_KEY, falling back to
    ~/.surrogate/.env. On 429/502/503 the model is put on cooldown before
    the error is raised.

    Fixes vs. original:
    - ORUnavailable raised inside the try (malformed payload path) was
      re-caught by the blanket `except Exception` and double-wrapped with
      code 0, losing the real payload; it is now re-raised unchanged.
    - Exceptions are chained (`from e`) to preserve the root cause.
    - The HTTPError handler no longer shadows the request `body` variable.
    """
    api_key = os.environ.get("OPENROUTER_API_KEY", "") or _read_api_key_from_env_file()
    if not api_key:
        raise ORUnavailable(model, 0, "OPENROUTER_API_KEY not set")

    body_msgs = list(messages)
    if system:
        # Prepend the system prompt without mutating the caller's list.
        body_msgs = [{"role": "system", "content": system}] + body_msgs

    body = json.dumps({
        "model": model,
        "max_tokens": max_tokens,
        "messages": body_msgs,
    }).encode()
    req = urllib.request.Request(
        OR_URL,
        data=body,
        headers={
            "Authorization": f"Bearer {api_key}",
            "HTTP-Referer": "https://github.com/Ashira/axentx",
            "X-Title": "axentx-smart-dispatcher",
            "content-type": "application/json",
        },
        method="POST",
    )
    try:
        with urllib.request.urlopen(req, timeout=timeout) as r:
            data = json.loads(r.read())
            if "choices" not in data:
                # API returned an error envelope rather than a completion.
                raise ORUnavailable(model, 0, str(data)[:200])
            choice = data["choices"][0]
            content = choice["message"]["content"]
            usage = data.get("usage", {})
            return ORResponse(
                content=content,
                model_requested=model,
                model_served=data.get("model", model),
                input_tokens=usage.get("prompt_tokens", 0),
                output_tokens=usage.get("completion_tokens", 0),
            )
    except urllib.error.HTTPError as e:
        err_body = e.read().decode()
        # Rate-limit / upstream-unavailable -> back off this model for a while.
        if e.code in (429, 503, 502):
            mark_cooldown(model)
        raise ORUnavailable(model, e.code, err_body) from e
    except ORUnavailable:
        raise  # already wrapped above — don't double-wrap with code 0
    except Exception as e:  # network errors: DNS, timeout, connection reset
        raise ORUnavailable(model, 0, str(e)) from e
def pick_free() -> Optional[str]:
    """First free model not on cooldown, or None when all are cooling down."""
    return next((m for m in FREE_MODELS if not is_on_cooldown(m)), None)
def pick_cheap() -> Optional[str]:
    """First cheap-tier model not on cooldown, or None when none available."""
    return next((m for m in CHEAP_MODELS if not is_on_cooldown(m)), None)
def pick_premium() -> Optional[str]:
    """First premium-tier model not on cooldown, or None when none available."""
    return next((m for m in PREMIUM_MODELS if not is_on_cooldown(m)), None)
if __name__ == "__main__":
    import sys

    if len(sys.argv) > 1 and sys.argv[1] == "pick":
        # Report the first available model in each tier.
        print(f"free: {pick_free()}")
        print(f"cheap: {pick_cheap()}")
        print(f"premium: {pick_premium()}")
    else:
        # Free-first escalation: free -> cheap -> premium.
        m = pick_free() or pick_cheap() or pick_premium()
        if m is None:
            # Fix: original passed model=None into call_openrouter when every
            # tier was on cooldown, crashing with a confusing API error.
            sys.exit("all models on cooldown; try again later")
        q = sys.argv[1] if len(sys.argv) > 1 else "say OK"
        r = call_openrouter(m, [{"role": "user", "content": q}], max_tokens=30)
        print(f"[{r.model_served}] {r.content[:100]}")