surrogate-1 / bin /lib /openrouter_client.py
Ashira Pitchayapakayakul
feat: migrate $HOME/.claude/* to $HOME/.surrogate/* (clean separation from Claude Code)
e36381e
"""OpenRouter client β€” free-first then paid tiers.
Tiers (per Ashira 2026-04-19):
FREE: qwen, gpt-oss, llama, nemotron, glm
CHEAP: deepseek-v3.2, grok-4.1-fast
PREMIUM: gpt-5.4, claude-haiku-4.5, claude-sonnet-4.6, grok-4.20, claude-opus-4.7
Per-model cooldown tracked in ~/.surrogate/yolo/or-cooldowns.json to avoid
hammering rate-limited free models.
"""
from __future__ import annotations
import json
import os
import time
import urllib.error
import urllib.request
from dataclasses import dataclass, field
from pathlib import Path
from typing import Optional
# Chat-completions endpoint that every request is POSTed to.
OR_URL = "https://openrouter.ai/api/v1/chat/completions"

# JSON file mapping model id -> epoch timestamp until which the model is skipped.
COOLDOWN_PATH = Path.home().joinpath(".surrogate", "yolo", "or-cooldowns.json")

# Each tier is scanned front to back; the first model not on cooldown wins.
FREE_MODELS = [
    "qwen/qwen3-coder:free",
    "openai/gpt-oss-120b:free",
    "meta-llama/llama-3.3-70b-instruct:free",
    "nvidia/nemotron-3-super-120b-a12b:free",
    "z-ai/glm-4.5-air:free",
]

CHEAP_MODELS = [
    "deepseek/deepseek-v3.2",
    "x-ai/grok-4.1-fast",
]

PREMIUM_MODELS = [
    "openai/gpt-5.4",
    "anthropic/claude-haiku-4.5",
    "anthropic/claude-sonnet-4.6",
    "x-ai/grok-4.20",
    "anthropic/claude-opus-4.7",
]

# Seconds a model is benched after a 429, 502 or 503 response.
DEFAULT_COOLDOWN_SECONDS = 60
class ORUnavailable(Exception):
    """Signal that an OpenRouter call did not yield a usable completion.

    Attributes:
        model: id of the model the request targeted.
        code: status code supplied by the raiser.
        body: full error text; the exception message carries only its
            first 200 characters.
    """

    def __init__(self, model: str, code: int, body: str):
        # Build the (truncated) human-readable message first, then keep the
        # untruncated pieces on the instance for programmatic inspection.
        message = f"OR {model}: {code} {body[:200]}"
        super().__init__(message)
        self.model, self.code, self.body = model, code, body
@dataclass
class ORResponse:
    """Flattened result of a single chat-completion call."""

    # Text of the reply.
    content: str
    # Model id the caller asked for.
    model_requested: str
    # Model id reported back for the reply.
    model_served: str
    # Token counts; both stay 0 unless the caller supplies them.
    input_tokens: int = 0
    output_tokens: int = 0
def _load_cooldowns() -> dict[str, float]:
    """Read the per-model cooldown table from COOLDOWN_PATH.

    Returns:
        A mapping of model id to the epoch timestamp at which its cooldown
        ends. An empty dict is returned when the file is missing, unreadable,
        not valid JSON, or does not contain a JSON object.
    """
    # Read directly instead of checking exists() first: a missing file is
    # just another OSError, and the pre-check only adds a race window.
    try:
        raw = json.loads(COOLDOWN_PATH.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        return {}
    if not isinstance(raw, dict):
        # Valid JSON of the wrong shape (list, number, ...) would break the
        # .get()/.items() calls made by callers.
        return {}
    # Drop entries whose deadline is not a number so comparisons against
    # time.time() cannot raise TypeError.
    return {
        name: until
        for name, until in raw.items()
        if isinstance(until, (int, float))
    }
def _save_cooldowns(c: dict[str, float]) -> None:
    """Write the cooldown table *c* to COOLDOWN_PATH as JSON.

    The parent directory is created first if it does not exist yet.
    """
    target_dir = COOLDOWN_PATH.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(c)
    COOLDOWN_PATH.write_text(serialized)
def is_on_cooldown(model: str) -> bool:
    """Return True while the stored cooldown deadline for *model* is in the future.

    A model with no stored entry is treated as having deadline 0, i.e. it is
    never on cooldown.
    """
    deadline = _load_cooldowns().get(model, 0)
    return time.time() < deadline
def mark_cooldown(model: str, seconds: int = DEFAULT_COOLDOWN_SECONDS) -> None:
    """Put *model* on cooldown for *seconds* and persist the table.

    Args:
        model: id of the model to bench.
        seconds: cooldown length, counted from now.

    Entries whose deadline has already passed are pruned before saving, so
    the file does not grow without bound.
    """
    # Take one timestamp so the new deadline and the pruning cut-off agree,
    # and so time.time() is not re-evaluated for every entry.
    now = time.time()
    cooldowns = _load_cooldowns()
    cooldowns[model] = now + seconds
    # Keep only numeric deadlines still in the future; a non-numeric value in
    # the file would otherwise raise TypeError on comparison.
    active = {
        name: until
        for name, until in cooldowns.items()
        if isinstance(until, (int, float)) and until > now
    }
    _save_cooldowns(active)
def _resolve_api_key() -> str:
    """Return the OpenRouter API key, or "" when none can be found.

    The OPENROUTER_API_KEY environment variable wins; otherwise the first
    matching line of ~/.surrogate/.env is used.
    """
    api_key = os.environ.get("OPENROUTER_API_KEY", "")
    if api_key:
        return api_key
    env_file = Path.home() / ".surrogate" / ".env"
    try:
        lines = env_file.read_text().splitlines()
    except (OSError, UnicodeDecodeError):
        # Missing or unreadable .env is the same as "no key configured".
        return ""
    for line in lines:
        s = line.strip()
        # Accept both `KEY=val` and `export KEY=val` formats.
        if s.startswith("export "):
            s = s[len("export "):].lstrip()
        if s.startswith("OPENROUTER_API_KEY="):
            return s.split("=", 1)[1].strip().strip('"').strip("'")
    return ""


def call_openrouter(
    model: str,
    messages: list[dict],
    max_tokens: int = 4000,
    system: Optional[str] = None,
    timeout: int = 120,
) -> ORResponse:
    """Send one chat-completion request to OpenRouter.

    Args:
        model: model id to request.
        messages: chat messages; the list is copied, never mutated.
        max_tokens: value sent as the request's ``max_tokens``.
        system: optional system prompt, prepended as a ``system`` message.
        timeout: timeout passed to ``urllib.request.urlopen``.

    Returns:
        An ORResponse with the first choice's message content, the served
        model id (falling back to *model*) and token usage (0 when absent).

    Raises:
        ORUnavailable: on a missing API key (code 0), an HTTP error (code is
            the HTTP status), a network/decoding failure (code 0) or a
            response without usable ``choices`` (code 0). A 429, 502 or 503
            status also puts *model* on cooldown.
    """
    api_key = _resolve_api_key()
    if not api_key:
        raise ORUnavailable(model, 0, "OPENROUTER_API_KEY not set")

    body_msgs = list(messages)
    if system:
        body_msgs = [{"role": "system", "content": system}] + body_msgs
    payload = json.dumps({
        "model": model,
        "max_tokens": max_tokens,
        "messages": body_msgs,
    }).encode()
    req = urllib.request.Request(
        OR_URL,
        data=payload,
        headers={
            "Authorization": f"Bearer {api_key}",
            "HTTP-Referer": "https://github.com/Ashira/axentx",
            "X-Title": "axentx-smart-dispatcher",
            "content-type": "application/json",
        },
        method="POST",
    )

    # Only the transport and JSON decoding live inside the try, so the
    # ORUnavailable errors raised below are not re-caught and re-wrapped.
    try:
        with urllib.request.urlopen(req, timeout=timeout) as r:
            data = json.loads(r.read())
    except urllib.error.HTTPError as e:
        err_body = e.read().decode("utf-8", errors="replace")
        # 429, 502 or 503 → mark cooldown
        if e.code in (429, 502, 503):
            try:
                mark_cooldown(model)
            except OSError:
                # Persisting the cooldown is best-effort; a write failure
                # must not mask the ORUnavailable the caller expects.
                pass
        raise ORUnavailable(model, e.code, err_body) from e
    except Exception as e:  # network errors, invalid JSON
        raise ORUnavailable(model, 0, str(e)) from e

    if not isinstance(data, dict) or "choices" not in data:
        raise ORUnavailable(model, 0, str(data)[:200])
    try:
        content = data["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError) as e:
        raise ORUnavailable(
            model, 0, f"malformed response: {str(data)[:200]}"
        ) from e

    usage = data.get("usage")
    if not isinstance(usage, dict):
        # Absent or null usage is reported as zero tokens.
        usage = {}
    return ORResponse(
        content=content,
        model_requested=model,
        model_served=data.get("model", model),
        input_tokens=usage.get("prompt_tokens", 0),
        output_tokens=usage.get("completion_tokens", 0),
    )
def pick_free() -> Optional[str]:
    """Return the earliest FREE_MODELS entry that is not on cooldown, or None."""
    available = (name for name in FREE_MODELS if not is_on_cooldown(name))
    return next(available, None)
def pick_cheap() -> Optional[str]:
    """Return the earliest CHEAP_MODELS entry that is not on cooldown, or None."""
    available = (name for name in CHEAP_MODELS if not is_on_cooldown(name))
    return next(available, None)
def pick_premium() -> Optional[str]:
    """Return the earliest PREMIUM_MODELS entry that is not on cooldown, or None."""
    available = (name for name in PREMIUM_MODELS if not is_on_cooldown(name))
    return next(available, None)
# Command-line entry point:
#   <script> pick      -> print the model each tier would currently select
#   <script> [PROMPT]  -> send PROMPT (default "say OK") to the first
#                         available model, trying free, cheap, then premium
if __name__ == "__main__":
    import sys

    if len(sys.argv) > 1 and sys.argv[1] == "pick":
        print(f"free: {pick_free()}")
        print(f"cheap: {pick_cheap()}")
        print(f"premium: {pick_premium()}")
    else:
        m = pick_free() or pick_cheap() or pick_premium()
        if m is None:
            # Every model in every tier is on cooldown; do not send a
            # request with a null model.
            sys.exit("no OpenRouter model available: every tier is on cooldown")
        q = sys.argv[1] if len(sys.argv) > 1 else "say OK"
        try:
            r = call_openrouter(m, [{"role": "user", "content": q}], max_tokens=30)
        except ORUnavailable as e:
            # Report the failure on stderr with exit status 1, no traceback.
            sys.exit(str(e))
        # content may be null in the response; print an empty reply then.
        print(f"[{r.model_served}] {(r.content or '')[:100]}")