Spaces:
Runtime error
Runtime error
Ashira Pitchayapakayakul
feat: migrate $HOME/.claude/* to $HOME/.surrogate/* (clean separation from Claude Code)
e36381e | """Shared HTTP retry library for all cloud bridges. | |
| Handles: exponential backoff + jitter + Retry-After + circuit breaker. | |
| Import at top of any bridge: exec(open(...).read()) | |
| Exports: request_with_retry(url, data, headers, max_retries=4, base_delay=2.0) | |
| """ | |
| import json as _json | |
| import os as _os | |
| import random as _random | |
| import time as _time | |
| import urllib.request as _urlreq | |
| import urllib.error as _urlerr | |
# Circuit-breaker state lives in /tmp so every bridge invocation (each a
# fresh process) shares the same per-host counters.
_CB_DIR = "/tmp/bridge-circuits"
_os.makedirs(_CB_DIR, exist_ok=True)


def _cb_state_path(host):
    """Return the JSON state-file path for *host* (slashes sanitized)."""
    safe_host = host.replace('/', '_')
    return f"{_CB_DIR}/{safe_host}.json"
def _circuit_open(host):
    """Return (is_open, seconds_remaining) for *host*'s circuit breaker.

    A missing or unreadable state file counts as a closed circuit.
    """
    state_file = _cb_state_path(host)
    try:
        with open(state_file) as fh:
            state = _json.load(fh)
        remaining = state.get("open_until", 0) - _time.time()
    except Exception:
        # No state / corrupt state -> treat as closed.
        return False, 0
    if remaining < 0:
        # Cool-down has elapsed; circuit closed again.
        return False, 0
    return True, int(remaining)
def _record_failure(host, open_seconds=60):
    """Record a 429/5xx failure for *host*; open the circuit on the 3rd in a row.

    Args:
        host: netloc keying the per-host state file.
        open_seconds: cool-down length once the circuit opens.

    Persistence is best-effort, matching _record_success: this function is
    called from inside request_with_retry's exception handlers, and an
    unguarded disk error here would replace the real HTTP error with an
    OSError about /tmp. (The original left the write unprotected.)
    """
    state_file = _cb_state_path(host)
    try:
        with open(state_file) as fh:
            state = _json.load(fh)
    except Exception:
        # First failure ever, or unreadable state -> start fresh.
        state = {"consec_fails": 0, "open_until": 0}
    state["consec_fails"] = state.get("consec_fails", 0) + 1
    if state["consec_fails"] >= 3:
        # Three consecutive failures: refuse traffic for open_seconds.
        state["open_until"] = _time.time() + open_seconds
    try:
        with open(state_file, "w") as fh:
            _json.dump(state, fh)
    except Exception:
        pass  # breaker state is advisory; never mask the in-flight HTTP error
def _record_success(host):
    """Reset *host*'s breaker state after a 2xx response (best effort)."""
    fresh_state = {"consec_fails": 0, "open_until": 0}
    try:
        with open(_cb_state_path(host), "w") as fh:
            _json.dump(fresh_state, fh)
    except Exception:
        pass  # state file is advisory; ignore disk errors
def _parse_retry_after(headers, default_delay):
    """Return the server-requested retry delay in seconds, else default_delay.

    Checks Retry-After plus common x-ratelimit variants, first match wins.
    Values may be a numeric delta ("30", "1.5" -- fractional seconds are now
    accepted; the original int() parse silently rejected them) or an absolute
    epoch timestamp in seconds or milliseconds (x-ratelimit-reset), which is
    converted to a delta. Numeric results are clamped to 1..300 s.
    Unparseable values (e.g. an HTTP-date Retry-After) fall through to
    default_delay.
    """
    for name in ("Retry-After", "retry-after", "x-ratelimit-reset-after", "x-ratelimit-reset"):
        raw = headers.get(name)
        if not raw:
            continue
        try:
            n = float(raw)  # generalized from int(): accept fractional seconds
        except (ValueError, TypeError):
            continue  # non-numeric value; try the next header name
        # Absolute timestamps are converted to a delta from now.
        if n > 10_000_000_000:      # way in future = epoch milliseconds
            n = n / 1000 - _time.time()
        elif n > 1_000_000_000:     # epoch seconds
            n = n - _time.time()
        return max(1, min(n, 300))  # clamp 1..300s
    return default_delay
def request_with_retry(url, data, headers, timeout=120, max_retries=4, base_delay=2.0, open_seconds=60):
    """Make HTTP request with exp-backoff retry + circuit breaker.

    Args:
        url: full request URL; its netloc keys the per-host circuit breaker.
        data: request body bytes, passed straight to urllib.request.Request.
        headers: dict of request headers.
        timeout: per-attempt socket timeout in seconds.
        max_retries: total attempts for 429 / 5xx / network errors.
        base_delay: first backoff delay; doubled each attempt, +/-20% jitter.
        open_seconds: how long to open circuit after 3 consecutive failures.
            Default 60s. Callers with strict per-minute rate limits (Cloudflare,
            SambaNova) should use 120-180s so we don't hammer during cooldown.

    Returns: parsed JSON response.
    Raises: Exception if circuit open or max retries exhausted; the original
        HTTPError for non-retryable 4xx; the original URLError/OSError when
        network retries are exhausted.
    """
    from urllib.parse import urlparse
    host = urlparse(url).netloc

    # Fail fast while the breaker is open -- don't touch the network at all.
    is_open, remaining = _circuit_open(host)
    if is_open:
        raise Exception(f"circuit-open for {host} ({remaining}s remaining)")

    last_err = None
    for attempt in range(max_retries):
        try:
            req = _urlreq.Request(url, data=data, headers=headers)
            with _urlreq.urlopen(req, timeout=timeout) as r:
                result = _json.load(r)
            _record_success(host)
            return result
        except _urlerr.HTTPError as e:
            last_err = e
            if e.code == 429:
                # Rate-limited -- prefer the server's Retry-After over backoff.
                base = base_delay * (2 ** attempt)
                delay = _parse_retry_after(e.headers, base)
                delay *= (1 + _random.uniform(-0.2, 0.2))  # jitter +/-20%
                if attempt < max_retries - 1:
                    _time.sleep(min(delay, 60))
                    continue
                _record_failure(host, open_seconds=open_seconds)
                raise Exception(f"HTTP 429 after {max_retries} retries (last Retry-After: {delay:.0f}s)")
            elif 500 <= e.code < 600:
                # Server error -- exponential backoff with jitter.
                delay = base_delay * (2 ** attempt) * (1 + _random.uniform(-0.2, 0.2))
                if attempt < max_retries - 1:
                    _time.sleep(min(delay, 30))
                    continue
                _record_failure(host, open_seconds=open_seconds)
                raise Exception(f"HTTP {e.code} after {max_retries} retries")
            else:
                # Other 4xx: deterministic client error -- not retryable, and it
                # must NOT count toward the breaker (_record_failure is for
                # 429/5xx per its contract; a stray 401/404 would otherwise
                # open the circuit and block a perfectly healthy host).
                raise
        except (_urlerr.URLError, _os.error) as e:
            last_err = e
            # Network error (DNS, connect, timeout) -- retry with backoff.
            delay = base_delay * (2 ** attempt) * (1 + _random.uniform(-0.2, 0.2))
            if attempt < max_retries - 1:
                _time.sleep(min(delay, 30))
                continue
            _record_failure(host, open_seconds=open_seconds)
            raise
    # Unreachable (every path above returns or raises); kept as a safety net.
    raise Exception(f"max retries ({max_retries}) exhausted: {last_err}")