#!/usr/bin/env python3 """OpenAI-compatible fallback proxy for Hermes on Hugging Face Spaces.""" import json import os from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer from typing import Any, Dict, Optional import requests LISTEN_HOST = os.environ.get("FALLBACK_PROXY_HOST", "127.0.0.1") LISTEN_PORT = int(os.environ.get("FALLBACK_PROXY_PORT", "8787")) REQUEST_TIMEOUT = int(os.environ.get("FALLBACK_PROXY_TIMEOUT", "180")) PRIMARY_BASE_URL = os.environ.get("PRIMARY_BASE_URL", "").rstrip("/") PRIMARY_API_KEY = os.environ.get("PRIMARY_API_KEY", "") PRIMARY_MODEL = os.environ.get("PRIMARY_MODEL", "") FALLBACK_BASE_URL = os.environ.get("FALLBACK_BASE_URL", "https://openrouter.ai/api/v1").rstrip("/") FALLBACK_API_KEY = os.environ.get("FALLBACK_API_KEY", "") FALLBACK_MODEL = os.environ.get("FALLBACK_MODEL", "openrouter/free") FALLBACK_REFERER = os.environ.get("OPENROUTER_HTTP_REFERER", "https://huggingface.co") FALLBACK_TITLE = os.environ.get("OPENROUTER_X_TITLE", "Hermes HF Fallback") VERBOSE_LOGGING = os.environ.get("FALLBACK_PROXY_VERBOSE", "true").lower() in {"1", "true", "yes", "on"} MAX_LOG_CHARS = int(os.environ.get("FALLBACK_PROXY_MAX_LOG_CHARS", "6000")) def is_retryable(status_code: int) -> bool: return status_code in {408, 409, 425, 429, 500, 502, 503, 504} def should_fallback(status_code: int) -> bool: return status_code == 400 or is_retryable(status_code) def build_headers(api_key: str, extra: Optional[Dict[str, str]] = None) -> Dict[str, str]: headers = {"Content-Type": "application/json"} if api_key: headers["Authorization"] = f"Bearer {api_key}" if extra: headers.update(extra) return headers def normalize_message_content(content: Any) -> str: if content is None: return "" if isinstance(content, str): return content if isinstance(content, list): parts = [] for item in content: if isinstance(item, str): parts.append(item) elif isinstance(item, dict): if item.get("type") == "text": parts.append(str(item.get("text", ""))) elif "text" in item: parts.append(str(item.get("text", ""))) else: parts.append(json.dumps(item, ensure_ascii=False)) else: parts.append(str(item)) return "\n".join(part for part in parts if part) if isinstance(content, dict): return json.dumps(content, ensure_ascii=False) return str(content) def normalize_messages(messages: Any) -> Any: if not isinstance(messages, list): return messages normalized_messages = [] for message in messages: if not isinstance(message, dict): normalized_messages.append(message) continue role = (message.get("role") or "user").lower() content = normalize_message_content(message.get("content")) if role == "developer": role = "system" elif role in {"tool", "function"}: role = "user" prefix = "Tool result" tool_name = message.get("name") or message.get("tool_call_id") if tool_name: prefix = f"Tool result ({tool_name})" content = f"{prefix}:\n{content}" if content else prefix elif role not in {"system", "user", "assistant"}: role = "user" normalized: Dict[str, Any] = { "role": role, "content": content, } if role == "assistant" and message.get("tool_calls"): normalized["tool_calls"] = message.get("tool_calls") if not content: normalized["content"] = json.dumps(message.get("tool_calls"), ensure_ascii=False) normalized_messages.append(normalized) return normalized_messages def clip_text(value: str, limit: int = MAX_LOG_CHARS) -> str: if len(value) <= limit: return value return value[:limit] + f"... 


def clip_text(value: str, limit: int = MAX_LOG_CHARS) -> str:
    if len(value) <= limit:
        return value
    return value[:limit] + f"... [truncated {len(value) - limit} chars]"


def dump_json(data: Any) -> str:
    try:
        return clip_text(json.dumps(data, ensure_ascii=False, indent=2))
    except Exception as exc:
        return f"<unserializable: {exc}>"


def summarize_messages(messages: Any) -> Any:
    if not isinstance(messages, list):
        return messages
    summary = []
    for idx, message in enumerate(messages):
        if not isinstance(message, dict):
            summary.append({"index": idx, "type": type(message).__name__, "value": str(message)[:200]})
            continue
        content = message.get("content")
        if isinstance(content, str):
            content_preview = clip_text(content, 400)
            content_type = "str"
        elif isinstance(content, list):
            content_preview = clip_text(json.dumps(content, ensure_ascii=False), 400)
            content_type = "list"
        elif isinstance(content, dict):
            content_preview = clip_text(json.dumps(content, ensure_ascii=False), 400)
            content_type = "dict"
        else:
            content_preview = clip_text(str(content), 400)
            content_type = type(content).__name__
        summary.append(
            {
                "index": idx,
                "role": message.get("role"),
                "content_type": content_type,
                "content_preview": content_preview,
                "has_tool_calls": bool(message.get("tool_calls")),
                "tool_call_id": message.get("tool_call_id"),
                "name": message.get("name"),
                "keys": sorted(message.keys()),
            }
        )
    return summary


def log_debug(title: str, data: Any) -> None:
    if not VERBOSE_LOGGING:
        return
    print(f"[fallback-proxy] {title}:\n{dump_json(data)}")


def create_upstream_response(
    upstream_base: str,
    payload: Dict[str, Any],
    api_key: str,
    model_override: str,
    extra_headers: Optional[Dict[str, str]] = None,
) -> requests.Response:
    request_payload = dict(payload)
    request_payload["messages"] = normalize_messages(request_payload.get("messages"))
    request_payload["model"] = model_override
    log_debug(
        "outbound_request",
        {
            "upstream_base": upstream_base,
            "model_override": model_override,
            "stream": bool(request_payload.get("stream")),
            "keys": sorted(request_payload.keys()),
            "message_summary": summarize_messages(request_payload.get("messages")),
            "payload": request_payload,
        },
    )
    return requests.post(
        f"{upstream_base}/chat/completions",
        headers=build_headers(api_key, extra_headers),
        json=request_payload,
        timeout=REQUEST_TIMEOUT,
        stream=bool(request_payload.get("stream")),
    )
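

# Example call shape (illustrative, not executed): this mirrors how do_POST
# below invokes the fallback leg, passing the OpenRouter attribution headers
# as extras. The "model" in the payload is always replaced by model_override.
#
#   response = create_upstream_response(
#       FALLBACK_BASE_URL,
#       {"model": "ignored", "messages": [{"role": "user", "content": "hi"}]},
#       FALLBACK_API_KEY,
#       FALLBACK_MODEL,
#       {"HTTP-Referer": FALLBACK_REFERER, "X-Title": FALLBACK_TITLE},
#   )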


class Handler(BaseHTTPRequestHandler):
    server_version = "HermesFallbackProxy/0.1"
    protocol_version = "HTTP/1.1"

    def _send_json(self, status_code: int, payload: Dict[str, Any]) -> None:
        body = json.dumps(payload).encode("utf-8")
        self.send_response(status_code)
        self.send_header("Content-Type", "application/json")
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def _read_json(self) -> Optional[Dict[str, Any]]:
        length = int(self.headers.get("Content-Length", "0"))
        raw = self.rfile.read(length) if length > 0 else b"{}"
        try:
            return json.loads(raw.decode("utf-8")) if raw else {}
        except json.JSONDecodeError:
            self._send_json(400, {"error": {"message": "Invalid JSON body"}})
            return None

    def _relay_response(self, response: requests.Response, stream: bool) -> None:
        content_type = response.headers.get("Content-Type", "application/json")
        self.send_response(response.status_code)
        self.send_header("Content-Type", content_type)
        if stream:
            self.send_header("Cache-Control", "no-cache")
            self.send_header("Connection", "keep-alive")
        else:
            self.send_header("Content-Length", str(len(response.content)))
        self.end_headers()
        if stream:
            for chunk in response.iter_content(chunk_size=1024):
                if chunk:
                    self.wfile.write(chunk)
                    self.wfile.flush()
            response.close()
        else:
            self.wfile.write(response.content)

    def _send_plain(self, status_code: int, body: str) -> None:
        payload = body.encode("utf-8")
        self.send_response(status_code)
        self.send_header("Content-Type", "text/plain; charset=utf-8")
        self.send_header("Content-Length", str(len(payload)))
        self.end_headers()
        self.wfile.write(payload)

    def do_GET(self) -> None:
        if self.path == "/health":
            self._send_json(
                200,
                {
                    "status": "ok",
                    "primary_configured": bool(PRIMARY_BASE_URL and PRIMARY_MODEL),
                    "fallback_configured": bool(FALLBACK_API_KEY and FALLBACK_MODEL),
                },
            )
            return
        if self.path in {"/version", "/v1/props", "/props"}:
            self._send_json(
                200,
                {
                    "version": "fallback-proxy",
                    "primary_model": PRIMARY_MODEL,
                    "fallback_model": FALLBACK_MODEL,
                },
            )
            return
        if self.path in {"/api/tags"}:
            self._send_json(200, {"models": [{"name": PRIMARY_MODEL or FALLBACK_MODEL}]})
            return
        if self.path in {"/v1/models", "/api/v1/models"}:
            self._send_json(
                200,
                {
                    "object": "list",
                    "data": [
                        {
                            "id": PRIMARY_MODEL or FALLBACK_MODEL or "openai-fallback-proxy",
                            "object": "model",
                            "owned_by": "hermes-local-proxy",
                        }
                    ],
                },
            )
            return
        if self.path.startswith("/v1/models/"):
            model_id = self.path.split("/v1/models/", 1)[1]
            self._send_json(
                200,
                {
                    "id": model_id or PRIMARY_MODEL or FALLBACK_MODEL,
                    "object": "model",
                    "owned_by": "hermes-local-proxy",
                },
            )
            return
        self._send_json(404, {"error": {"message": "Not found"}})
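
    # Failover sequence for POST /v1/chat/completions (and /chat/completions):
    #   1. Forward the request to the primary upstream with the model overridden.
    #   2. Relay any response with status < 400 unchanged (streamed or not).
    #   3. On a should_fallback() status (400 plus common retryable codes) or a
    #      transport error, retry once against the fallback upstream, provided
    #      FALLBACK_API_KEY is set; otherwise relay the primary error as-is.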
    def do_POST(self) -> None:
        if self.path not in {"/v1/chat/completions", "/chat/completions"}:
            self._send_json(404, {"error": {"message": "Not found"}})
            return
        payload = self._read_json()
        if payload is None:
            return
        log_debug(
            "incoming_request",
            {
                "path": self.path,
                "keys": sorted(payload.keys()),
                "stream": bool(payload.get("stream")),
                "message_summary": summarize_messages(payload.get("messages")),
                "payload": payload,
            },
        )
        if not PRIMARY_BASE_URL or not PRIMARY_MODEL:
            self._send_json(500, {"error": {"message": "Primary model not configured"}})
            return
        stream = bool(payload.get("stream"))
        primary_response = None
        try:
            primary_response = create_upstream_response(
                PRIMARY_BASE_URL,
                payload,
                PRIMARY_API_KEY,
                PRIMARY_MODEL,
            )
            log_debug(
                "primary_response",
                {
                    "status_code": primary_response.status_code,
                    "headers": dict(primary_response.headers),
                    "body_preview": clip_text(primary_response.text if not stream else ""),
                },
            )
            if primary_response.status_code < 400:
                self._relay_response(primary_response, stream)
                return
            if primary_response.status_code == 400:
                try:
                    body_preview = primary_response.text[:500]
                except Exception:
                    body_preview = ""
                try:
                    message_roles = [m.get("role") for m in (payload.get("messages") or []) if isinstance(m, dict)]
                except Exception:
                    message_roles = []
                print(
                    "[fallback-proxy] primary 400 -> fallback; "
                    f"roles={message_roles} keys={sorted(payload.keys())} body={body_preview}"
                )
            if not FALLBACK_API_KEY or not should_fallback(primary_response.status_code):
                self._relay_response(primary_response, False)
                return
        except requests.RequestException as error:
            if not FALLBACK_API_KEY:
                self._send_json(502, {"error": {"message": f"Primary upstream request failed: {error}"}})
                return
        finally:
            if primary_response is not None and not stream:
                primary_response.close()
        try:
            fallback_response = create_upstream_response(
                FALLBACK_BASE_URL,
                payload,
                FALLBACK_API_KEY,
                FALLBACK_MODEL,
                {
                    "HTTP-Referer": FALLBACK_REFERER,
                    "X-Title": FALLBACK_TITLE,
                },
            )
            log_debug(
                "fallback_response",
                {
                    "status_code": fallback_response.status_code,
                    "headers": dict(fallback_response.headers),
                    "body_preview": clip_text(fallback_response.text if not stream else ""),
                },
            )
            self._relay_response(fallback_response, stream)
        except requests.RequestException as error:
            self._send_json(502, {"error": {"message": f"Fallback upstream request failed: {error}"}})

    def log_message(self, fmt: str, *args: Any) -> None:
        print(f"[fallback-proxy] {self.address_string()} - {fmt % args}")


def main() -> None:
    server = ThreadingHTTPServer((LISTEN_HOST, LISTEN_PORT), Handler)
    print(f"[fallback-proxy] listening on http://{LISTEN_HOST}:{LISTEN_PORT}")
    server.serve_forever()


if __name__ == "__main__":
    main()
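
# Smoke test (illustrative; assumes the default host/port and a configured
# primary upstream):
#
#   curl -s http://127.0.0.1:8787/v1/chat/completions \
#     -H "Content-Type: application/json" \
#     -d '{"model": "anything", "messages": [{"role": "user", "content": "hi"}]}'
#
# The "model" field is replaced with PRIMARY_MODEL (or FALLBACK_MODEL on
# failover) before the request leaves the proxy.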