Spaces:
Running
Running
GitHub Action committed on
Commit ·
b53785b
1
Parent(s): 5f890f7
Auto deploy from GitHub 2026-05-11 11:19:17
Browse files- entrypoint.sh +117 -4
- src/openai_fallback_proxy.py +191 -0
entrypoint.sh
CHANGED
|
@@ -82,6 +82,38 @@ declare -A PROVIDER_BASE_URLS=(
|
|
| 82 |
["longcat"]="https://api.longcat.chat/openai"
|
| 83 |
)
|
| 84 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
# ---- 检测主模型 ----
|
| 86 |
detect_main_model() {
|
| 87 |
if [ -n "$MODEL_PROVIDER" ] && [ -n "$MODEL_NAME" ]; then
|
|
@@ -144,9 +176,35 @@ echo "⚡ Aux Model: ${AUX_MODEL_VAL:-auto-detect}"
|
|
| 144 |
DELEGATION_MODEL_VAL=$(detect_delegation_model)
|
| 145 |
echo "💻 Delegation Model: ${DELEGATION_MODEL_VAL:-inherit-main}"
|
| 146 |
|
| 147 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 148 |
echo " Base URL: $MAIN_BASE_URL"
|
| 149 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 150 |
echo "────────────────────────────────────────"
|
| 151 |
|
| 152 |
# ==================== 生成 config.yaml ====================
|
|
@@ -354,6 +412,12 @@ fi
|
|
| 354 |
if [ -n "$SILICONFLOW_API_KEY" ]; then
|
| 355 |
export SILICONFLOW_BASE_URL="${SILICONFLOW_BASE_URL:-https://api.siliconflow.cn/v1}"
|
| 356 |
fi
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 357 |
if [ -n "$GEMINI_API_KEY" ]; then
|
| 358 |
export GEMINI_BASE_URL="${GEMINI_BASE_URL:-https://generativelanguage.googleapis.com}"
|
| 359 |
fi
|
|
@@ -400,19 +464,63 @@ echo " ✅ HERMES_MODEL=$HERMES_MODEL (进程级模型覆盖)"
|
|
| 400 |
|
| 401 |
# ==================== 环境变量注入 ====================
|
| 402 |
echo "⚙️ 注入环境变量到 .env..."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 403 |
ENV_FILE="/data/.hermes/.env"
|
| 404 |
mkdir -p /data/.hermes
|
| 405 |
|
| 406 |
PERSISTENT_VARS=(
|
| 407 |
-
"MODEL_PROVIDER" "MODEL_NAME" "HERMES_MODEL"
|
| 408 |
"VISION_MODEL" "AUX_MODEL" "DELEGATION_MODEL"
|
| 409 |
"NVIDIA_API_KEY" "NVIDIA_BASE_URL"
|
| 410 |
"SILICONFLOW_API_KEY" "SILICONFLOW_BASE_URL"
|
| 411 |
-
"OPENAI_API_KEY"
|
| 412 |
-
"ANTHROPIC_API_KEY"
|
| 413 |
"GOOGLE_API_KEY" "GEMINI_API_KEY" "GEMINI_BASE_URL"
|
| 414 |
"OPENROUTER_API_KEY" "OPENROUTER_BASE_URL"
|
| 415 |
"LONGCAT_API_KEY" "LONGCAT_BASE_URL"
|
|
|
|
|
|
|
| 416 |
"API_SERVER_ENABLED" "API_SERVER_PORT" "API_SERVER_HOST"
|
| 417 |
"TELEGRAM_BOT_TOKEN" "TELEGRAM_ALLOWED_USERS" "TELEGRAM_PROXY"
|
| 418 |
"DISCORD_BOT_TOKEN" "DISCORD_CLIENT_ID"
|
|
@@ -1270,6 +1378,11 @@ cleanup() {
|
|
| 1270 |
kill $BFF_PID 2>/dev/null || true
|
| 1271 |
wait $BFF_PID 2>/dev/null || true
|
| 1272 |
fi
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1273 |
if [ -n "$GATEWAY_PID" ] && kill -0 $GATEWAY_PID 2>/dev/null; then
|
| 1274 |
echo " 🛑 停止 Gateway..."
|
| 1275 |
kill $GATEWAY_PID 2>/dev/null || true
|
|
|
|
| 82 |
["longcat"]="https://api.longcat.chat/openai"
|
| 83 |
)
|
| 84 |
|
| 85 |
+
FALLBACK_PROXY_PORT="${FALLBACK_PROXY_PORT:-8787}"
|
| 86 |
+
|
| 87 |
+
# Resolve the API base URL for a provider.
#
# A provider-specific *_BASE_URL environment variable takes precedence; when
# it is unset or empty, the matching PROVIDER_BASE_URLS entry is used.
# Providers not listed here produce an empty line.
#
# Arguments: $1 - provider name (e.g. "openai")
# Outputs:   the resolved base URL on stdout
get_provider_base_url() {
    local name="$1"
    local resolved=""

    case "$name" in
        nvidia)
            resolved="${NVIDIA_BASE_URL:-${PROVIDER_BASE_URLS[nvidia]}}"
            ;;
        siliconflow)
            resolved="${SILICONFLOW_BASE_URL:-${PROVIDER_BASE_URLS[siliconflow]}}"
            ;;
        openai)
            resolved="${OPENAI_BASE_URL:-${PROVIDER_BASE_URLS[openai]}}"
            ;;
        anthropic)
            resolved="${ANTHROPIC_BASE_URL:-${PROVIDER_BASE_URLS[anthropic]}}"
            ;;
        google)
            resolved="${GOOGLE_BASE_URL:-${PROVIDER_BASE_URLS[google]}}"
            ;;
        gemini)
            resolved="${GEMINI_BASE_URL:-${PROVIDER_BASE_URLS[gemini]}}"
            ;;
        openrouter)
            resolved="${OPENROUTER_BASE_URL:-${PROVIDER_BASE_URLS[openrouter]}}"
            ;;
        longcat)
            resolved="${LONGCAT_BASE_URL:-${PROVIDER_BASE_URLS[longcat]}}"
            ;;
    esac

    echo "$resolved"
}
|
| 101 |
+
|
| 102 |
+
# Map a provider name to the NAME of the environment variable that holds its
# API key (the name, not the key itself). Unknown providers produce an empty
# line.
#
# Arguments: $1 - provider name (e.g. "openrouter")
# Outputs:   the variable name on stdout
get_provider_api_key_var() {
    local name="$1"
    local key_var=""

    case "$name" in
        nvidia)      key_var="NVIDIA_API_KEY" ;;
        siliconflow) key_var="SILICONFLOW_API_KEY" ;;
        openai)      key_var="OPENAI_API_KEY" ;;
        anthropic)   key_var="ANTHROPIC_API_KEY" ;;
        google)      key_var="GOOGLE_API_KEY" ;;
        gemini)      key_var="GEMINI_API_KEY" ;;
        openrouter)  key_var="OPENROUTER_API_KEY" ;;
        longcat)     key_var="LONGCAT_API_KEY" ;;
    esac

    echo "$key_var"
}
|
| 116 |
+
|
| 117 |
# ---- 检测主模型 ----
|
| 118 |
detect_main_model() {
|
| 119 |
if [ -n "$MODEL_PROVIDER" ] && [ -n "$MODEL_NAME" ]; then
|
|
|
|
| 176 |
DELEGATION_MODEL_VAL=$(detect_delegation_model)
|
| 177 |
echo "💻 Delegation Model: ${DELEGATION_MODEL_VAL:-inherit-main}"
|
| 178 |
|
| 179 |
+
# ---- Resolve the primary and fallback endpoints ----
# Keep the originally detected provider: MAIN_PROVIDER may be rewritten to
# "openai" below when the local fallback proxy is put in front of it.
PRIMARY_PROVIDER_FOR_PROXY="$MAIN_PROVIDER"
MAIN_BASE_URL="$(get_provider_base_url "$MAIN_PROVIDER")"
# An explicit MODEL_BASE_URL overrides the provider default.
if [ -n "$MODEL_BASE_URL" ]; then
    MAIN_BASE_URL="$MODEL_BASE_URL"
fi
echo " Base URL: $MAIN_BASE_URL"

FALLBACK_PROVIDER="${FALLBACK_MODEL_PROVIDER:-}"
FALLBACK_MODEL_VAL="${FALLBACK_MODEL_NAME:-}"
FALLBACK_BASE_URL_VAL="${FALLBACK_MODEL_BASE_URL:-}"
# OpenRouter is the only fallback provider with a built-in default model.
if [ -n "$FALLBACK_PROVIDER" ] && [ -z "$FALLBACK_MODEL_VAL" ] && [ "$FALLBACK_PROVIDER" = "openrouter" ]; then
    FALLBACK_MODEL_VAL="openrouter/free"
fi
# Without an explicit fallback URL, use the provider's resolved base URL.
if [ -n "$FALLBACK_PROVIDER" ] && [ -z "$FALLBACK_BASE_URL_VAL" ]; then
    FALLBACK_BASE_URL_VAL="$(get_provider_base_url "$FALLBACK_PROVIDER")"
fi

# The proxy is enabled only when a custom primary URL AND a complete fallback
# (provider + model) are configured. The main model is then addressed as an
# "openai" provider on the local proxy port.
USE_MAIN_FALLBACK_PROXY=false
if [ -n "$MODEL_BASE_URL" ] && [ -n "$FALLBACK_PROVIDER" ] && [ -n "$FALLBACK_MODEL_VAL" ]; then
    USE_MAIN_FALLBACK_PROXY=true
    MAIN_PROVIDER="openai"
    MAIN_BASE_URL="http://127.0.0.1:${FALLBACK_PROXY_PORT}/v1"
fi

if [ "$USE_MAIN_FALLBACK_PROXY" = true ]; then
    # The leading emoji was committed as mojibake (UTF-8 bytes read as GBK);
    # it is restored here from those bytes.
    echo "🛐 Fallback Model: $FALLBACK_PROVIDER/$FALLBACK_MODEL_VAL"
    echo " Fallback URL: $FALLBACK_BASE_URL_VAL"
fi
|
| 207 |
+
|
| 208 |
echo "────────────────────────────────────────"
|
| 209 |
|
| 210 |
# ==================== 生成 config.yaml ====================
|
|
|
|
| 412 |
if [ -n "$SILICONFLOW_API_KEY" ]; then
|
| 413 |
export SILICONFLOW_BASE_URL="${SILICONFLOW_BASE_URL:-https://api.siliconflow.cn/v1}"
|
| 414 |
fi
|
| 415 |
+
if [ -n "$OPENAI_API_KEY" ]; then
|
| 416 |
+
export OPENAI_BASE_URL="${OPENAI_BASE_URL:-https://api.openai.com/v1}"
|
| 417 |
+
fi
|
| 418 |
+
if [ -n "$ANTHROPIC_API_KEY" ]; then
|
| 419 |
+
export ANTHROPIC_BASE_URL="${ANTHROPIC_BASE_URL:-https://api.anthropic.com/v1}"
|
| 420 |
+
fi
|
| 421 |
if [ -n "$GEMINI_API_KEY" ]; then
|
| 422 |
export GEMINI_BASE_URL="${GEMINI_BASE_URL:-https://generativelanguage.googleapis.com}"
|
| 423 |
fi
|
|
|
|
| 464 |
|
| 465 |
# ==================== 环境变量注入 ====================
|
| 466 |
echo "⚙️ 注入环境变量到 .env..."
|
| 467 |
+
# ---- Start the local fallback proxy (only when USE_MAIN_FALLBACK_PROXY=true) ----
# The status messages in this section were committed as mojibake (UTF-8 bytes
# read as GBK); they are restored to the intended text.
FALLBACK_PROXY_PID=""
if [ "$USE_MAIN_FALLBACK_PROXY" = true ]; then
    echo "🛐 启动主模型 fallback 代理..."
    PRIMARY_KEY_VAR=$(get_provider_api_key_var "$PRIMARY_PROVIDER_FOR_PROXY")
    FALLBACK_KEY_VAR=$(get_provider_api_key_var "$FALLBACK_PROVIDER")
    # Dereference only when a variable name was found. Indirect expansion
    # through an empty name is an expansion error in bash, which would
    # pre-empt the explicit "key not set" messages below for providers that
    # have no key-variable mapping.
    PRIMARY_API_KEY_VAL="${PRIMARY_KEY_VAR:+${!PRIMARY_KEY_VAR:-}}"
    FALLBACK_API_KEY_VAL="${FALLBACK_KEY_VAR:+${!FALLBACK_KEY_VAR:-}}"

    if [ -z "$PRIMARY_KEY_VAR" ] || [ -z "$PRIMARY_API_KEY_VAL" ]; then
        echo " ❌ 主模型 API Key 未设置: provider=$PRIMARY_PROVIDER_FOR_PROXY var=$PRIMARY_KEY_VAR"
        exit 1
    fi
    if [ -z "$FALLBACK_KEY_VAR" ] || [ -z "$FALLBACK_API_KEY_VAL" ]; then
        echo " ❌ fallback API Key 未设置: provider=$FALLBACK_PROVIDER var=$FALLBACK_KEY_VAR"
        exit 1
    fi

    # The proxy receives its configuration only through these per-process
    # variables; it runs in the background and is stopped via its PID.
    FALLBACK_PROXY_HOST=127.0.0.1 \
    FALLBACK_PROXY_PORT="$FALLBACK_PROXY_PORT" \
    PRIMARY_BASE_URL="$MODEL_BASE_URL" \
    PRIMARY_API_KEY="$PRIMARY_API_KEY_VAL" \
    PRIMARY_MODEL="$MAIN_MODEL" \
    FALLBACK_BASE_URL="$FALLBACK_BASE_URL_VAL" \
    FALLBACK_API_KEY="$FALLBACK_API_KEY_VAL" \
    FALLBACK_MODEL="$FALLBACK_MODEL_VAL" \
    OPENROUTER_HTTP_REFERER="${OPENROUTER_HTTP_REFERER:-https://huggingface.co/spaces/JackKing001/Hermes}" \
    OPENROUTER_X_TITLE="${OPENROUTER_X_TITLE:-Hermes HF Fallback}" \
    python -m src.openai_fallback_proxy &
    FALLBACK_PROXY_PID=$!

    # Poll /health once per second, for at most 10 attempts.
    FALLBACK_PROXY_READY=false
    for i in $(seq 1 10); do
        if curl -sf "http://127.0.0.1:${FALLBACK_PROXY_PORT}/health" > /dev/null 2>&1; then
            echo " ✅ fallback 代理已就绪 (http://127.0.0.1:${FALLBACK_PROXY_PORT})"
            FALLBACK_PROXY_READY=true
            break
        fi
        sleep 1
    done
    # Startup still continues (as before), but a proxy that never answered
    # is now reported instead of passing silently.
    if [ "$FALLBACK_PROXY_READY" != true ]; then
        echo " ⚠️ fallback 代理在 10 秒内未就绪 (http://127.0.0.1:${FALLBACK_PROXY_PORT})"
    fi

    # Point OpenAI-compatible clients at the local proxy. The real upstream
    # keys were handed to the proxy process above, so a placeholder key is
    # exported here.
    export OPENAI_API_KEY="local-fallback-proxy"
    export OPENAI_BASE_URL="http://127.0.0.1:${FALLBACK_PROXY_PORT}/v1"
fi
|
| 508 |
+
|
| 509 |
ENV_FILE="/data/.hermes/.env"
|
| 510 |
mkdir -p /data/.hermes
|
| 511 |
|
| 512 |
PERSISTENT_VARS=(
|
| 513 |
+
"MODEL_PROVIDER" "MODEL_NAME" "MODEL_BASE_URL" "HERMES_MODEL"
|
| 514 |
"VISION_MODEL" "AUX_MODEL" "DELEGATION_MODEL"
|
| 515 |
"NVIDIA_API_KEY" "NVIDIA_BASE_URL"
|
| 516 |
"SILICONFLOW_API_KEY" "SILICONFLOW_BASE_URL"
|
| 517 |
+
"OPENAI_API_KEY" "OPENAI_BASE_URL"
|
| 518 |
+
"ANTHROPIC_API_KEY" "ANTHROPIC_BASE_URL"
|
| 519 |
"GOOGLE_API_KEY" "GEMINI_API_KEY" "GEMINI_BASE_URL"
|
| 520 |
"OPENROUTER_API_KEY" "OPENROUTER_BASE_URL"
|
| 521 |
"LONGCAT_API_KEY" "LONGCAT_BASE_URL"
|
| 522 |
+
"FALLBACK_MODEL_PROVIDER" "FALLBACK_MODEL_NAME" "FALLBACK_MODEL_BASE_URL"
|
| 523 |
+
"FALLBACK_PROXY_PORT" "OPENROUTER_HTTP_REFERER" "OPENROUTER_X_TITLE"
|
| 524 |
"API_SERVER_ENABLED" "API_SERVER_PORT" "API_SERVER_HOST"
|
| 525 |
"TELEGRAM_BOT_TOKEN" "TELEGRAM_ALLOWED_USERS" "TELEGRAM_PROXY"
|
| 526 |
"DISCORD_BOT_TOKEN" "DISCORD_CLIENT_ID"
|
|
|
|
| 1378 |
kill $BFF_PID 2>/dev/null || true
|
| 1379 |
wait $BFF_PID 2>/dev/null || true
|
| 1380 |
fi
|
| 1381 |
+
# Stop the fallback proxy if it was started and is still running.
# The status message was committed as mojibake (UTF-8 bytes read as GBK) and
# is restored to the intended text.
if [ -n "$FALLBACK_PROXY_PID" ] && kill -0 $FALLBACK_PROXY_PID 2>/dev/null; then
    echo " 🛑 停止 fallback 代理..."
    kill $FALLBACK_PROXY_PID 2>/dev/null || true
    wait $FALLBACK_PROXY_PID 2>/dev/null || true
fi
|
| 1386 |
if [ -n "$GATEWAY_PID" ] && kill -0 $GATEWAY_PID 2>/dev/null; then
|
| 1387 |
echo " 🛑 停止 Gateway..."
|
| 1388 |
kill $GATEWAY_PID 2>/dev/null || true
|
src/openai_fallback_proxy.py
ADDED
|
@@ -0,0 +1,191 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""OpenAI-compatible fallback proxy for Hermes on Hugging Face Spaces."""
|
| 3 |
+
|
| 4 |
+
import json
|
| 5 |
+
import os
|
| 6 |
+
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
|
| 7 |
+
from typing import Any, Dict, Optional
|
| 8 |
+
|
| 9 |
+
import requests
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
# --- Listener ---------------------------------------------------------------
# Address and port the proxy binds to, and the timeout (seconds) applied to
# every upstream request.
LISTEN_HOST = os.getenv("FALLBACK_PROXY_HOST", "127.0.0.1")
LISTEN_PORT = int(os.getenv("FALLBACK_PROXY_PORT", "8787"))
REQUEST_TIMEOUT = int(os.getenv("FALLBACK_PROXY_TIMEOUT", "180"))

# --- Primary upstream -------------------------------------------------------
# Trailing slashes are stripped so the URL can be joined with "/chat/completions".
PRIMARY_BASE_URL = os.getenv("PRIMARY_BASE_URL", "").rstrip("/")
PRIMARY_API_KEY = os.getenv("PRIMARY_API_KEY", "")
PRIMARY_MODEL = os.getenv("PRIMARY_MODEL", "")

# --- Fallback upstream (defaults target OpenRouter) -------------------------
FALLBACK_BASE_URL = os.getenv(
    "FALLBACK_BASE_URL", "https://openrouter.ai/api/v1"
).rstrip("/")
FALLBACK_API_KEY = os.getenv("FALLBACK_API_KEY", "")
FALLBACK_MODEL = os.getenv("FALLBACK_MODEL", "openrouter/free")
# Sent as the HTTP-Referer / X-Title headers on fallback requests.
FALLBACK_REFERER = os.getenv("OPENROUTER_HTTP_REFERER", "https://huggingface.co")
FALLBACK_TITLE = os.getenv("OPENROUTER_X_TITLE", "Hermes HF Fallback")
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
def is_retryable(status_code: int) -> bool:
    """Return True when *status_code* should trigger a retry on the fallback.

    The retryable set is 408, 409, 425, 429, 500, 502, 503 and 504.
    """
    retryable_codes = frozenset((408, 409, 425, 429, 500, 502, 503, 504))
    return status_code in retryable_codes
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def build_headers(api_key: str, extra: Optional[Dict[str, str]] = None) -> Dict[str, str]:
    """Build the request headers for an upstream JSON call.

    Args:
        api_key: Bearer token; the Authorization header is omitted when empty.
        extra: Optional additional headers; they override the defaults on
            key collision.

    Returns:
        A new dict with Content-Type first, then Authorization (if any),
        then the extra headers.
    """
    auth_part = {"Authorization": f"Bearer {api_key}"} if api_key else {}
    extra_part = extra if extra else {}
    return {"Content-Type": "application/json", **auth_part, **extra_part}
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def create_upstream_response(
    upstream_base: str,
    payload: Dict[str, Any],
    api_key: str,
    model_override: str,
    extra_headers: Optional[Dict[str, str]] = None,
) -> requests.Response:
    """POST a chat-completions request to an upstream and return its response.

    Args:
        upstream_base: Base URL of the upstream; "/chat/completions" is appended.
        payload: Client request body. It is copied, not mutated.
        api_key: Bearer token passed to ``build_headers``.
        model_override: Value written to the "model" field of the forwarded
            body, replacing whatever the client sent.
        extra_headers: Optional additional request headers.

    Returns:
        The ``requests.Response``. When the payload's "stream" value is
        truthy the request is made with ``stream=True``, so the body has not
        been downloaded yet.

    Raises:
        requests.RequestException: propagated from ``requests.post``.
    """
    outgoing = {**payload, "model": model_override}
    endpoint = f"{upstream_base}/chat/completions"
    wants_stream = bool(outgoing.get("stream"))
    request_headers = build_headers(api_key, extra_headers)
    return requests.post(
        endpoint,
        headers=request_headers,
        json=outgoing,
        timeout=REQUEST_TIMEOUT,
        stream=wants_stream,
    )
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
class Handler(BaseHTTPRequestHandler):
    """Request handler for the OpenAI-compatible fallback proxy.

    Routes:
        GET  /health               JSON summary of what is configured.
        GET  /v1/models            Single-entry OpenAI-style model list.
        POST /v1/chat/completions  (also /chat/completions) Forwarded to the
             primary upstream. When that request fails at the transport
             level, or returns a status accepted by ``is_retryable``, and a
             fallback API key is configured, the request is repeated against
             the fallback upstream.
    """

    server_version = "HermesFallbackProxy/0.1"
    # HTTP/1.1 enables persistent connections, so every response must either
    # carry an accurate Content-Length or close the connection when it ends.
    protocol_version = "HTTP/1.1"

    def _send_json(self, status_code: int, payload: Dict[str, Any]) -> None:
        """Send *payload* as a JSON response with the given status code."""
        body = json.dumps(payload).encode("utf-8")
        self.send_response(status_code)
        self.send_header("Content-Type", "application/json")
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def _read_json(self) -> Optional[Dict[str, Any]]:
        """Read and parse the request body as a JSON object.

        Returns:
            The parsed dict ({} for an empty body), or None after a 400
            response has been sent because the Content-Length header, the
            encoding, or the JSON itself was invalid, or the JSON was not
            an object.
        """
        try:
            length = int(self.headers.get("Content-Length", "0"))
        except ValueError:
            self._send_json(400, {"error": {"message": "Invalid Content-Length header"}})
            # The body length is unknown, so the connection cannot be reused.
            self.close_connection = True
            return None

        raw = self.rfile.read(length) if length > 0 else b"{}"
        try:
            parsed = json.loads(raw.decode("utf-8")) if raw else {}
        except ValueError:
            # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
            self._send_json(400, {"error": {"message": "Invalid JSON body"}})
            return None

        # Callers use dict methods on the result; reject arrays and scalars.
        if not isinstance(parsed, dict):
            self._send_json(400, {"error": {"message": "JSON body must be an object"}})
            return None
        return parsed

    def _relay_response(self, response: "requests.Response", stream: bool) -> None:
        """Copy an upstream response to the client, then close it.

        Args:
            response: Upstream response to relay. It is always closed before
                this method returns, including when writing to the client fails.
            stream: When True the body is forwarded chunk by chunk and the
                client connection is closed afterwards; otherwise the whole
                body is sent with a Content-Length header.
        """
        try:
            content_type = response.headers.get("Content-Type", "application/json")

            if not stream:
                # Read the body before sending anything, so an upstream read
                # failure can still be reported as a proper error response.
                try:
                    body = response.content
                except requests.RequestException as error:
                    self._send_json(
                        502,
                        {"error": {"message": f"Upstream response read failed: {error}"}},
                    )
                    return
                self.send_response(response.status_code)
                self.send_header("Content-Type", content_type)
                self.send_header("Content-Length", str(len(body)))
                self.end_headers()
                self.wfile.write(body)
                return

            self.send_response(response.status_code)
            self.send_header("Content-Type", content_type)
            self.send_header("Cache-Control", "no-cache")
            # A streamed body has no Content-Length and is not chunk-encoded,
            # so closing the connection is what marks its end.
            self.send_header("Connection", "close")
            self.close_connection = True
            self.end_headers()
            try:
                for chunk in response.iter_content(chunk_size=1024):
                    if chunk:
                        self.wfile.write(chunk)
                        self.wfile.flush()
            except requests.RequestException as error:
                # Headers are already on the wire, so no error response can
                # be sent; the truncated stream plus the closed connection is
                # the only signal left for the client.
                self.log_error("upstream stream aborted: %s", error)
        finally:
            response.close()

    def do_GET(self) -> None:
        """Serve /health and /v1/models; answer 404 for any other path."""
        if self.path == "/health":
            self._send_json(
                200,
                {
                    "status": "ok",
                    "primary_configured": bool(PRIMARY_BASE_URL and PRIMARY_MODEL),
                    "fallback_configured": bool(FALLBACK_API_KEY and FALLBACK_MODEL),
                },
            )
            return

        if self.path == "/v1/models":
            self._send_json(
                200,
                {
                    "object": "list",
                    "data": [
                        {
                            "id": PRIMARY_MODEL or FALLBACK_MODEL or "openai-fallback-proxy",
                            "object": "model",
                            "owned_by": "hermes-local-proxy",
                        }
                    ],
                },
            )
            return

        self._send_json(404, {"error": {"message": "Not found"}})

    def do_POST(self) -> None:
        """Forward a chat-completions request, falling back when the primary fails."""
        if self.path not in {"/v1/chat/completions", "/chat/completions"}:
            self._send_json(404, {"error": {"message": "Not found"}})
            # The request body was not read; do not reuse the connection.
            self.close_connection = True
            return

        payload = self._read_json()
        if payload is None:
            return

        if not PRIMARY_BASE_URL or not PRIMARY_MODEL:
            self._send_json(500, {"error": {"message": "Primary model not configured"}})
            return

        stream = bool(payload.get("stream"))

        # Only the request itself is guarded here. Relaying happens outside
        # the try block so that an error in the middle of a relay can never
        # start a second (fallback) response on a half-sent one.
        primary_response = None
        try:
            primary_response = create_upstream_response(
                PRIMARY_BASE_URL,
                payload,
                PRIMARY_API_KEY,
                PRIMARY_MODEL,
            )
        except requests.RequestException as error:
            if not FALLBACK_API_KEY:
                self._send_json(
                    502,
                    {"error": {"message": f"Primary upstream request failed: {error}"}},
                )
                return

        if primary_response is not None:
            if primary_response.status_code < 400:
                self._relay_response(primary_response, stream)
                return
            if not FALLBACK_API_KEY or not is_retryable(primary_response.status_code):
                # Error bodies are relayed in full rather than streamed.
                self._relay_response(primary_response, False)
                return
            # Retryable failure: release the primary connection before
            # turning to the fallback.
            primary_response.close()

        try:
            fallback_response = create_upstream_response(
                FALLBACK_BASE_URL,
                payload,
                FALLBACK_API_KEY,
                FALLBACK_MODEL,
                {
                    "HTTP-Referer": FALLBACK_REFERER,
                    "X-Title": FALLBACK_TITLE,
                },
            )
        except requests.RequestException as error:
            self._send_json(
                502,
                {"error": {"message": f"Fallback upstream request failed: {error}"}},
            )
            return
        self._relay_response(fallback_response, stream)

    def log_message(self, fmt: str, *args: Any) -> None:
        """Print a log line prefixed with "[fallback-proxy]" to stdout."""
        # Flush so lines appear promptly when stdout is not a terminal.
        print(f"[fallback-proxy] {self.address_string()} - {fmt % args}", flush=True)
|
| 182 |
+
|
| 183 |
+
|
| 184 |
+
def main() -> None:
    """Start the proxy on LISTEN_HOST:LISTEN_PORT and serve requests indefinitely."""
    bind_address = (LISTEN_HOST, LISTEN_PORT)
    httpd = ThreadingHTTPServer(bind_address, Handler)
    print(f"[fallback-proxy] listening on http://{LISTEN_HOST}:{LISTEN_PORT}")
    httpd.serve_forever()
|
| 188 |
+
|
| 189 |
+
|
| 190 |
+
if __name__ == "__main__":
|
| 191 |
+
main()
|