GitHub Action committed on
Commit
b53785b
·
1 Parent(s): 5f890f7

Auto deploy from GitHub 2026-05-11 11:19:17

Browse files
Files changed (2) hide show
  1. entrypoint.sh +117 -4
  2. src/openai_fallback_proxy.py +191 -0
entrypoint.sh CHANGED
@@ -82,6 +82,38 @@ declare -A PROVIDER_BASE_URLS=(
82
  ["longcat"]="https://api.longcat.chat/openai"
83
  )
84
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
  # ---- 检测主模型 ----
86
  detect_main_model() {
87
  if [ -n "$MODEL_PROVIDER" ] && [ -n "$MODEL_NAME" ]; then
@@ -144,9 +176,35 @@ echo "⚡ Aux Model: ${AUX_MODEL_VAL:-auto-detect}"
144
  DELEGATION_MODEL_VAL=$(detect_delegation_model)
145
  echo "💻 Delegation Model: ${DELEGATION_MODEL_VAL:-inherit-main}"
146
 
147
- MAIN_BASE_URL="${PROVIDER_BASE_URLS[$MAIN_PROVIDER]}"
 
 
 
 
148
  echo " Base URL: $MAIN_BASE_URL"
149
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
150
  echo "────────────────────────────────────────"
151
 
152
  # ==================== 生成 config.yaml ====================
@@ -354,6 +412,12 @@ fi
354
  if [ -n "$SILICONFLOW_API_KEY" ]; then
355
  export SILICONFLOW_BASE_URL="${SILICONFLOW_BASE_URL:-https://api.siliconflow.cn/v1}"
356
  fi
 
 
 
 
 
 
357
  if [ -n "$GEMINI_API_KEY" ]; then
358
  export GEMINI_BASE_URL="${GEMINI_BASE_URL:-https://generativelanguage.googleapis.com}"
359
  fi
@@ -400,19 +464,63 @@ echo " ✅ HERMES_MODEL=$HERMES_MODEL (进程级模型覆盖)"
400
 
401
  # ==================== 环境变量注入 ====================
402
  echo "⚙️ 注入环境变量到 .env..."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
403
  ENV_FILE="/data/.hermes/.env"
404
  mkdir -p /data/.hermes
405
 
406
  PERSISTENT_VARS=(
407
- "MODEL_PROVIDER" "MODEL_NAME" "HERMES_MODEL"
408
  "VISION_MODEL" "AUX_MODEL" "DELEGATION_MODEL"
409
  "NVIDIA_API_KEY" "NVIDIA_BASE_URL"
410
  "SILICONFLOW_API_KEY" "SILICONFLOW_BASE_URL"
411
- "OPENAI_API_KEY"
412
- "ANTHROPIC_API_KEY"
413
  "GOOGLE_API_KEY" "GEMINI_API_KEY" "GEMINI_BASE_URL"
414
  "OPENROUTER_API_KEY" "OPENROUTER_BASE_URL"
415
  "LONGCAT_API_KEY" "LONGCAT_BASE_URL"
 
 
416
  "API_SERVER_ENABLED" "API_SERVER_PORT" "API_SERVER_HOST"
417
  "TELEGRAM_BOT_TOKEN" "TELEGRAM_ALLOWED_USERS" "TELEGRAM_PROXY"
418
  "DISCORD_BOT_TOKEN" "DISCORD_CLIENT_ID"
@@ -1270,6 +1378,11 @@ cleanup() {
1270
  kill $BFF_PID 2>/dev/null || true
1271
  wait $BFF_PID 2>/dev/null || true
1272
  fi
 
 
 
 
 
1273
  if [ -n "$GATEWAY_PID" ] && kill -0 $GATEWAY_PID 2>/dev/null; then
1274
  echo " 🛑 停止 Gateway..."
1275
  kill $GATEWAY_PID 2>/dev/null || true
 
82
  ["longcat"]="https://api.longcat.chat/openai"
83
  )
84
 
85
+ FALLBACK_PROXY_PORT="${FALLBACK_PROXY_PORT:-8787}"
86
+
87
# Print the base URL to use for a provider.
#   $1 - provider id (nvidia, siliconflow, openai, anthropic, google, gemini,
#        openrouter, longcat)
# A non-empty <PROVIDER>_BASE_URL environment variable takes precedence over
# the built-in PROVIDER_BASE_URLS entry. Unknown providers print an empty line.
get_provider_base_url() {
    local provider="$1"
    local override_var=""

    # Map the provider id to the name of its override variable.
    case "$provider" in
        nvidia)      override_var="NVIDIA_BASE_URL" ;;
        siliconflow) override_var="SILICONFLOW_BASE_URL" ;;
        openai)      override_var="OPENAI_BASE_URL" ;;
        anthropic)   override_var="ANTHROPIC_BASE_URL" ;;
        google)      override_var="GOOGLE_BASE_URL" ;;
        gemini)      override_var="GEMINI_BASE_URL" ;;
        openrouter)  override_var="OPENROUTER_BASE_URL" ;;
        longcat)     override_var="LONGCAT_BASE_URL" ;;
    esac

    if [ -z "$override_var" ]; then
        echo ""
        return
    fi

    # Indirect expansion reads the override; fall back to the built-in table.
    echo "${!override_var:-${PROVIDER_BASE_URLS[$provider]}}"
}
101
+
102
# Print the NAME of the environment variable that holds a provider's API key.
#   $1 - provider id
# Callers dereference the name themselves. Unknown providers print an empty
# line.
get_provider_api_key_var() {
    local provider="$1"
    local key_var=""

    case "$provider" in
        nvidia)      key_var="NVIDIA_API_KEY" ;;
        siliconflow) key_var="SILICONFLOW_API_KEY" ;;
        openai)      key_var="OPENAI_API_KEY" ;;
        anthropic)   key_var="ANTHROPIC_API_KEY" ;;
        google)      key_var="GOOGLE_API_KEY" ;;
        gemini)      key_var="GEMINI_API_KEY" ;;
        openrouter)  key_var="OPENROUTER_API_KEY" ;;
        longcat)     key_var="LONGCAT_API_KEY" ;;
    esac

    printf '%s\n' "$key_var"
}
116
+
117
  # ---- 检测主模型 ----
118
  detect_main_model() {
119
  if [ -n "$MODEL_PROVIDER" ] && [ -n "$MODEL_NAME" ]; then
 
176
  DELEGATION_MODEL_VAL=$(detect_delegation_model)
177
  echo "💻 Delegation Model: ${DELEGATION_MODEL_VAL:-inherit-main}"
178
 
179
# ---- Resolve the primary endpoint and decide whether to use the fallback proxy ----
# Remember the real provider now: MAIN_PROVIDER is rewritten to "openai" below
# when the proxy is enabled, but the proxy still needs the original provider
# to look up the primary API key.
PRIMARY_PROVIDER_FOR_PROXY="$MAIN_PROVIDER"
MAIN_BASE_URL="$(get_provider_base_url "$MAIN_PROVIDER")"
# An explicit MODEL_BASE_URL always wins over the provider default.
if [ -n "$MODEL_BASE_URL" ]; then
    MAIN_BASE_URL="$MODEL_BASE_URL"
fi
echo " Base URL: $MAIN_BASE_URL"

# Optional fallback target, configured through FALLBACK_MODEL_* variables.
FALLBACK_PROVIDER="${FALLBACK_MODEL_PROVIDER:-}"
FALLBACK_MODEL_VAL="${FALLBACK_MODEL_NAME:-}"
FALLBACK_BASE_URL_VAL="${FALLBACK_MODEL_BASE_URL:-}"
# OpenRouter is the only provider with a default fallback model.
if [ -n "$FALLBACK_PROVIDER" ] && [ -z "$FALLBACK_MODEL_VAL" ] && [ "$FALLBACK_PROVIDER" = "openrouter" ]; then
    FALLBACK_MODEL_VAL="openrouter/free"
fi
if [ -n "$FALLBACK_PROVIDER" ] && [ -z "$FALLBACK_BASE_URL_VAL" ]; then
    FALLBACK_BASE_URL_VAL="$(get_provider_base_url "$FALLBACK_PROVIDER")"
fi

# The proxy is used only when a custom primary URL AND a complete fallback
# (provider + model) are configured. The main model is then routed through
# the local OpenAI-compatible proxy endpoint.
USE_MAIN_FALLBACK_PROXY=false
if [ -n "$MODEL_BASE_URL" ] && [ -n "$FALLBACK_PROVIDER" ] && [ -n "$FALLBACK_MODEL_VAL" ]; then
    USE_MAIN_FALLBACK_PROXY=true
    MAIN_PROVIDER="openai"
    MAIN_BASE_URL="http://127.0.0.1:${FALLBACK_PROXY_PORT}/v1"
fi

if [ "$USE_MAIN_FALLBACK_PROXY" = true ]; then
    # The banner emoji was stored as mojibake (UTF-8 read as GBK); restored
    # here from the original bytes.
    echo "🛐 Fallback Model: $FALLBACK_PROVIDER/$FALLBACK_MODEL_VAL"
    echo " Fallback URL: $FALLBACK_BASE_URL_VAL"
fi

207
+
208
  echo "────────────────────────────────────────"
209
 
210
  # ==================== 生成 config.yaml ====================
 
412
  if [ -n "$SILICONFLOW_API_KEY" ]; then
413
  export SILICONFLOW_BASE_URL="${SILICONFLOW_BASE_URL:-https://api.siliconflow.cn/v1}"
414
  fi
415
# Export default base URLs for OpenAI and Anthropic, but only when the matching
# API key is configured. A base URL that is already set is kept as-is.
if [[ -n "${OPENAI_API_KEY}" ]]; then
    : "${OPENAI_BASE_URL:=https://api.openai.com/v1}"
    export OPENAI_BASE_URL
fi
if [[ -n "${ANTHROPIC_API_KEY}" ]]; then
    : "${ANTHROPIC_BASE_URL:=https://api.anthropic.com/v1}"
    export ANTHROPIC_BASE_URL
fi
421
  if [ -n "$GEMINI_API_KEY" ]; then
422
  export GEMINI_BASE_URL="${GEMINI_BASE_URL:-https://generativelanguage.googleapis.com}"
423
  fi
 
464
 
465
  # ==================== 环境变量注入 ====================
466
  echo "⚙️ 注入环境变量到 .env..."
467
# ---- Start the local fallback proxy (only when it was enabled earlier) ----
# FALLBACK_PROXY_PID stays empty when the proxy is not used; cleanup() checks it.
FALLBACK_PROXY_PID=""
if [ "$USE_MAIN_FALLBACK_PROXY" = true ]; then
    echo "🛐 启动主模型 fallback 代理..."
    PRIMARY_KEY_VAR=$(get_provider_api_key_var "$PRIMARY_PROVIDER_FOR_PROXY")
    FALLBACK_KEY_VAR=$(get_provider_api_key_var "$FALLBACK_PROVIDER")

    # Dereference the key variables only when a name was resolved: indirect
    # expansion of an empty name is a bash error that would abort the script
    # before the explanatory messages below could be printed.
    PRIMARY_API_KEY_VAL=""
    if [ -n "$PRIMARY_KEY_VAR" ]; then
        PRIMARY_API_KEY_VAL="${!PRIMARY_KEY_VAR}"
    fi
    FALLBACK_API_KEY_VAL=""
    if [ -n "$FALLBACK_KEY_VAR" ]; then
        FALLBACK_API_KEY_VAL="${!FALLBACK_KEY_VAR}"
    fi

    if [ -z "$PRIMARY_KEY_VAR" ] || [ -z "$PRIMARY_API_KEY_VAL" ]; then
        echo " ❌ 主模型 API Key 未设置: provider=$PRIMARY_PROVIDER_FOR_PROXY var=$PRIMARY_KEY_VAR"
        exit 1
    fi
    if [ -z "$FALLBACK_KEY_VAR" ] || [ -z "$FALLBACK_API_KEY_VAL" ]; then
        echo " ❌ fallback API Key 未设置: provider=$FALLBACK_PROVIDER var=$FALLBACK_KEY_VAR"
        exit 1
    fi

    # Configuration is passed through the proxy's own environment only, so the
    # real upstream keys are not exported to the rest of this script.
    FALLBACK_PROXY_HOST=127.0.0.1 \
    FALLBACK_PROXY_PORT="$FALLBACK_PROXY_PORT" \
    PRIMARY_BASE_URL="$MODEL_BASE_URL" \
    PRIMARY_API_KEY="$PRIMARY_API_KEY_VAL" \
    PRIMARY_MODEL="$MAIN_MODEL" \
    FALLBACK_BASE_URL="$FALLBACK_BASE_URL_VAL" \
    FALLBACK_API_KEY="$FALLBACK_API_KEY_VAL" \
    FALLBACK_MODEL="$FALLBACK_MODEL_VAL" \
    OPENROUTER_HTTP_REFERER="${OPENROUTER_HTTP_REFERER:-https://huggingface.co/spaces/JackKing001/Hermes}" \
    OPENROUTER_X_TITLE="${OPENROUTER_X_TITLE:-Hermes HF Fallback}" \
    python -m src.openai_fallback_proxy &
    FALLBACK_PROXY_PID=$!

    # Poll /health for up to ~10 seconds; stop early if the process has died.
    FALLBACK_PROXY_READY=false
    for i in $(seq 1 10); do
        if curl -sf "http://127.0.0.1:${FALLBACK_PROXY_PORT}/health" > /dev/null 2>&1; then
            FALLBACK_PROXY_READY=true
            echo " ✅ fallback 代理已就绪 (http://127.0.0.1:${FALLBACK_PROXY_PORT})"
            break
        fi
        if ! kill -0 "$FALLBACK_PROXY_PID" 2>/dev/null; then
            break
        fi
        sleep 1
    done

    if [ "$FALLBACK_PROXY_READY" != true ]; then
        if ! kill -0 "$FALLBACK_PROXY_PID" 2>/dev/null; then
            # A dead proxy would make every model request fail; stop here.
            echo " ❌ fallback proxy exited during startup" >&2
            exit 1
        fi
        # Still running but slow to answer: warn and continue (best effort).
        echo " ⚠️ fallback proxy not ready after 10s, continuing anyway" >&2
    fi

    # From here on, OpenAI-compatible clients talk to the local proxy; the key
    # is a placeholder because the proxy injects the real upstream keys.
    export OPENAI_API_KEY="local-fallback-proxy"
    export OPENAI_BASE_URL="http://127.0.0.1:${FALLBACK_PROXY_PORT}/v1"
fi
508
+
509
  ENV_FILE="/data/.hermes/.env"
510
  mkdir -p /data/.hermes
511
 
512
  PERSISTENT_VARS=(
513
+ "MODEL_PROVIDER" "MODEL_NAME" "MODEL_BASE_URL" "HERMES_MODEL"
514
  "VISION_MODEL" "AUX_MODEL" "DELEGATION_MODEL"
515
  "NVIDIA_API_KEY" "NVIDIA_BASE_URL"
516
  "SILICONFLOW_API_KEY" "SILICONFLOW_BASE_URL"
517
+ "OPENAI_API_KEY" "OPENAI_BASE_URL"
518
+ "ANTHROPIC_API_KEY" "ANTHROPIC_BASE_URL"
519
  "GOOGLE_API_KEY" "GEMINI_API_KEY" "GEMINI_BASE_URL"
520
  "OPENROUTER_API_KEY" "OPENROUTER_BASE_URL"
521
  "LONGCAT_API_KEY" "LONGCAT_BASE_URL"
522
+ "FALLBACK_MODEL_PROVIDER" "FALLBACK_MODEL_NAME" "FALLBACK_MODEL_BASE_URL"
523
+ "FALLBACK_PROXY_PORT" "OPENROUTER_HTTP_REFERER" "OPENROUTER_X_TITLE"
524
  "API_SERVER_ENABLED" "API_SERVER_PORT" "API_SERVER_HOST"
525
  "TELEGRAM_BOT_TOKEN" "TELEGRAM_ALLOWED_USERS" "TELEGRAM_PROXY"
526
  "DISCORD_BOT_TOKEN" "DISCORD_CLIENT_ID"
 
1378
  kill $BFF_PID 2>/dev/null || true
1379
  wait $BFF_PID 2>/dev/null || true
1380
  fi
1381
# Stop the fallback proxy if it was started and is still running.
if [ -n "$FALLBACK_PROXY_PID" ] && kill -0 "$FALLBACK_PROXY_PID" 2>/dev/null; then
    echo " 🛑 停止 fallback 代理..."
    kill "$FALLBACK_PROXY_PID" 2>/dev/null || true
    # Reap the child so it does not linger as a zombie; ignore its exit code.
    wait "$FALLBACK_PROXY_PID" 2>/dev/null || true
fi
1386
  if [ -n "$GATEWAY_PID" ] && kill -0 $GATEWAY_PID 2>/dev/null; then
1387
  echo " 🛑 停止 Gateway..."
1388
  kill $GATEWAY_PID 2>/dev/null || true
src/openai_fallback_proxy.py ADDED
@@ -0,0 +1,191 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """OpenAI-compatible fallback proxy for Hermes on Hugging Face Spaces."""
3
+
4
+ import json
5
+ import os
6
+ from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
7
+ from typing import Any, Dict, Optional
8
+
9
+ import requests
10
+
11
+
12
# Listener settings, read once from the environment at import time.
LISTEN_HOST = os.getenv("FALLBACK_PROXY_HOST", "127.0.0.1")
LISTEN_PORT = int(os.getenv("FALLBACK_PROXY_PORT", "8787"))
# Timeout in seconds passed to every upstream request.
REQUEST_TIMEOUT = int(os.getenv("FALLBACK_PROXY_TIMEOUT", "180"))

# Primary upstream; the trailing slash is stripped so paths can be appended.
PRIMARY_BASE_URL = os.getenv("PRIMARY_BASE_URL", "").rstrip("/")
PRIMARY_API_KEY = os.getenv("PRIMARY_API_KEY", "")
PRIMARY_MODEL = os.getenv("PRIMARY_MODEL", "")

# Fallback upstream; defaults point at OpenRouter.
FALLBACK_BASE_URL = os.getenv("FALLBACK_BASE_URL", "https://openrouter.ai/api/v1").rstrip("/")
FALLBACK_API_KEY = os.getenv("FALLBACK_API_KEY", "")
FALLBACK_MODEL = os.getenv("FALLBACK_MODEL", "openrouter/free")
# Sent as the HTTP-Referer / X-Title headers on fallback requests.
FALLBACK_REFERER = os.getenv("OPENROUTER_HTTP_REFERER", "https://huggingface.co")
FALLBACK_TITLE = os.getenv("OPENROUTER_X_TITLE", "Hermes HF Fallback")
25
+
26
+
27
def is_retryable(status_code: int) -> bool:
    """Return True when an upstream HTTP status should trigger the fallback."""
    retryable_statuses = frozenset((408, 409, 425, 429, 500, 502, 503, 504))
    return status_code in retryable_statuses
29
+
30
+
31
def build_headers(api_key: str, extra: Optional[Dict[str, str]] = None) -> Dict[str, str]:
    """Build the request headers for an upstream JSON call.

    A Bearer ``Authorization`` header is included only when ``api_key`` is
    non-empty. Entries in ``extra`` are applied last and override any earlier
    header with the same name.
    """
    auth = {"Authorization": f"Bearer {api_key}"} if api_key else {}
    return {"Content-Type": "application/json", **auth, **(extra or {})}
38
+
39
+
40
def create_upstream_response(
    upstream_base: str,
    payload: Dict[str, Any],
    api_key: str,
    model_override: str,
    extra_headers: Optional[Dict[str, str]] = None,
) -> requests.Response:
    """POST a chat-completions request to ``upstream_base``.

    The caller's payload is copied, not mutated, and its ``model`` field is
    replaced with ``model_override``. The request is made in streaming mode
    when the payload's ``stream`` value is truthy.
    """
    body = {**payload, "model": model_override}
    wants_stream = bool(body.get("stream"))
    url = f"{upstream_base}/chat/completions"
    request_headers = build_headers(api_key, extra_headers)
    return requests.post(
        url,
        headers=request_headers,
        json=body,
        timeout=REQUEST_TIMEOUT,
        stream=wants_stream,
    )
56
+
57
+
58
class Handler(BaseHTTPRequestHandler):
    """HTTP handler exposing a minimal OpenAI-compatible API.

    Routes:
      * ``GET /health`` - liveness and configuration summary.
      * ``GET /v1/models`` - a single-entry model list.
      * ``POST /v1/chat/completions`` (or ``/chat/completions``) - forwarded to
        the primary upstream; retried against the fallback upstream when the
        primary fails with a retryable status or a transport error.
    """

    server_version = "HermesFallbackProxy/0.1"
    protocol_version = "HTTP/1.1"

    def _send_json(self, status_code: int, payload: Dict[str, Any]) -> None:
        """Send ``payload`` as a JSON response with an explicit Content-Length."""
        body = json.dumps(payload).encode("utf-8")
        self.send_response(status_code)
        self.send_header("Content-Type", "application/json")
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def _read_json(self) -> Optional[Dict[str, Any]]:
        """Read and parse the request body as a JSON object.

        Returns the parsed dict, or ``None`` after a 400 response has already
        been sent (bad Content-Length, undecodable body, invalid JSON, or a
        JSON value that is not an object).
        """
        try:
            length = int(self.headers.get("Content-Length", "0"))
        except ValueError:
            # The body length is unknown, so the connection cannot be reused.
            self.close_connection = True
            self._send_json(400, {"error": {"message": "Invalid Content-Length header"}})
            return None

        raw = self.rfile.read(length) if length > 0 else b"{}"
        try:
            parsed = json.loads(raw.decode("utf-8")) if raw else {}
        except ValueError:
            # JSONDecodeError and UnicodeDecodeError are both ValueError subclasses.
            self._send_json(400, {"error": {"message": "Invalid JSON body"}})
            return None

        if not isinstance(parsed, dict):
            # Later code calls .get() on the payload and overrides its "model" key.
            self._send_json(400, {"error": {"message": "JSON body must be an object"}})
            return None
        return parsed

    def _relay_response(self, response: "requests.Response", stream: bool) -> None:
        """Copy an upstream response to the client and close it.

        Non-streaming bodies are read in full BEFORE any header is written, so
        an upstream read error propagates to the caller while it can still
        answer with something else. Streaming bodies are forwarded chunk by
        chunk and end by closing the connection.
        """
        content_type = response.headers.get("Content-Type", "application/json")
        try:
            if not stream:
                body = response.content
                self.send_response(response.status_code)
                self.send_header("Content-Type", content_type)
                self.send_header("Content-Length", str(len(body)))
                self.end_headers()
                self.wfile.write(body)
                return

            self.send_response(response.status_code)
            self.send_header("Content-Type", content_type)
            self.send_header("Cache-Control", "no-cache")
            # No Content-Length and no chunked encoding here, so under HTTP/1.1
            # the end of the body must be signalled by closing the connection.
            # send_header() also flags the handler to close after this request.
            self.send_header("Connection", "close")
            self.end_headers()
            try:
                for chunk in response.iter_content(chunk_size=1024):
                    if chunk:
                        self.wfile.write(chunk)
                        self.wfile.flush()
            except requests.RequestException as error:
                # Headers are already sent; a second response would corrupt the
                # stream, so log the failure and let the connection close.
                self.log_error("upstream stream aborted: %s", error)
        finally:
            response.close()

    def do_GET(self) -> None:
        """Serve the health check and the model list; 404 for anything else."""
        if self.path == "/health":
            self._send_json(
                200,
                {
                    "status": "ok",
                    "primary_configured": bool(PRIMARY_BASE_URL and PRIMARY_MODEL),
                    "fallback_configured": bool(FALLBACK_API_KEY and FALLBACK_MODEL),
                },
            )
            return

        if self.path == "/v1/models":
            self._send_json(
                200,
                {
                    "object": "list",
                    "data": [
                        {
                            "id": PRIMARY_MODEL or FALLBACK_MODEL or "openai-fallback-proxy",
                            "object": "model",
                            "owned_by": "hermes-local-proxy",
                        }
                    ],
                },
            )
            return

        self._send_json(404, {"error": {"message": "Not found"}})

    def do_POST(self) -> None:
        """Forward a chat-completions request, falling back when the primary fails."""
        if self.path not in {"/v1/chat/completions", "/chat/completions"}:
            self._send_json(404, {"error": {"message": "Not found"}})
            return

        payload = self._read_json()
        if payload is None:
            return

        if not PRIMARY_BASE_URL or not PRIMARY_MODEL:
            self._send_json(500, {"error": {"message": "Primary model not configured"}})
            return

        stream = bool(payload.get("stream"))
        primary_response = None
        try:
            primary_response = create_upstream_response(
                PRIMARY_BASE_URL,
                payload,
                PRIMARY_API_KEY,
                PRIMARY_MODEL,
            )
            if primary_response.status_code < 400:
                self._relay_response(primary_response, stream)
                return
            if not FALLBACK_API_KEY or not is_retryable(primary_response.status_code):
                # Error bodies are plain JSON; relay them with a fixed length.
                self._relay_response(primary_response, False)
                return
        except requests.RequestException as error:
            # Reached only before any response header has been written.
            if not FALLBACK_API_KEY:
                self._send_json(502, {"error": {"message": f"Primary upstream request failed: {error}"}})
                return
        finally:
            # Always release the upstream connection, including a streamed
            # response that is being abandoned in favour of the fallback.
            if primary_response is not None:
                primary_response.close()

        fallback_response = None
        try:
            fallback_response = create_upstream_response(
                FALLBACK_BASE_URL,
                payload,
                FALLBACK_API_KEY,
                FALLBACK_MODEL,
                {
                    "HTTP-Referer": FALLBACK_REFERER,
                    "X-Title": FALLBACK_TITLE,
                },
            )
            # Stream only successful responses; errors go out as fixed-length bodies.
            self._relay_response(fallback_response, stream and fallback_response.status_code < 400)
        except requests.RequestException as error:
            self._send_json(502, {"error": {"message": f"Fallback upstream request failed: {error}"}})
        finally:
            if fallback_response is not None:
                fallback_response.close()

    def log_message(self, fmt: str, *args: Any) -> None:
        """Write access and error log lines to stdout with a proxy prefix."""
        # Flush so lines show up promptly when stdout is a pipe.
        print(f"[fallback-proxy] {self.address_string()} - {fmt % args}", flush=True)
182
+
183
+
184
def main() -> None:
    """Run the proxy until the process is interrupted.

    The server is used as a context manager so its listening socket is closed
    on every exit path.
    """
    with ThreadingHTTPServer((LISTEN_HOST, LISTEN_PORT), Handler) as server:
        # Flush so the startup line is visible at once when stdout is a pipe.
        print(f"[fallback-proxy] listening on http://{LISTEN_HOST}:{LISTEN_PORT}", flush=True)
        try:
            server.serve_forever()
        except KeyboardInterrupt:
            print("[fallback-proxy] interrupted, shutting down", flush=True)


if __name__ == "__main__":
    main()