aaxaxax committed on
Commit
14345a7
·
1 Parent(s): 3bef6c3

Better error handling and retry

Browse files
Files changed (1) hide show
  1. app.py +53 -11
app.py CHANGED
@@ -2,55 +2,97 @@ import os
2
  import httpx
3
  from fastapi import FastAPI, Request, HTTPException
4
  from fastapi.responses import JSONResponse, Response
 
5
 
6
  app = FastAPI()
7
 
8
- # Environment variables
9
  BASE_URL = os.getenv("BASE_URL", "https://ollama.com")
10
  MASTER_API_KEY = os.getenv("MASTER_API_KEY", "dummy")
11
 
12
- # Load Ollama keys from env
13
  OLLAMA_KEYS = []
14
  for i in range(1, 10):
15
  key = os.getenv(f"OLLAMA_KEY_{i}")
16
  if key:
17
  OLLAMA_KEYS.append(key)
18
 
 
 
 
 
19
  @app.get("/")
20
  def root():
21
  return {
22
  "status": "ok",
23
  "master_key": MASTER_API_KEY[:10] + "...",
24
- "ollama_keys": len(OLLAMA_KEYS)
 
25
  }
26
 
 
 
 
 
 
 
 
 
 
 
27
  @app.post("/v1/chat/completions")
28
  async def chat(req: Request):
29
- # Auth
30
  auth_key = req.headers.get("Authorization", "").replace("Bearer ", "")
31
  if auth_key != MASTER_API_KEY:
32
  return JSONResponse({"error": "Unauthorized"}, status_code=401)
33
 
34
  body = await req.json()
 
35
 
36
- # Try each key
37
- for key in OLLAMA_KEYS:
 
38
  try:
39
- async with httpx.AsyncClient(timeout=60) as client:
40
  resp = await client.post(
41
  f"{BASE_URL}/v1/chat/completions",
42
  json=body,
43
- headers={"Authorization": f"Bearer {key}"}
 
 
 
44
  )
 
45
  if resp.status_code == 200:
 
46
  return Response(resp.content, status_code=200)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  except Exception as e:
 
48
  continue
49
 
50
- return JSONResponse({"error": "all keys failed"}, status_code=500)
 
 
 
51
 
52
  @app.get("/v1/models")
53
- def models():
54
  auth_key = req.headers.get("Authorization", "").replace("Bearer ", "")
55
  if auth_key != MASTER_API_KEY:
56
  return JSONResponse({"error": "Unauthorized"}, status_code=401)
@@ -67,4 +109,4 @@ def models():
67
  except:
68
  pass
69
 
70
- return JSONResponse({"error": "error"}, status_code=500)
 
2
  import httpx
3
  from fastapi import FastAPI, Request, HTTPException
4
  from fastapi.responses import JSONResponse, Response
5
+ import time
6
 
7
  app = FastAPI()
8
 
 
9
# Upstream API root that chat requests are forwarded to.
BASE_URL = os.getenv("BASE_URL", "https://ollama.com")
# Bearer token that callers of this proxy must present.
MASTER_API_KEY = os.getenv("MASTER_API_KEY", "dummy")

# Upstream keys come from OLLAMA_KEY_1 .. OLLAMA_KEY_9; unset or empty
# variables are skipped, so the list may be shorter than nine (or empty).
OLLAMA_KEYS = [
    value
    for value in (os.getenv(f"OLLAMA_KEY_{slot}") for slot in range(1, 10))
    if value
]

# Per-key failure counter, starting at zero for every configured key.
key_failures = dict.fromkeys(OLLAMA_KEYS, 0)
last_used_key = {}
22
+
23
@app.get("/")
def root():
    """Report basic proxy status.

    Returns a dict with a fixed "status" of "ok", a masked preview of the
    master key, the number of configured upstream keys, and the sum of all
    per-key failure counters.
    """
    # This route performs no authentication, so the preview must never
    # contain the whole secret. A fixed 10-character prefix discloses short
    # keys (including the "dummy" default) in full; show at most a quarter
    # of the key instead, still capped at 10 characters.
    visible = min(10, len(MASTER_API_KEY) // 4)
    return {
        "status": "ok",
        "master_key": MASTER_API_KEY[:visible] + "...",
        "ollama_keys": len(OLLAMA_KEYS),
        "key_failures": sum(key_failures.values()),
    }
31
 
32
def get_key(max_failures=3):
    """Return the upstream key with the fewest recorded failures.

    Args:
        max_failures: A key whose failure count has reached this value is
            considered exhausted. Defaults to 3.

    Returns:
        The least-failed key if its count is below ``max_failures``.
        Otherwise every counter is reset to zero and the first configured
        key is returned.

    Raises:
        IndexError: If no upstream keys are configured.
    """
    if key_failures:
        # min() returns the first minimal entry in insertion order, which is
        # the same key a stable sort by failure count would put first.
        candidate = min(key_failures, key=key_failures.get)
        if key_failures[candidate] < max_failures:
            return candidate

        # Even the best key is exhausted, so all of them are: start over.
        for name in key_failures:
            key_failures[name] = 0

    if not OLLAMA_KEYS:
        raise IndexError("no Ollama API keys configured")
    return OLLAMA_KEYS[0]
41
+
42
@app.post("/v1/chat/completions")
async def chat(req: Request):
    """Forward a chat-completion request upstream, rotating API keys on failure.

    The caller must send ``Authorization: Bearer <MASTER_API_KEY>``. The JSON
    body is posted unchanged to ``{BASE_URL}/v1/chat/completions`` using a key
    chosen by ``get_key()``. Up to three attempts are made.

    Returns:
        * 401 if the bearer token does not match the master key.
        * 500 ``{"error": "no keys available"}`` if no upstream key is configured.
        * 400 if the request body is not valid JSON.
        * The upstream body and status for a 200, or for any status that is
          neither 429 nor >= 500 (those are not retried).
        * 500 ``{"error": "all keys failed", ...}`` once every attempt has failed.
    """
    auth_key = req.headers.get("Authorization", "").replace("Bearer ", "")
    if auth_key != MASTER_API_KEY:
        return JSONResponse({"error": "Unauthorized"}, status_code=401)

    # Without this guard, key selection raises IndexError and the client
    # receives an opaque framework-generated 500.
    if not OLLAMA_KEYS:
        return JSONResponse({"error": "no keys available"}, status_code=500)

    try:
        body = await req.json()
    except ValueError:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors.
        return JSONResponse({"error": "invalid JSON body"}, status_code=400)

    max_retries = 3

    # One client for all attempts so retries can reuse the connection pool.
    async with httpx.AsyncClient(timeout=120.0) as client:
        for _ in range(max_retries):
            key = get_key()

            try:
                resp = await client.post(
                    f"{BASE_URL}/v1/chat/completions",
                    json=body,
                    headers={
                        "Authorization": f"Bearer {key}",
                        "Content-Type": "application/json",
                    },
                )
            except Exception:
                # Deliberately broad: timeouts, connection errors and anything
                # else raised while sending all count against this key, and
                # the next attempt may pick a different one.
                key_failures[key] = key_failures.get(key, 0) + 1
                continue

            # Relay the upstream Content-Type; otherwise the response carries
            # none and clients cannot tell that the payload is JSON.
            content_type = resp.headers.get("content-type")

            if resp.status_code == 200:
                key_failures[key] = 0  # Reset on success
                return Response(
                    resp.content, status_code=200, media_type=content_type
                )

            if resp.status_code == 429 or resp.status_code >= 500:
                # Rate limited or upstream failure - try another key
                key_failures[key] = key_failures.get(key, 0) + 1
                continue

            # Other error - return as is
            return Response(
                resp.content, status_code=resp.status_code, media_type=content_type
            )

    # NOTE(review): this exposes the first 10 characters of every upstream key
    # to the caller; consider reporting by key index instead.
    return JSONResponse({
        "error": "all keys failed",
        "key_failures": {k[:10]: v for k, v in key_failures.items()}
    }, status_code=500)
93
 
94
  @app.get("/v1/models")
95
+ def models(req: Request):
96
  auth_key = req.headers.get("Authorization", "").replace("Bearer ", "")
97
  if auth_key != MASTER_API_KEY:
98
  return JSONResponse({"error": "Unauthorized"}, status_code=401)
 
109
  except:
110
  pass
111
 
112
+ return JSONResponse({"error": "no keys available"}, status_code=500)