aaxaxax committed on
Commit
182bfd0
·
1 Parent(s): f7ae0ce

Random key selection for balanced load

Browse files
Files changed (1) hide show
  1. app.py +33 -74
app.py CHANGED
@@ -3,6 +3,7 @@ import httpx
3
  from fastapi import FastAPI, Request
4
  from fastapi.responses import JSONResponse, Response
5
  import time
 
6
 
7
  app = FastAPI()
8
 
@@ -10,47 +11,31 @@ BASE_URL = os.getenv("BASE_URL", "https://ollama.com")
10
  MASTER_API_KEY = os.getenv("MASTER_API_KEY", "ollama-proxy-free")
11
 
12
  OLLAMA_KEYS = []
13
- for i in range(1, 15):
14
  key = os.getenv(f"OLLAMA_KEY_{i}")
15
  if key:
16
  OLLAMA_KEYS.append(key)
17
 
18
  key_status = {}
19
  for idx, k in enumerate(OLLAMA_KEYS, 1):
20
- key_status[k] = {"index": idx, "prefix": k[:8]+"...", "failures": 0, "success": 0, "last_error": None, "healthy": True}
21
 
22
  def log(msg):
23
  print(f"[{time.strftime('%H:%M:%S')}] {msg}")
24
 
25
  @app.get("/")
26
  def root():
 
27
  return {
28
  "status": "ok",
29
- "base_url": BASE_URL,
30
- "master_key": MASTER_API_KEY[:8] + "...",
31
  "keys_loaded": len(OLLAMA_KEYS),
32
- "healthy_keys": sum(1 for v in key_status.values() if v["healthy"]),
33
- "keys_status": {v["prefix"]: {"failures": v["failures"], "success": v["success"], "healthy": v["healthy"]} for v in key_status.values()}
34
  }
35
 
36
- def quick_health_check(key, timeout=5.0):
37
- """Quick ping to check if key is alive"""
38
- try:
39
- resp = httpx.get(f"{BASE_URL}/v1/models", headers={"Authorization": f"Bearer {key}"}, timeout=timeout)
40
- return resp.status_code == 200
41
- except:
42
- return False
43
-
44
- def get_healthy_keys(max_failures=2):
45
- """Get keys that are healthy (low failures)"""
46
- healthy = [k for k, v in key_status.items() if v["failures"] < max_failures and v["healthy"]]
47
- if not healthy:
48
- # Reset all
49
- for v in key_status.values():
50
- v["failures"] = 0
51
- v["healthy"] = True
52
- return OLLAMA_KEYS[:3]
53
- return healthy[:2] # Only try top 3
54
 
55
  @app.post("/v1/chat/completions")
56
  async def chat(req: Request):
@@ -60,65 +45,45 @@ async def chat(req: Request):
60
  return JSONResponse({"error": "Unauthorized"}, status_code=401)
61
 
62
  body = await req.json()
63
- is_stream = body.get("stream", False)
64
  model = body.get("model", "?")
65
- log(f"REQ: model='{model}' stream={is_stream}")
66
 
67
- # Only try TOP 3 healthy keys - max 3 switches
68
- candidate_keys = get_healthy_keys(max_failures=2)
69
- log(f"Using top {len(candidate_keys)} keys")
 
 
 
 
70
 
71
- for attempt, key in enumerate(candidate_keys):
 
 
 
 
72
  ki = key_status[key]
73
- log(f"TRY #{attempt+1}: key#{ki['index']} ({ki['prefix']})")
74
-
75
  try:
76
- start_time = time.time()
77
  async with httpx.AsyncClient(timeout=30.0) as client:
78
- resp = await client.post(
79
- f"{BASE_URL}/v1/chat/completions",
80
- json=body,
81
- headers={"Authorization": f"Bearer {key}"}
82
- )
83
- elapsed = time.time() - start_time
84
 
85
  if resp.status_code == 200:
86
  ki["success"] += 1
87
- ki["failures"] = 0
88
- ki["healthy"] = True
89
- log(f"OK: key#{ki['index']} in {elapsed:.1f}s")
90
  return Response(resp.content, status_code=200)
91
 
92
  elif resp.status_code == 429:
93
- ki["failures"] += 1
94
  ki["healthy"] = False
95
- log(f"RATE LIMIT: key#{ki['index']} - skip to next")
96
- continue
97
-
98
- elif resp.status_code >= 500:
99
- ki["failures"] += 1
100
- ki["last_error"] = f"{resp.status_code}"
101
- log(f"ERR {resp.status_code}: key#{ki['index']}")
102
  continue
103
-
104
- else:
105
- return Response(resp.content, status_code=resp.status_code)
106
 
107
- except httpx.TimeoutException:
108
- ki["failures"] += 1
109
- ki["healthy"] = False
110
- ki["last_error"] = "timeout"
111
- log(f"TIMEOUT: key#{ki['index']} - already healthy=False, try next")
112
- continue
113
-
114
  except Exception as e:
115
- ki["failures"] += 1
116
- ki["last_error"] = str(e)[:30]
117
- log(f"EXCEPTION: key#{ki['index']} - {e}")
118
  continue
119
 
120
- log(f"ALL FAILED: model='{model}'")
121
- return JSONResponse({"error": "all keys failed after 3 attempts"}, status_code=500)
122
 
123
  @app.get("/v1/models")
124
  def models(req: Request):
@@ -126,17 +91,11 @@ def models(req: Request):
126
  if auth_key != MASTER_API_KEY:
127
  return JSONResponse({"error": "Unauthorized"}, status_code=401)
128
 
129
- log("REQ: GET models")
130
-
131
- for key in OLLAMA_KEYS[:3]:
132
- ki = key_status[key]
133
  try:
134
  resp = httpx.get(f"{BASE_URL}/v1/models", headers={"Authorization": f"Bearer {key}"}, timeout=10)
135
  if resp.status_code == 200:
136
- ki["success"] += 1
137
- log(f"MODELS OK: key#{ki['index']}")
138
  return Response(resp.content, status_code=200)
139
- except Exception as e:
140
- log(f"MODELS FAIL: key#{ki['index']} - {e}")
141
-
142
  return JSONResponse({"error": "no keys"}, status_code=500)
 
3
  from fastapi import FastAPI, Request
4
  from fastapi.responses import JSONResponse, Response
5
  import time
6
+ import random
7
 
8
  app = FastAPI()
9
 
 
11
  MASTER_API_KEY = os.getenv("MASTER_API_KEY", "ollama-proxy-free")
12
 
13
  OLLAMA_KEYS = []
14
+ for i in range(1, 20):
15
  key = os.getenv(f"OLLAMA_KEY_{i}")
16
  if key:
17
  OLLAMA_KEYS.append(key)
18
 
19
  key_status = {}
20
  for idx, k in enumerate(OLLAMA_KEYS, 1):
21
+ key_status[k] = {"index": idx, "prefix": k[:8]+"...", "success": 0, "healthy": True}
22
 
23
  def log(msg):
24
  print(f"[{time.strftime('%H:%M:%S')}] {msg}")
25
 
26
  @app.get("/")
27
  def root():
28
+ healthy = [k for k, v in key_status.items() if v["healthy"]]
29
  return {
30
  "status": "ok",
 
 
31
  "keys_loaded": len(OLLAMA_KEYS),
32
+ "healthy_keys": len(healthy),
33
+ "keys_status": {v["prefix"]: {"success": v["success"], "healthy": v["healthy"]} for v in key_status.values()}
34
  }
35
 
36
+ def get_healthy_keys():
37
+ """Get all healthy keys"""
38
+ return [k for k, v in key_status.items() if v["healthy"]]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
 
40
  @app.post("/v1/chat/completions")
41
  async def chat(req: Request):
 
45
  return JSONResponse({"error": "Unauthorized"}, status_code=401)
46
 
47
  body = await req.json()
 
48
  model = body.get("model", "?")
 
49
 
50
+ # Get all healthy keys
51
+ healthy = get_healthy_keys()
52
+ if not healthy:
53
+ # Reset all
54
+ for v in key_status.values():
55
+ v["healthy"] = True
56
+ healthy = OLLAMA_KEYS[:2]
57
 
58
+ # RANDOMLY pick up to 2 keys
59
+ selected = random.sample(healthy, min(2, len(healthy)))
60
+ log(f"REQ: model='{model}' | Using: {[key_status[k]['prefix'] for k in selected]}")
61
+
62
+ for key in selected:
63
  ki = key_status[key]
 
 
64
  try:
65
+ start = time.time()
66
  async with httpx.AsyncClient(timeout=30.0) as client:
67
+ resp = await client.post(f"{BASE_URL}/v1/chat/completions", json=body, headers={"Authorization": f"Bearer {key}"})
68
+ elapsed = time.time() - start
 
 
 
 
69
 
70
  if resp.status_code == 200:
71
  ki["success"] += 1
72
+ log(f"OK: key#{ki['index']} ({ki['prefix']}) in {elapsed:.2f}s")
 
 
73
  return Response(resp.content, status_code=200)
74
 
75
  elif resp.status_code == 429:
 
76
  ki["healthy"] = False
77
+ log(f"RATE LIMIT: key#{ki['index']} - marking unhealthy")
 
 
 
 
 
 
78
  continue
 
 
 
79
 
 
 
 
 
 
 
 
80
  except Exception as e:
81
+ ki["healthy"] = False
82
+ log(f"FAIL: key#{ki['index']} - {str(e)[:30]}")
 
83
  continue
84
 
85
+ log(f"ALL FAILED")
86
+ return JSONResponse({"error": "all keys failed"}, status_code=500)
87
 
88
  @app.get("/v1/models")
89
  def models(req: Request):
 
91
  if auth_key != MASTER_API_KEY:
92
  return JSONResponse({"error": "Unauthorized"}, status_code=401)
93
 
94
+ for key in get_healthy_keys()[:2]:
 
 
 
95
  try:
96
  resp = httpx.get(f"{BASE_URL}/v1/models", headers={"Authorization": f"Bearer {key}"}, timeout=10)
97
  if resp.status_code == 200:
 
 
98
  return Response(resp.content, status_code=200)
99
+ except:
100
+ pass
 
101
  return JSONResponse({"error": "no keys"}, status_code=500)