aaxaxax committed on
Commit
cf504ff
·
1 Parent(s): 182bfd0

Random load balance

Browse files
Files changed (1) hide show
  1. app.py +7 -23
app.py CHANGED
@@ -26,38 +26,29 @@ def log(msg):
26
  @app.get("/")
27
  def root():
28
  healthy = [k for k, v in key_status.items() if v["healthy"]]
29
- return {
30
- "status": "ok",
31
- "keys_loaded": len(OLLAMA_KEYS),
32
- "healthy_keys": len(healthy),
33
- "keys_status": {v["prefix"]: {"success": v["success"], "healthy": v["healthy"]} for v in key_status.values()}
34
- }
35
 
36
  def get_healthy_keys():
37
- """Get all healthy keys"""
38
  return [k for k, v in key_status.items() if v["healthy"]]
39
 
40
  @app.post("/v1/chat/completions")
41
  async def chat(req: Request):
42
  auth_key = req.headers.get("Authorization", "").replace("Bearer ", "")
43
  if auth_key != MASTER_API_KEY:
44
- log(f"AUTH FAIL")
45
  return JSONResponse({"error": "Unauthorized"}, status_code=401)
46
 
47
  body = await req.json()
48
  model = body.get("model", "?")
49
 
50
- # Get all healthy keys
51
  healthy = get_healthy_keys()
52
  if not healthy:
53
- # Reset all
54
- for v in key_status.values():
55
- v["healthy"] = True
56
  healthy = OLLAMA_KEYS[:2]
57
 
58
- # RANDOMLY pick up to 2 keys
59
  selected = random.sample(healthy, min(2, len(healthy)))
60
- log(f"REQ: model='{model}' | Using: {[key_status[k]['prefix'] for k in selected]}")
61
 
62
  for key in selected:
63
  ki = key_status[key]
@@ -65,32 +56,25 @@ async def chat(req: Request):
65
  start = time.time()
66
  async with httpx.AsyncClient(timeout=30.0) as client:
67
  resp = await client.post(f"{BASE_URL}/v1/chat/completions", json=body, headers={"Authorization": f"Bearer {key}"})
68
- elapsed = time.time() - start
69
 
70
  if resp.status_code == 200:
71
  ki["success"] += 1
72
- log(f"OK: key#{ki['index']} ({ki['prefix']}) in {elapsed:.2f}s")
73
  return Response(resp.content, status_code=200)
74
-
75
  elif resp.status_code == 429:
76
  ki["healthy"] = False
77
- log(f"RATE LIMIT: key#{ki['index']} - marking unhealthy")
78
  continue
79
-
80
  except Exception as e:
81
  ki["healthy"] = False
82
- log(f"FAIL: key#{ki['index']} - {str(e)[:30]}")
83
  continue
84
 
85
- log(f"ALL FAILED")
86
- return JSONResponse({"error": "all keys failed"}, status_code=500)
87
 
88
  @app.get("/v1/models")
89
  def models(req: Request):
90
  auth_key = req.headers.get("Authorization", "").replace("Bearer ", "")
91
  if auth_key != MASTER_API_KEY:
92
  return JSONResponse({"error": "Unauthorized"}, status_code=401)
93
-
94
  for key in get_healthy_keys()[:2]:
95
  try:
96
  resp = httpx.get(f"{BASE_URL}/v1/models", headers={"Authorization": f"Bearer {key}"}, timeout=10)
 
26
  @app.get("/")
27
  def root():
28
  healthy = [k for k, v in key_status.items() if v["healthy"]]
29
+ return {"status": "ok", "keys_loaded": len(OLLAMA_KEYS), "healthy": len(healthy),
30
+ "keys_status": {v["prefix"]: {"s": v["success"], "h": v["healthy"]} for v in key_status.values()}}
 
 
 
 
31
 
32
  def get_healthy_keys():
 
33
  return [k for k, v in key_status.items() if v["healthy"]]
34
 
35
  @app.post("/v1/chat/completions")
36
  async def chat(req: Request):
37
  auth_key = req.headers.get("Authorization", "").replace("Bearer ", "")
38
  if auth_key != MASTER_API_KEY:
 
39
  return JSONResponse({"error": "Unauthorized"}, status_code=401)
40
 
41
  body = await req.json()
42
  model = body.get("model", "?")
43
 
 
44
  healthy = get_healthy_keys()
45
  if not healthy:
46
+ for v in key_status.values(): v["healthy"] = True
 
 
47
  healthy = OLLAMA_KEYS[:2]
48
 
49
+ # RANDOMLY pick up to 2 keys
50
  selected = random.sample(healthy, min(2, len(healthy)))
51
+ log(f"REQ: {model} -> {[key_status[k]['prefix'] for k in selected]}")
52
 
53
  for key in selected:
54
  ki = key_status[key]
 
56
  start = time.time()
57
  async with httpx.AsyncClient(timeout=30.0) as client:
58
  resp = await client.post(f"{BASE_URL}/v1/chat/completions", json=body, headers={"Authorization": f"Bearer {key}"})
 
59
 
60
  if resp.status_code == 200:
61
  ki["success"] += 1
62
+ log(f"OK: key#{ki['index']} in {time.time()-start:.2f}s")
63
  return Response(resp.content, status_code=200)
 
64
  elif resp.status_code == 429:
65
  ki["healthy"] = False
 
66
  continue
 
67
  except Exception as e:
68
  ki["healthy"] = False
 
69
  continue
70
 
71
+ return JSONResponse({"error": "all failed"}, status_code=500)
 
72
 
73
  @app.get("/v1/models")
74
  def models(req: Request):
75
  auth_key = req.headers.get("Authorization", "").replace("Bearer ", "")
76
  if auth_key != MASTER_API_KEY:
77
  return JSONResponse({"error": "Unauthorized"}, status_code=401)
 
78
  for key in get_healthy_keys()[:2]:
79
  try:
80
  resp = httpx.get(f"{BASE_URL}/v1/models", headers={"Authorization": f"Bearer {key}"}, timeout=10)