Spaces:

Elysiadev11
/

proxyollma

Sleeping

App Files Community

aaxaxax committed 24 days ago

Commit

182bfd0

1 Parent(s): f7ae0ce

Random key selection for balanced load

Browse files

Files changed (1) hide show

app.py +33 -74

app.py CHANGED Viewed

@@ -3,6 +3,7 @@ import httpx
 from fastapi import FastAPI, Request
 from fastapi.responses import JSONResponse, Response
 import time
 app = FastAPI()
@@ -10,47 +11,31 @@ BASE_URL = os.getenv("BASE_URL", "https://ollama.com")
 MASTER_API_KEY = os.getenv("MASTER_API_KEY", "ollama-proxy-free")
 OLLAMA_KEYS = []
-for i in range(1, 15):
     key = os.getenv(f"OLLAMA_KEY_{i}")
     if key:
         OLLAMA_KEYS.append(key)
 key_status = {}
 for idx, k in enumerate(OLLAMA_KEYS, 1):
-    key_status[k] = {"index": idx, "prefix": k[:8]+"...", "failures": 0, "success": 0, "last_error": None, "healthy": True}
 def log(msg):
     print(f"[{time.strftime('%H:%M:%S')}] {msg}")
 @app.get("/")
 def root():
     return {
         "status": "ok",
-        "base_url": BASE_URL,
-        "master_key": MASTER_API_KEY[:8] + "...",
         "keys_loaded": len(OLLAMA_KEYS),
-        "healthy_keys": sum(1 for v in key_status.values() if v["healthy"]),
-        "keys_status": {v["prefix"]: {"failures": v["failures"], "success": v["success"], "healthy": v["healthy"]} for v in key_status.values()}
     }
-def quick_health_check(key, timeout=5.0):
-    """Quick ping to check if key is alive"""
-    try:
-        resp = httpx.get(f"{BASE_URL}/v1/models", headers={"Authorization": f"Bearer {key}"}, timeout=timeout)
-        return resp.status_code == 200
-    except:
-        return False
-def get_healthy_keys(max_failures=2):
-    """Get keys that are healthy (low failures)"""
-    healthy = [k for k, v in key_status.items() if v["failures"] < max_failures and v["healthy"]]
-    if not healthy:
-        # Reset all
-        for v in key_status.values():
-            v["failures"] = 0
-            v["healthy"] = True
-        return OLLAMA_KEYS[:3]
-    return healthy[:2]  # Only try top 3
 @app.post("/v1/chat/completions")
 async def chat(req: Request):
@@ -60,65 +45,45 @@ async def chat(req: Request):
         return JSONResponse({"error": "Unauthorized"}, status_code=401)
     body = await req.json()
-    is_stream = body.get("stream", False)
     model = body.get("model", "?")
-    log(f"REQ: model='{model}' stream={is_stream}")
-    # Only try TOP 3 healthy keys - max 3 switches
-    candidate_keys = get_healthy_keys(max_failures=2)
-    log(f"Using top {len(candidate_keys)} keys")
-    for attempt, key in enumerate(candidate_keys):
         ki = key_status[key]
-        log(f"TRY #{attempt+1}: key#{ki['index']} ({ki['prefix']})")
         try:
-            start_time = time.time()
             async with httpx.AsyncClient(timeout=30.0) as client:
-                resp = await client.post(
-                    f"{BASE_URL}/v1/chat/completions",
-                    json=body,
-                    headers={"Authorization": f"Bearer {key}"}
-                )
-            elapsed = time.time() - start_time
             if resp.status_code == 200:
                 ki["success"] += 1
-                ki["failures"] = 0
-                ki["healthy"] = True
-                log(f"OK: key#{ki['index']} in {elapsed:.1f}s")
                 return Response(resp.content, status_code=200)
             elif resp.status_code == 429:
-                ki["failures"] += 1
                 ki["healthy"] = False
-                log(f"RATE LIMIT: key#{ki['index']} - skip to next")
-                continue
-            elif resp.status_code >= 500:
-                ki["failures"] += 1
-                ki["last_error"] = f"{resp.status_code}"
-                log(f"ERR {resp.status_code}: key#{ki['index']}")
                 continue
-            else:
-                return Response(resp.content, status_code=resp.status_code)
-        except httpx.TimeoutException:
-            ki["failures"] += 1
-            ki["healthy"] = False
-            ki["last_error"] = "timeout"
-            log(f"TIMEOUT: key#{ki['index']} - already healthy=False, try next")
-            continue
         except Exception as e:
-            ki["failures"] += 1
-            ki["last_error"] = str(e)[:30]
-            log(f"EXCEPTION: key#{ki['index']} - {e}")
             continue
-    log(f"ALL FAILED: model='{model}'")
-    return JSONResponse({"error": "all keys failed after 3 attempts"}, status_code=500)
 @app.get("/v1/models")
 def models(req: Request):
@@ -126,17 +91,11 @@ def models(req: Request):
     if auth_key != MASTER_API_KEY:
         return JSONResponse({"error": "Unauthorized"}, status_code=401)
-    log("REQ: GET models")
-    for key in OLLAMA_KEYS[:3]:
-        ki = key_status[key]
         try:
             resp = httpx.get(f"{BASE_URL}/v1/models", headers={"Authorization": f"Bearer {key}"}, timeout=10)
             if resp.status_code == 200:
-                ki["success"] += 1
-                log(f"MODELS OK: key#{ki['index']}")
                 return Response(resp.content, status_code=200)
-        except Exception as e:
-            log(f"MODELS FAIL: key#{ki['index']} - {e}")
     return JSONResponse({"error": "no keys"}, status_code=500)

 from fastapi import FastAPI, Request
 from fastapi.responses import JSONResponse, Response
 import time
+import random
 app = FastAPI()
 MASTER_API_KEY = os.getenv("MASTER_API_KEY", "ollama-proxy-free")
 OLLAMA_KEYS = []
+for i in range(1, 20):
     key = os.getenv(f"OLLAMA_KEY_{i}")
     if key:
         OLLAMA_KEYS.append(key)
 key_status = {}
 for idx, k in enumerate(OLLAMA_KEYS, 1):
+    key_status[k] = {"index": idx, "prefix": k[:8]+"...", "success": 0, "healthy": True}
 def log(msg):
     print(f"[{time.strftime('%H:%M:%S')}] {msg}")
 @app.get("/")
 def root():
+    healthy = [k for k, v in key_status.items() if v["healthy"]]
     return {
         "status": "ok",
         "keys_loaded": len(OLLAMA_KEYS),
+        "healthy_keys": len(healthy),
+        "keys_status": {v["prefix"]: {"success": v["success"], "healthy": v["healthy"]} for v in key_status.values()}
     }
+def get_healthy_keys():
+    """Get all healthy keys"""
+    return [k for k, v in key_status.items() if v["healthy"]]
 @app.post("/v1/chat/completions")
 async def chat(req: Request):
         return JSONResponse({"error": "Unauthorized"}, status_code=401)
     body = await req.json()
     model = body.get("model", "?")
+    # Get all healthy keys
+    healthy = get_healthy_keys()
+    if not healthy:
+        # Reset all
+        for v in key_status.values():
+            v["healthy"] = True
+        healthy = OLLAMA_KEYS[:2]
+    # RANDOMLY pick up to 2 keys
+    selected = random.sample(healthy, min(2, len(healthy)))
+    log(f"REQ: model='{model}' | Using: {[key_status[k]['prefix'] for k in selected]}")
+    for key in selected:
         ki = key_status[key]
         try:
+            start = time.time()
             async with httpx.AsyncClient(timeout=30.0) as client:
+                resp = await client.post(f"{BASE_URL}/v1/chat/completions", json=body, headers={"Authorization": f"Bearer {key}"})
+            elapsed = time.time() - start
             if resp.status_code == 200:
                 ki["success"] += 1
+                log(f"OK: key#{ki['index']} ({ki['prefix']}) in {elapsed:.2f}s")
                 return Response(resp.content, status_code=200)
             elif resp.status_code == 429:
                 ki["healthy"] = False
+                log(f"RATE LIMIT: key#{ki['index']} - marking unhealthy")
                 continue
         except Exception as e:
+            ki["healthy"] = False
+            log(f"FAIL: key#{ki['index']} - {str(e)[:30]}")
             continue
+    log(f"ALL FAILED")
+    return JSONResponse({"error": "all keys failed"}, status_code=500)
 @app.get("/v1/models")
 def models(req: Request):
     if auth_key != MASTER_API_KEY:
         return JSONResponse({"error": "Unauthorized"}, status_code=401)
+    for key in get_healthy_keys()[:2]:
         try:
             resp = httpx.get(f"{BASE_URL}/v1/models", headers={"Authorization": f"Bearer {key}"}, timeout=10)
             if resp.status_code == 200:
                 return Response(resp.content, status_code=200)
+        except:
+            pass
     return JSONResponse({"error": "no keys"}, status_code=500)