proxycf

Sleeping

App Files Files Community

aaxaxax committed 24 days ago

Commit

2143b4a

1 Parent(s): 295776f

Max 3 key switches, healthy tracking, faster fail

Browse files

Files changed (1) hide show

app.py +51 -61

app.py CHANGED Viewed

@@ -9,28 +9,18 @@ app = FastAPI()
 BASE_URL = os.getenv("BASE_URL", "https://ollama.com")
 MASTER_API_KEY = os.getenv("MASTER_API_KEY", "ollama-proxy-free")
-# Load Ollama keys from env
 OLLAMA_KEYS = []
-for i in range(1, 10):
     key = os.getenv(f"OLLAMA_KEY_{i}")
     if key:
         OLLAMA_KEYS.append(key)
-# Track each key's status
 key_status = {}
 for idx, k in enumerate(OLLAMA_KEYS, 1):
-    key_status[k] = {
-        "index": idx,
-        "prefix": k[:8] + "...",
-        "failures": 0,
-        "success": 0,
-        "last_error": None
-    }
 def log(msg):
-    """Print timestamped log"""
-    ts = time.strftime("%H:%M:%S")
-    print(f"[{ts}] {msg}")
 @app.get("/")
 def root():
@@ -39,34 +29,48 @@ def root():
         "base_url": BASE_URL,
         "master_key": MASTER_API_KEY[:8] + "...",
         "keys_loaded": len(OLLAMA_KEYS),
-        "keys_status": {
-            v["prefix"]: {"failures": v["failures"], "success": v["success"], "last_error": v["last_error"]}
-            for v in key_status.values()
-        }
     }
-def get_best_key():
-    """Get key with least failures"""
-    return min(key_status.items(), key=lambda x: x[1]["failures"])[0]
 @app.post("/v1/chat/completions")
 async def chat(req: Request):
-    # Auth check
     auth_key = req.headers.get("Authorization", "").replace("Bearer ", "")
     if auth_key != MASTER_API_KEY:
-        log(f"AUTH FAIL: received '{auth_key[:8]}...' expected '{MASTER_API_KEY[:8]}...'")
         return JSONResponse({"error": "Unauthorized"}, status_code=401)
     body = await req.json()
-    model = body.get("model", "unknown")
-    log(f"REQUEST: model='{model}'")
-    # Try each key
-    for attempt in range(len(OLLAMA_KEYS)):
-        key = get_best_key()
         ki = key_status[key]
-        log(f"TRY #{attempt+1}: using key#{ki['index']} ({ki['prefix']})")
         try:
             start_time = time.time()
@@ -80,50 +84,41 @@ async def chat(req: Request):
             if resp.status_code == 200:
                 ki["success"] += 1
-                ki["failures"] = 0  # Reset on success
-                ki["last_error"] = None
-                log(f"SUCCESS: key#{ki['index']} responded in {elapsed:.2f}s")
                 return Response(resp.content, status_code=200)
             elif resp.status_code == 429:
                 ki["failures"] += 1
-                ki["last_error"] = "429 rate limited"
-                log(f"RATE LIMIT: key#{ki['index']} hit rate limit, trying next key")
                 continue
             elif resp.status_code >= 500:
                 ki["failures"] += 1
-                ki["last_error"] = f"http {resp.status_code}"
-                log(f"SERVER ERROR: key#{ki['index']} got {resp.status_code}, trying next")
                 continue
             else:
-                ki["last_error"] = f"http {resp.status_code}"
-                log(f"ERROR: key#{ki['index']} got {resp.status_code}")
                 return Response(resp.content, status_code=resp.status_code)
         except httpx.TimeoutException:
             ki["failures"] += 1
-            ki["last_error"] = "timeout after 120s"
-            log(f"TIMEOUT: key#{ki['index']} timed out")
             continue
         except Exception as e:
             ki["failures"] += 1
-            ki["last_error"] = str(e)[:50]
-            log(f"EXCEPTION: key#{ki['index']} error: {e}")
             continue
-    # All keys failed
-    log(f"ALL KEYS FAILED for model='{model}'")
-    return JSONResponse({
-        "error": "all keys failed",
-        "model": model,
-        "keys_status": {
-            v["prefix"]: {"failures": v["failures"], "last_error": v["last_error"]}
-            for v in key_status.values()
-        }
-    }, status_code=500)
 @app.get("/v1/models")
 def models(req: Request):
@@ -131,22 +126,17 @@ def models(req: Request):
     if auth_key != MASTER_API_KEY:
         return JSONResponse({"error": "Unauthorized"}, status_code=401)
-    log("REQUEST: GET /v1/models")
-    for key in OLLAMA_KEYS:
         ki = key_status[key]
         try:
-            resp = httpx.get(
-                f"{BASE_URL}/v1/models",
-                headers={"Authorization": f"Bearer {key}"},
-                timeout=30
-            )
             if resp.status_code == 200:
                 ki["success"] += 1
                 log(f"MODELS OK: key#{ki['index']}")
                 return Response(resp.content, status_code=200)
         except Exception as e:
-            ki["last_error"] = str(e)[:50]
             log(f"MODELS FAIL: key#{ki['index']} - {e}")
-    return JSONResponse({"error": "no keys available"}, status_code=500)

 BASE_URL = os.getenv("BASE_URL", "https://ollama.com")
 MASTER_API_KEY = os.getenv("MASTER_API_KEY", "ollama-proxy-free")
 OLLAMA_KEYS = []
+for i in range(1, 15):
     key = os.getenv(f"OLLAMA_KEY_{i}")
     if key:
         OLLAMA_KEYS.append(key)
 key_status = {}
 for idx, k in enumerate(OLLAMA_KEYS, 1):
+    key_status[k] = {"index": idx, "prefix": k[:8]+"...", "failures": 0, "success": 0, "last_error": None, "healthy": True}
 def log(msg):
+    print(f"[{time.strftime('%H:%M:%S')}] {msg}")
 @app.get("/")
 def root():
         "base_url": BASE_URL,
         "master_key": MASTER_API_KEY[:8] + "...",
         "keys_loaded": len(OLLAMA_KEYS),
+        "healthy_keys": sum(1 for v in key_status.values() if v["healthy"]),
+        "keys_status": {v["prefix"]: {"failures": v["failures"], "success": v["success"], "healthy": v["healthy"]} for v in key_status.values()}
     }
+def quick_health_check(key, timeout=5.0):
+    """Probe the upstream with one key and report whether it answered 200.
+
+    Sends GET {BASE_URL}/v1/models with the key as a Bearer token.
+
+    Args:
+        key: upstream API key to test.
+        timeout: value passed through to httpx.get as its timeout.
+
+    Returns:
+        True if the response status code is 200; False on any other status
+        or if the request raised.
+    """
+    try:
+        resp = httpx.get(f"{BASE_URL}/v1/models", headers={"Authorization": f"Bearer {key}"}, timeout=timeout)
+    except Exception:
+        # Best-effort probe: any request failure means "not healthy".
+        # Exception (rather than a bare except) lets KeyboardInterrupt and
+        # SystemExit propagate instead of being reported as a dead key.
+        return False
+    return resp.status_code == 200
+def get_healthy_keys(max_failures=2):
+    """Return at most three keys that are still considered usable.
+
+    A key is usable when its failure count is below max_failures and its
+    "healthy" flag is set. If no key qualifies, every entry in key_status
+    has its failure count zeroed and its flag set again, and the first
+    three entries of OLLAMA_KEYS are returned.
+    """
+    usable = []
+    for candidate, state in key_status.items():
+        if state["failures"] < max_failures and state["healthy"]:
+            usable.append(candidate)
+    if usable:
+        # Cap the number of keys a single request may cycle through.
+        return usable[:3]
+    # Nothing usable: wipe the slate so the pool can be tried again.
+    for state in key_status.values():
+        state.update(failures=0, healthy=True)
+    return OLLAMA_KEYS[:3]
 @app.post("/v1/chat/completions")
 async def chat(req: Request):
     auth_key = req.headers.get("Authorization", "").replace("Bearer ", "")
     if auth_key != MASTER_API_KEY:
+        log(f"AUTH FAIL")
         return JSONResponse({"error": "Unauthorized"}, status_code=401)
     body = await req.json()
+    is_stream = body.get("stream", False)
+    model = body.get("model", "?")
+    log(f"REQ: model='{model}' stream={is_stream}")
+    # Only try TOP 3 healthy keys - max 3 switches
+    candidate_keys = get_healthy_keys(max_failures=2)
+    log(f"Using top {len(candidate_keys)} keys")
+    for attempt, key in enumerate(candidate_keys):
         ki = key_status[key]
+        log(f"TRY #{attempt+1}: key#{ki['index']} ({ki['prefix']})")
         try:
             start_time = time.time()
             if resp.status_code == 200:
                 ki["success"] += 1
+                ki["failures"] = 0
+                ki["healthy"] = True
+                log(f"OK: key#{ki['index']} in {elapsed:.1f}s")
                 return Response(resp.content, status_code=200)
             elif resp.status_code == 429:
                 ki["failures"] += 1
+                ki["healthy"] = False
+                log(f"RATE LIMIT: key#{ki['index']} - skip to next")
                 continue
             elif resp.status_code >= 500:
                 ki["failures"] += 1
+                ki["last_error"] = f"{resp.status_code}"
+                log(f"ERR {resp.status_code}: key#{ki['index']}")
                 continue
             else:
                 return Response(resp.content, status_code=resp.status_code)
         except httpx.TimeoutException:
             ki["failures"] += 1
+            ki["healthy"] = False
+            ki["last_error"] = "timeout"
+            log(f"TIMEOUT: key#{ki['index']} - already healthy=False, try next")
             continue
         except Exception as e:
             ki["failures"] += 1
+            ki["last_error"] = str(e)[:30]
+            log(f"EXCEPTION: key#{ki['index']} - {e}")
             continue
+    log(f"ALL FAILED: model='{model}'")
+    return JSONResponse({"error": "all keys failed after 3 attempts"}, status_code=500)
 @app.get("/v1/models")
 def models(req: Request):
     if auth_key != MASTER_API_KEY:
         return JSONResponse({"error": "Unauthorized"}, status_code=401)
+    log("REQ: GET models")
+    for key in OLLAMA_KEYS[:3]:
         ki = key_status[key]
         try:
+            resp = httpx.get(f"{BASE_URL}/v1/models", headers={"Authorization": f"Bearer {key}"}, timeout=10)
             if resp.status_code == 200:
                 ki["success"] += 1
                 log(f"MODELS OK: key#{ki['index']}")
                 return Response(resp.content, status_code=200)
         except Exception as e:
             log(f"MODELS FAIL: key#{ki['index']} - {e}")
+    return JSONResponse({"error": "no keys"}, status_code=500)