proxycf

Sleeping

App Files Files Community

aaxaxax committed 24 days ago

Commit

271eb18

1 Parent(s): 14345a7

Add detailed logging for API key usage

Browse files

Files changed (1) hide show

app.py +87 -47

app.py CHANGED Viewed

@@ -1,94 +1,128 @@
 import os
 import httpx
-from fastapi import FastAPI, Request, HTTPException
 from fastapi.responses import JSONResponse, Response
 import time
 app = FastAPI()
 BASE_URL = os.getenv("BASE_URL", "https://ollama.com")
-MASTER_API_KEY = os.getenv("MASTER_API_KEY", "dummy")
-# Load Ollama keys
 OLLAMA_KEYS = []
 for i in range(1, 10):
     key = os.getenv(f"OLLAMA_KEY_{i}")
     if key:
         OLLAMA_KEYS.append(key)
-# Track key failures
-key_failures = {k: 0 for k in OLLAMA_KEYS}
-last_used_key = {}
 @app.get("/")
 def root():
     return {
         "status": "ok",
-        "master_key": MASTER_API_KEY[:10] + "...",
-        "ollama_keys": len(OLLAMA_KEYS),
-        "key_failures": sum(key_failures.values())
     }
-def get_key():
-    """Get least failed key"""
-    for k, v in sorted(key_failures.items(), key=lambda x: x[1]):
-        if v < 3:
-            return k
-    # Reset if all failed
-    for k in key_failures:
-        key_failures[k] = 0
-    return OLLAMA_KEYS[0]
 @app.post("/v1/chat/completions")
 async def chat(req: Request):
     auth_key = req.headers.get("Authorization", "").replace("Bearer ", "")
     if auth_key != MASTER_API_KEY:
         return JSONResponse({"error": "Unauthorized"}, status_code=401)
     body = await req.json()
-    max_retries = 3
-    for attempt in range(max_retries):
-        key = get_key()
         try:
             async with httpx.AsyncClient(timeout=120.0) as client:
                 resp = await client.post(
                     f"{BASE_URL}/v1/chat/completions",
                     json=body,
-                    headers={
-                        "Authorization": f"Bearer {key}",
-                        "Content-Type": "application/json"
-                    }
                 )
-                if resp.status_code == 200:
-                    key_failures[key] = 0  # Reset on success
-                    return Response(resp.content, status_code=200)
-                elif resp.status_code == 429:
-                    # Rate limited - try another key
-                    key_failures[key] = key_failures.get(key, 0) + 1
-                    continue
-                elif resp.status_code >= 500:
-                    key_failures[key] = key_failures.get(key, 0) + 1
-                    continue
-                else:
-                    # Other error - return as is
-                    return Response(resp.content, status_code=resp.status_code)
         except httpx.TimeoutException:
-            key_failures[key] = key_failures.get(key, 0) + 1
             continue
         except Exception as e:
-            key_failures[key] = key_failures.get(key, 0) + 1
             continue
     return JSONResponse({
         "error": "all keys failed",
-        "key_failures": {k[:10]: v for k, v in key_failures.items()}
     }, status_code=500)
 @app.get("/v1/models")
@@ -97,7 +131,10 @@ def models(req: Request):
     if auth_key != MASTER_API_KEY:
         return JSONResponse({"error": "Unauthorized"}, status_code=401)
     for key in OLLAMA_KEYS:
         try:
             resp = httpx.get(
                 f"{BASE_URL}/v1/models",
@@ -105,8 +142,11 @@ def models(req: Request):
                 timeout=30
             )
             if resp.status_code == 200:
                 return Response(resp.content, status_code=200)
-        except:
-            pass
     return JSONResponse({"error": "no keys available"}, status_code=500)

 import os
 import httpx
+from fastapi import FastAPI, Request
 from fastapi.responses import JSONResponse, Response
 import time
 app = FastAPI()
 BASE_URL = os.getenv("BASE_URL", "https://ollama.com")
+MASTER_API_KEY = os.getenv("MASTER_API_KEY", "ollama-proxy-free")
+# Load Ollama keys from env
 OLLAMA_KEYS = []
 for i in range(1, 10):
     key = os.getenv(f"OLLAMA_KEY_{i}")
     if key:
         OLLAMA_KEYS.append(key)
+# Track each key's status
+key_status = {}
+for idx, k in enumerate(OLLAMA_KEYS, 1):
+    key_status[k] = {
+        "index": idx,
+        "prefix": k[:8] + "...",
+        "failures": 0,
+        "success": 0,
+        "last_error": None
+    }
+def log(msg):
+    """Print timestamped log"""
+    ts = time.strftime("%H:%M:%S")
+    print(f"[{ts}] {msg}")
 @app.get("/")
 def root():
     return {
         "status": "ok",
+        "base_url": BASE_URL,
+        "master_key": MASTER_API_KEY[:8] + "...",
+        "keys_loaded": len(OLLAMA_KEYS),
+        "keys_status": {
+            v["prefix"]: {"failures": v["failures"], "success": v["success"], "last_error": v["last_error"]}
+            for v in key_status.values()
+        }
     }
+def get_best_key():
+    """Get key with least failures"""
+    return min(key_status.items(), key=lambda x: x[1]["failures"])[0]
 @app.post("/v1/chat/completions")
 async def chat(req: Request):
+    # Auth check
     auth_key = req.headers.get("Authorization", "").replace("Bearer ", "")
     if auth_key != MASTER_API_KEY:
+        log(f"AUTH FAIL: received '{auth_key[:8]}...' expected '{MASTER_API_KEY[:8]}...'")
         return JSONResponse({"error": "Unauthorized"}, status_code=401)
     body = await req.json()
+    model = body.get("model", "unknown")
+    log(f"REQUEST: model='{model}'")
+    # Try each key
+    for attempt in range(len(OLLAMA_KEYS)):
+        key = get_best_key()
+        ki = key_status[key]
+        log(f"TRY #{attempt+1}: using key#{ki['index']} ({ki['prefix']})")
         try:
+            start_time = time.time()
             async with httpx.AsyncClient(timeout=120.0) as client:
                 resp = await client.post(
                     f"{BASE_URL}/v1/chat/completions",
                     json=body,
+                    headers={"Authorization": f"Bearer {key}"}
                 )
+            elapsed = time.time() - start_time
+            if resp.status_code == 200:
+                ki["success"] += 1
+                ki["failures"] = 0  # Reset on success
+                ki["last_error"] = None
+                log(f"SUCCESS: key#{ki['index']} responded in {elapsed:.2f}s")
+                return Response(resp.content, status_code=200)
+            elif resp.status_code == 429:
+                ki["failures"] += 1
+                ki["last_error"] = "429 rate limited"
+                log(f"RATE LIMIT: key#{ki['index']} hit rate limit, trying next key")
+                continue
+            elif resp.status_code >= 500:
+                ki["failures"] += 1
+                ki["last_error"] = f"http {resp.status_code}"
+                log(f"SERVER ERROR: key#{ki['index']} got {resp.status_code}, trying next")
+                continue
+            else:
+                ki["last_error"] = f"http {resp.status_code}"
+                log(f"ERROR: key#{ki['index']} got {resp.status_code}")
+                return Response(resp.content, status_code=resp.status_code)
         except httpx.TimeoutException:
+            ki["failures"] += 1
+            ki["last_error"] = "timeout after 120s"
+            log(f"TIMEOUT: key#{ki['index']} timed out")
             continue
         except Exception as e:
+            ki["failures"] += 1
+            ki["last_error"] = str(e)[:50]
+            log(f"EXCEPTION: key#{ki['index']} error: {e}")
             continue
+    # All keys failed
+    log(f"ALL KEYS FAILED for model='{model}'")
     return JSONResponse({
         "error": "all keys failed",
+        "model": model,
+        "keys_status": {
+            v["prefix"]: {"failures": v["failures"], "last_error": v["last_error"]}
+            for v in key_status.values()
+        }
     }, status_code=500)
 @app.get("/v1/models")
     if auth_key != MASTER_API_KEY:
         return JSONResponse({"error": "Unauthorized"}, status_code=401)
+    log("REQUEST: GET /v1/models")
     for key in OLLAMA_KEYS:
+        ki = key_status[key]
         try:
             resp = httpx.get(
                 f"{BASE_URL}/v1/models",
                 timeout=30
             )
             if resp.status_code == 200:
+                ki["success"] += 1
+                log(f"MODELS OK: key#{ki['index']}")
                 return Response(resp.content, status_code=200)
+        except Exception as e:
+            ki["last_error"] = str(e)[:50]
+            log(f"MODELS FAIL: key#{ki['index']} - {e}")
     return JSONResponse({"error": "no keys available"}, status_code=500)