aaxaxax committed on
Commit
271eb18
·
1 Parent(s): 14345a7

Add detailed logging for API key usage

Browse files
Files changed (1) hide show
  1. app.py +87 -47
app.py CHANGED
@@ -1,94 +1,128 @@
1
  import os
2
  import httpx
3
- from fastapi import FastAPI, Request, HTTPException
4
  from fastapi.responses import JSONResponse, Response
5
  import time
6
 
7
  app = FastAPI()
8
 
9
  BASE_URL = os.getenv("BASE_URL", "https://ollama.com")
10
- MASTER_API_KEY = os.getenv("MASTER_API_KEY", "dummy")
11
 
12
- # Load Ollama keys
13
  OLLAMA_KEYS = []
14
  for i in range(1, 10):
15
  key = os.getenv(f"OLLAMA_KEY_{i}")
16
  if key:
17
  OLLAMA_KEYS.append(key)
18
 
19
- # Track key failures
20
- key_failures = {k: 0 for k in OLLAMA_KEYS}
21
- last_used_key = {}
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
  @app.get("/")
24
  def root():
25
  return {
26
  "status": "ok",
27
- "master_key": MASTER_API_KEY[:10] + "...",
28
- "ollama_keys": len(OLLAMA_KEYS),
29
- "key_failures": sum(key_failures.values())
 
 
 
 
30
  }
31
 
32
- def get_key():
33
- """Get least failed key"""
34
- for k, v in sorted(key_failures.items(), key=lambda x: x[1]):
35
- if v < 3:
36
- return k
37
- # Reset if all failed
38
- for k in key_failures:
39
- key_failures[k] = 0
40
- return OLLAMA_KEYS[0]
41
 
42
  @app.post("/v1/chat/completions")
43
  async def chat(req: Request):
 
44
  auth_key = req.headers.get("Authorization", "").replace("Bearer ", "")
45
  if auth_key != MASTER_API_KEY:
 
46
  return JSONResponse({"error": "Unauthorized"}, status_code=401)
47
 
48
  body = await req.json()
49
- max_retries = 3
 
50
 
51
- for attempt in range(max_retries):
52
- key = get_key()
 
 
 
 
53
 
54
  try:
 
55
  async with httpx.AsyncClient(timeout=120.0) as client:
56
  resp = await client.post(
57
  f"{BASE_URL}/v1/chat/completions",
58
  json=body,
59
- headers={
60
- "Authorization": f"Bearer {key}",
61
- "Content-Type": "application/json"
62
- }
63
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
 
65
- if resp.status_code == 200:
66
- key_failures[key] = 0 # Reset on success
67
- return Response(resp.content, status_code=200)
68
-
69
- elif resp.status_code == 429:
70
- # Rate limited - try another key
71
- key_failures[key] = key_failures.get(key, 0) + 1
72
- continue
73
-
74
- elif resp.status_code >= 500:
75
- key_failures[key] = key_failures.get(key, 0) + 1
76
- continue
77
-
78
- else:
79
- # Other error - return as is
80
- return Response(resp.content, status_code=resp.status_code)
81
-
82
  except httpx.TimeoutException:
83
- key_failures[key] = key_failures.get(key, 0) + 1
 
 
84
  continue
 
85
  except Exception as e:
86
- key_failures[key] = key_failures.get(key, 0) + 1
 
 
87
  continue
88
 
 
 
89
  return JSONResponse({
90
  "error": "all keys failed",
91
- "key_failures": {k[:10]: v for k, v in key_failures.items()}
 
 
 
 
92
  }, status_code=500)
93
 
94
  @app.get("/v1/models")
@@ -97,7 +131,10 @@ def models(req: Request):
97
  if auth_key != MASTER_API_KEY:
98
  return JSONResponse({"error": "Unauthorized"}, status_code=401)
99
 
 
 
100
  for key in OLLAMA_KEYS:
 
101
  try:
102
  resp = httpx.get(
103
  f"{BASE_URL}/v1/models",
@@ -105,8 +142,11 @@ def models(req: Request):
105
  timeout=30
106
  )
107
  if resp.status_code == 200:
 
 
108
  return Response(resp.content, status_code=200)
109
- except:
110
- pass
 
111
 
112
  return JSONResponse({"error": "no keys available"}, status_code=500)
 
1
  import os
2
  import httpx
3
+ from fastapi import FastAPI, Request
4
  from fastapi.responses import JSONResponse, Response
5
  import time
6
 
7
  app = FastAPI()
8
 
# Upstream endpoint and the shared secret clients must present to this proxy.
BASE_URL = os.getenv("BASE_URL", "https://ollama.com")
MASTER_API_KEY = os.getenv("MASTER_API_KEY", "ollama-proxy-free")

# Upstream keys come from OLLAMA_KEY_1 .. OLLAMA_KEY_9; unset or empty slots
# are skipped, so the list keeps only usable keys in slot order.
OLLAMA_KEYS = list(
    filter(None, (os.getenv(f"OLLAMA_KEY_{slot}") for slot in range(1, 10)))
)

# Per-key bookkeeping, keyed by the full key string:
#   index      - 1-based position in OLLAMA_KEYS (used in log lines)
#   prefix     - first 8 characters plus "...", a printable stand-in for the key
#   failures   - failure counter, starts at 0
#   success    - success counter, starts at 0
#   last_error - short description of the most recent error, or None
key_status = {
    secret: {
        "index": position,
        "prefix": secret[:8] + "...",
        "failures": 0,
        "success": 0,
        "last_error": None,
    }
    for position, secret in enumerate(OLLAMA_KEYS, 1)
}
29
+
def log(msg):
    """Print *msg* to stdout prefixed with the local time as ``[HH:MM:SS]``.

    The line is flushed immediately: when stdout is a pipe (the usual case
    under a process manager or container) Python block-buffers it, and
    unflushed log lines would otherwise show up late or be lost on a crash.
    """
    ts = time.strftime("%H:%M:%S")
    print(f"[{ts}] {msg}", flush=True)
34
 
@app.get("/")
def root():
    """Return a status summary of the proxy.

    This handler performs no authentication, so the payload must not reveal
    secret material: the master key is reported masked, and per-key counters
    are labelled by the key's 1-based index rather than by a slice of the key
    itself.  Index labels are also unique, whereas 8-character prefixes
    collide (and silently overwrite each other) when keys share a prefix.

    Returns:
        dict with ``status``, ``base_url``, ``master_key`` (masked),
        ``keys_loaded`` and ``keys_status`` (failures / success / last_error
        per key).
    """
    per_key = {
        f"key#{entry['index']}": {
            "failures": entry["failures"],
            "success": entry["success"],
            "last_error": entry["last_error"],
        }
        for entry in key_status.values()
    }
    return {
        "status": "ok",
        "base_url": BASE_URL,
        # Never echo any part of the secret on an open endpoint.
        "master_key": "***" if MASTER_API_KEY else None,
        "keys_loaded": len(OLLAMA_KEYS),
        "keys_status": per_key,
    }
47
 
def get_best_key(exclude=None):
    """Return the key with the fewest recorded failures.

    Args:
        exclude: optional iterable of keys to skip (for example keys already
            tried for the current request).  Defaults to skipping nothing.

    Returns:
        The key string with the lowest ``failures`` count in ``key_status``;
        ties go to the key that was registered first.  Returns ``None`` when
        no key is loaded or every key is excluded, instead of letting
        ``min()`` raise ``ValueError`` on an empty sequence.
    """
    skipped = frozenset(exclude or ())
    candidates = [k for k in key_status if k not in skipped]
    if not candidates:
        return None
    return min(candidates, key=lambda k: key_status[k]["failures"])
 
 
 
 
 
 
51
 
@app.post("/v1/chat/completions")
async def chat(req: Request):
    """Forward a chat-completion request upstream, failing over between keys.

    The caller must present MASTER_API_KEY in the ``Authorization`` header.
    The JSON body is posted unchanged to ``{BASE_URL}/v1/chat/completions``
    using each loaded key at most once, least-failed first.  HTTP 429, HTTP
    5xx, timeouts and other request errors count as a failure for that key
    and move on to the next key; any other upstream status is relayed as-is.

    Returns:
        The upstream body (with its content type) for a relayed response,
        401 for a bad master key, 400 for a malformed body, or a 500 JSON
        summary when every key failed.
    """
    import hmac  # function-scope import keeps this handler self-contained

    # Auth check: constant-time comparison, and never log the expected secret.
    auth_key = req.headers.get("Authorization", "").replace("Bearer ", "")
    if not hmac.compare_digest(auth_key.encode(), MASTER_API_KEY.encode()):
        log("AUTH FAIL: invalid or missing master key")
        return JSONResponse({"error": "Unauthorized"}, status_code=401)

    # A malformed body is the client's error, not a server crash.
    try:
        body = await req.json()
    except ValueError:
        return JSONResponse({"error": "Invalid JSON body"}, status_code=400)
    if not isinstance(body, dict):
        return JSONResponse(
            {"error": "Request body must be a JSON object"}, status_code=400
        )

    model = body.get("model", "unknown")
    log(f"REQUEST: model='{model}'")

    # Fix the order up front so every key is tried at most once per request.
    # Re-selecting "the least failed key" on each attempt could keep picking
    # the same key while other keys carry a higher historical failure count.
    order = sorted(key_status, key=lambda k: key_status[k]["failures"])

    # One client for all attempts instead of a new connection pool per try.
    async with httpx.AsyncClient(timeout=120.0) as client:
        for attempt, key in enumerate(order, 1):
            ki = key_status[key]
            log(f"TRY #{attempt}: using key#{ki['index']} ({ki['prefix']})")

            start_time = time.monotonic()
            try:
                resp = await client.post(
                    f"{BASE_URL}/v1/chat/completions",
                    json=body,
                    headers={"Authorization": f"Bearer {key}"},
                )
            except httpx.TimeoutException:
                ki["failures"] += 1
                ki["last_error"] = "timeout after 120s"
                log(f"TIMEOUT: key#{ki['index']} timed out")
                continue
            except Exception as e:
                # Best-effort failover: any other request error means
                # "this key did not work", so record it and move on.
                ki["failures"] += 1
                ki["last_error"] = str(e)[:50]
                log(f"EXCEPTION: key#{ki['index']} error: {e}")
                continue
            elapsed = time.monotonic() - start_time

            status = resp.status_code
            # Relay the upstream content type so clients can parse the body.
            media_type = resp.headers.get("content-type")

            if status == 200:
                ki["success"] += 1
                ki["failures"] = 0  # Reset on success
                ki["last_error"] = None
                log(f"SUCCESS: key#{ki['index']} responded in {elapsed:.2f}s")
                return Response(resp.content, status_code=200, media_type=media_type)

            if status == 429:
                ki["failures"] += 1
                ki["last_error"] = "429 rate limited"
                log(f"RATE LIMIT: key#{ki['index']} hit rate limit, trying next key")
                continue

            if status >= 500:
                ki["failures"] += 1
                ki["last_error"] = f"http {status}"
                log(f"SERVER ERROR: key#{ki['index']} got {status}, trying next")
                continue

            # Any other status is returned to the client without failover.
            ki["last_error"] = f"http {status}"
            log(f"ERROR: key#{ki['index']} got {status}")
            return Response(resp.content, status_code=status, media_type=media_type)

    # All keys failed (or none are configured).
    log(f"ALL KEYS FAILED for model='{model}'")
    return JSONResponse({
        "error": "all keys failed",
        "model": model,
        # Label by index as well as prefix: prefixes alone collide when keys
        # share their first 8 characters, silently dropping entries.
        "keys_status": {
            f"key#{v['index']} ({v['prefix']})": {
                "failures": v["failures"],
                "last_error": v["last_error"],
            }
            for v in key_status.values()
        },
    }, status_code=500)
127
 
128
  @app.get("/v1/models")
 
131
  if auth_key != MASTER_API_KEY:
132
  return JSONResponse({"error": "Unauthorized"}, status_code=401)
133
 
134
+ log("REQUEST: GET /v1/models")
135
+
136
  for key in OLLAMA_KEYS:
137
+ ki = key_status[key]
138
  try:
139
  resp = httpx.get(
140
  f"{BASE_URL}/v1/models",
 
142
  timeout=30
143
  )
144
  if resp.status_code == 200:
145
+ ki["success"] += 1
146
+ log(f"MODELS OK: key#{ki['index']}")
147
  return Response(resp.content, status_code=200)
148
+ except Exception as e:
149
+ ki["last_error"] = str(e)[:50]
150
+ log(f"MODELS FAIL: key#{ki['index']} - {e}")
151
 
152
  return JSONResponse({"error": "no keys available"}, status_code=500)