aaxaxax committed on
Commit
2143b4a
·
1 Parent(s): 295776f

Max 3 key switches, healthy tracking, faster fail

Browse files
Files changed (1) hide show
  1. app.py +51 -61
app.py CHANGED
@@ -9,28 +9,18 @@ app = FastAPI()
9
  BASE_URL = os.getenv("BASE_URL", "https://ollama.com")
10
  MASTER_API_KEY = os.getenv("MASTER_API_KEY", "ollama-proxy-free")
11
 
12
- # Load Ollama keys from env
13
  OLLAMA_KEYS = []
14
- for i in range(1, 10):
15
  key = os.getenv(f"OLLAMA_KEY_{i}")
16
  if key:
17
  OLLAMA_KEYS.append(key)
18
 
19
- # Track each key's status
20
  key_status = {}
21
  for idx, k in enumerate(OLLAMA_KEYS, 1):
22
- key_status[k] = {
23
- "index": idx,
24
- "prefix": k[:8] + "...",
25
- "failures": 0,
26
- "success": 0,
27
- "last_error": None
28
- }
29
 
30
  def log(msg):
31
- """Print timestamped log"""
32
- ts = time.strftime("%H:%M:%S")
33
- print(f"[{ts}] {msg}")
34
 
35
  @app.get("/")
36
  def root():
@@ -39,34 +29,48 @@ def root():
39
  "base_url": BASE_URL,
40
  "master_key": MASTER_API_KEY[:8] + "...",
41
  "keys_loaded": len(OLLAMA_KEYS),
42
- "keys_status": {
43
- v["prefix"]: {"failures": v["failures"], "success": v["success"], "last_error": v["last_error"]}
44
- for v in key_status.values()
45
- }
46
  }
47
 
48
- def get_best_key():
49
- """Get key with least failures"""
50
- return min(key_status.items(), key=lambda x: x[1]["failures"])[0]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
 
52
  @app.post("/v1/chat/completions")
53
  async def chat(req: Request):
54
- # Auth check
55
  auth_key = req.headers.get("Authorization", "").replace("Bearer ", "")
56
  if auth_key != MASTER_API_KEY:
57
- log(f"AUTH FAIL: received '{auth_key[:8]}...' expected '{MASTER_API_KEY[:8]}...'")
58
  return JSONResponse({"error": "Unauthorized"}, status_code=401)
59
 
60
  body = await req.json()
61
- model = body.get("model", "unknown")
62
- log(f"REQUEST: model='{model}'")
 
63
 
64
- # Try each key
65
- for attempt in range(len(OLLAMA_KEYS)):
66
- key = get_best_key()
 
 
67
  ki = key_status[key]
68
-
69
- log(f"TRY #{attempt+1}: using key#{ki['index']} ({ki['prefix']})")
70
 
71
  try:
72
  start_time = time.time()
@@ -80,50 +84,41 @@ async def chat(req: Request):
80
 
81
  if resp.status_code == 200:
82
  ki["success"] += 1
83
- ki["failures"] = 0 # Reset on success
84
- ki["last_error"] = None
85
- log(f"SUCCESS: key#{ki['index']} responded in {elapsed:.2f}s")
86
  return Response(resp.content, status_code=200)
87
 
88
  elif resp.status_code == 429:
89
  ki["failures"] += 1
90
- ki["last_error"] = "429 rate limited"
91
- log(f"RATE LIMIT: key#{ki['index']} hit rate limit, trying next key")
92
  continue
93
 
94
  elif resp.status_code >= 500:
95
  ki["failures"] += 1
96
- ki["last_error"] = f"http {resp.status_code}"
97
- log(f"SERVER ERROR: key#{ki['index']} got {resp.status_code}, trying next")
98
  continue
99
 
100
  else:
101
- ki["last_error"] = f"http {resp.status_code}"
102
- log(f"ERROR: key#{ki['index']} got {resp.status_code}")
103
  return Response(resp.content, status_code=resp.status_code)
104
 
105
  except httpx.TimeoutException:
106
  ki["failures"] += 1
107
- ki["last_error"] = "timeout after 120s"
108
- log(f"TIMEOUT: key#{ki['index']} timed out")
 
109
  continue
110
 
111
  except Exception as e:
112
  ki["failures"] += 1
113
- ki["last_error"] = str(e)[:50]
114
- log(f"EXCEPTION: key#{ki['index']} error: {e}")
115
  continue
116
 
117
- # All keys failed
118
- log(f"ALL KEYS FAILED for model='{model}'")
119
- return JSONResponse({
120
- "error": "all keys failed",
121
- "model": model,
122
- "keys_status": {
123
- v["prefix"]: {"failures": v["failures"], "last_error": v["last_error"]}
124
- for v in key_status.values()
125
- }
126
- }, status_code=500)
127
 
128
  @app.get("/v1/models")
129
  def models(req: Request):
@@ -131,22 +126,17 @@ def models(req: Request):
131
  if auth_key != MASTER_API_KEY:
132
  return JSONResponse({"error": "Unauthorized"}, status_code=401)
133
 
134
- log("REQUEST: GET /v1/models")
135
 
136
- for key in OLLAMA_KEYS:
137
  ki = key_status[key]
138
  try:
139
- resp = httpx.get(
140
- f"{BASE_URL}/v1/models",
141
- headers={"Authorization": f"Bearer {key}"},
142
- timeout=30
143
- )
144
  if resp.status_code == 200:
145
  ki["success"] += 1
146
  log(f"MODELS OK: key#{ki['index']}")
147
  return Response(resp.content, status_code=200)
148
  except Exception as e:
149
- ki["last_error"] = str(e)[:50]
150
  log(f"MODELS FAIL: key#{ki['index']} - {e}")
151
 
152
- return JSONResponse({"error": "no keys available"}, status_code=500)
 
9
  BASE_URL = os.getenv("BASE_URL", "https://ollama.com")
10
  MASTER_API_KEY = os.getenv("MASTER_API_KEY", "ollama-proxy-free")
11
 
 
12
  OLLAMA_KEYS = []
13
+ for i in range(1, 15):
14
  key = os.getenv(f"OLLAMA_KEY_{i}")
15
  if key:
16
  OLLAMA_KEYS.append(key)
17
 
 
18
  key_status = {}
19
  for idx, k in enumerate(OLLAMA_KEYS, 1):
20
+ key_status[k] = {"index": idx, "prefix": k[:8]+"...", "failures": 0, "success": 0, "last_error": None, "healthy": True}
 
 
 
 
 
 
21
 
22
  def log(msg):
23
+ print(f"[{time.strftime('%H:%M:%S')}] {msg}")
 
 
24
 
25
  @app.get("/")
26
  def root():
 
29
  "base_url": BASE_URL,
30
  "master_key": MASTER_API_KEY[:8] + "...",
31
  "keys_loaded": len(OLLAMA_KEYS),
32
+ "healthy_keys": sum(1 for v in key_status.values() if v["healthy"]),
33
+ "keys_status": {v["prefix"]: {"failures": v["failures"], "success": v["success"], "healthy": v["healthy"]} for v in key_status.values()}
 
 
34
  }
35
 
36
def quick_health_check(key, timeout=5.0):
    """Return True if *key* can list models on the upstream service.

    Sends a synchronous ``GET {BASE_URL}/v1/models`` with *key* as the Bearer
    token and reports whether the response status is exactly 200.

    Args:
        key: Upstream API key to probe.
        timeout: Request timeout passed to httpx, in seconds.

    Returns:
        True on an HTTP 200 response; False on any other status or if the
        request raises (timeout, connection failure, malformed URL, ...).
    """
    try:
        resp = httpx.get(
            f"{BASE_URL}/v1/models",
            headers={"Authorization": f"Bearer {key}"},
            timeout=timeout,
        )
    except Exception:
        # Best-effort probe: any request failure simply means "not alive".
        # Catching Exception rather than using a bare except lets
        # KeyboardInterrupt / SystemExit propagate so the process can stop.
        return False
    return resp.status_code == 200
43
+
44
def get_healthy_keys(max_failures=2, max_keys=3):
    """Return up to *max_keys* candidate keys, least-failed first.

    A key is a candidate when its ``healthy`` flag is set and its failure
    count is below *max_failures*. If no key qualifies, every key's failure
    counter and ``healthy`` flag are reset and the first *max_keys*
    configured keys are returned, so callers always get something to try
    whenever any keys are configured.

    Args:
        max_failures: Failure count at which a key stops being a candidate.
        max_keys: Maximum number of keys to return (default 3).

    Returns:
        A list of at most *max_keys* keys; empty only if no keys are loaded.
    """
    candidates = [
        k
        for k, v in key_status.items()
        if v["healthy"] and v["failures"] < max_failures
    ]
    if not candidates:
        # Everything is marked bad: wipe the slate rather than refusing to
        # serve, and fall back to the configured order.
        for v in key_status.values():
            v["failures"] = 0
            v["healthy"] = True
        return OLLAMA_KEYS[:max_keys]

    # Rank by failure count so "top N" really prefers the cleanest keys;
    # list.sort is stable, so ties keep their configured order.
    candidates.sort(key=lambda k: key_status[k]["failures"])
    return candidates[:max_keys]
54
 
55
  @app.post("/v1/chat/completions")
56
  async def chat(req: Request):
 
57
  auth_key = req.headers.get("Authorization", "").replace("Bearer ", "")
58
  if auth_key != MASTER_API_KEY:
59
+ log(f"AUTH FAIL")
60
  return JSONResponse({"error": "Unauthorized"}, status_code=401)
61
 
62
  body = await req.json()
63
+ is_stream = body.get("stream", False)
64
+ model = body.get("model", "?")
65
+ log(f"REQ: model='{model}' stream={is_stream}")
66
 
67
+ # Only try TOP 3 healthy keys - max 3 switches
68
+ candidate_keys = get_healthy_keys(max_failures=2)
69
+ log(f"Using top {len(candidate_keys)} keys")
70
+
71
+ for attempt, key in enumerate(candidate_keys):
72
  ki = key_status[key]
73
+ log(f"TRY #{attempt+1}: key#{ki['index']} ({ki['prefix']})")
 
74
 
75
  try:
76
  start_time = time.time()
 
84
 
85
  if resp.status_code == 200:
86
  ki["success"] += 1
87
+ ki["failures"] = 0
88
+ ki["healthy"] = True
89
+ log(f"OK: key#{ki['index']} in {elapsed:.1f}s")
90
  return Response(resp.content, status_code=200)
91
 
92
  elif resp.status_code == 429:
93
  ki["failures"] += 1
94
+ ki["healthy"] = False
95
+ log(f"RATE LIMIT: key#{ki['index']} - skip to next")
96
  continue
97
 
98
  elif resp.status_code >= 500:
99
  ki["failures"] += 1
100
+ ki["last_error"] = f"{resp.status_code}"
101
+ log(f"ERR {resp.status_code}: key#{ki['index']}")
102
  continue
103
 
104
  else:
 
 
105
  return Response(resp.content, status_code=resp.status_code)
106
 
107
  except httpx.TimeoutException:
108
  ki["failures"] += 1
109
+ ki["healthy"] = False
110
+ ki["last_error"] = "timeout"
111
+ log(f"TIMEOUT: key#{ki['index']} - already healthy=False, try next")
112
  continue
113
 
114
  except Exception as e:
115
  ki["failures"] += 1
116
+ ki["last_error"] = str(e)[:30]
117
+ log(f"EXCEPTION: key#{ki['index']} - {e}")
118
  continue
119
 
120
+ log(f"ALL FAILED: model='{model}'")
121
+ return JSONResponse({"error": "all keys failed after 3 attempts"}, status_code=500)
 
 
 
 
 
 
 
 
122
 
123
  @app.get("/v1/models")
124
  def models(req: Request):
 
126
  if auth_key != MASTER_API_KEY:
127
  return JSONResponse({"error": "Unauthorized"}, status_code=401)
128
 
129
+ log("REQ: GET models")
130
 
131
+ for key in OLLAMA_KEYS[:3]:
132
  ki = key_status[key]
133
  try:
134
+ resp = httpx.get(f"{BASE_URL}/v1/models", headers={"Authorization": f"Bearer {key}"}, timeout=10)
 
 
 
 
135
  if resp.status_code == 200:
136
  ki["success"] += 1
137
  log(f"MODELS OK: key#{ki['index']}")
138
  return Response(resp.content, status_code=200)
139
  except Exception as e:
 
140
  log(f"MODELS FAIL: key#{ki['index']} - {e}")
141
 
142
+ return JSONResponse({"error": "no keys"}, status_code=500)