Elysiadev11 commited on
Commit
8df83b1
·
verified ·
1 Parent(s): 7076d96

Update proxy_cerebras.py

Browse files
Files changed (1) hide show
  1. proxy_cerebras.py +163 -265
proxy_cerebras.py CHANGED
@@ -1,25 +1,19 @@
 
1
  # ==========================================================
2
- # FULL CODE APP.PY
3
- # Proxy 2 endpoint:
4
- # /v1/chat/completions (OpenAI)
5
- # /v1/messages (Anthropic)
6
- # Multi key rotation
7
- # HuggingFace Spaces ready
8
  # ==========================================================
9
 
10
  import os
11
- import time
12
  import json
 
13
  import uuid
14
  import asyncio
15
  import httpx
16
 
17
  from fastapi import FastAPI, Request
18
- from fastapi.responses import (
19
- JSONResponse,
20
- Response,
21
- StreamingResponse
22
- )
23
  from starlette.requests import ClientDisconnect
24
 
25
  app = FastAPI()
@@ -27,15 +21,8 @@ app = FastAPI()
27
  # ==========================================================
28
  # CONFIG
29
  # ==========================================================
30
- BASE_URL = os.getenv(
31
- "BASE_URL",
32
- "https://elysiadev11-proxyollma.hf.space"
33
- )
34
-
35
- MASTER_API_KEY = os.getenv(
36
- "MASTER_API_KEY",
37
- "olla"
38
- )
39
 
40
  # ==========================================================
41
  # MODEL MAP
@@ -47,10 +34,8 @@ MODEL_MAP = {
47
  "claude-haiku-4-5": "minimax-m2.7:cloud",
48
  }
49
 
50
-
51
- def map_model(model: str) -> str:
52
- return MODEL_MAP.get(model, model)
53
-
54
 
55
  # ==========================================================
56
  # LOAD KEYS
@@ -58,74 +43,54 @@ def map_model(model: str) -> str:
58
  OLLAMA_KEYS = []
59
 
60
  for i in range(1, 101):
61
- key = os.getenv(f"OLLAMA_KEY_{i}")
62
- if key:
63
- OLLAMA_KEYS.append(key)
64
 
65
  if not OLLAMA_KEYS:
66
- OLLAMA_KEYS.append("dummy-key")
67
-
68
 
69
  # ==========================================================
70
- # KEY STATUS
71
  # ==========================================================
72
- last_used_index = 0
73
 
74
  key_status = {}
75
 
76
- for idx, key in enumerate(OLLAMA_KEYS, 1):
77
- key_status[key] = {
78
- "index": idx,
79
  "busy": False,
80
- "healthy": True,
81
- "success": 0,
82
- "fail": 0
83
  }
84
 
85
-
86
- # ==========================================================
87
- # LOG
88
- # ==========================================================
89
- def log(msg):
90
- print(f"[{time.strftime('%H:%M:%S')}] {msg}")
91
-
92
-
93
- # ==========================================================
94
- # AUTH
95
- # ==========================================================
96
- def check_auth(req: Request):
97
- token = req.headers.get(
98
- "Authorization", ""
99
- ).replace("Bearer ", "")
100
-
101
- return token == MASTER_API_KEY
102
-
103
-
104
- # ==========================================================
105
- # PICK KEY
106
- # ==========================================================
107
  def get_key():
108
- global last_used_index
109
 
110
  total = len(OLLAMA_KEYS)
111
 
112
- for i in range(total):
113
- idx = (last_used_index + i) % total
114
  key = OLLAMA_KEYS[idx]
115
- st = key_status[key]
116
 
117
- if st["healthy"] and not st["busy"]:
118
- st["busy"] = True
119
- last_used_index = idx + 1
120
  return key
121
 
122
  return None
123
 
124
-
125
  def release_key(key):
126
  if key in key_status:
127
  key_status[key]["busy"] = False
128
 
 
 
 
 
 
 
 
129
 
130
  # ==========================================================
131
  # ROOT
@@ -134,71 +99,46 @@ def release_key(key):
134
  def root():
135
  return {
136
  "status": "ok",
137
- "keys": len(OLLAMA_KEYS),
138
  "base_url": BASE_URL
139
  }
140
 
141
-
142
  # ==========================================================
143
  # MODELS
144
  # ==========================================================
145
  @app.get("/v1/models")
146
  async def models(req: Request):
147
- if not check_auth(req):
148
- return JSONResponse(
149
- {"error": "Unauthorized"},
150
- status_code=401
151
- )
152
 
153
  key = OLLAMA_KEYS[0]
154
 
155
- try:
156
- async with httpx.AsyncClient(timeout=60) as client:
157
- r = await client.get(
158
- f"{BASE_URL}/v1/models",
159
- headers={
160
- "Authorization":
161
- f"Bearer {key}"
162
- }
163
- )
164
-
165
- return Response(
166
- content=r.content,
167
- media_type="application/json"
168
- )
169
-
170
- except Exception as e:
171
- return JSONResponse(
172
- {"error": str(e)},
173
- status_code=500
174
  )
175
 
 
 
 
 
176
 
177
  # ==========================================================
178
- # OPENAI ENDPOINT
179
  # ==========================================================
180
  @app.post("/v1/chat/completions")
181
- async def chat(req: Request):
182
- if not check_auth(req):
183
- return JSONResponse(
184
- {"error": "Unauthorized"},
185
- status_code=401
186
- )
187
 
188
  try:
189
  body = await req.json()
190
-
191
- except ClientDisconnect:
192
- return Response(status_code=499)
193
-
194
  except:
195
- return JSONResponse(
196
- {"error": "Invalid JSON"},
197
- status_code=400
198
- )
199
 
200
- model = body.get("model", "")
201
- body["model"] = map_model(model)
202
 
203
  is_stream = body.get("stream", False)
204
 
@@ -212,47 +152,29 @@ async def chat(req: Request):
212
  key = get_key()
213
 
214
  if not key:
215
- await asyncio.sleep(0.3)
216
  continue
217
 
218
  try:
219
- async with httpx.AsyncClient(
220
- timeout=180
221
- ) as client:
222
-
223
  r = await client.post(
224
  f"{BASE_URL}/v1/chat/completions",
225
  json=body,
226
- headers={
227
- "Authorization":
228
- f"Bearer {key}"
229
- }
230
  )
231
 
232
- if r.status_code == 200:
233
- key_status[key]["success"] += 1
234
-
235
- return Response(
236
- content=r.content,
237
- media_type=r.headers.get(
238
- "content-type",
239
- "application/json"
240
- )
241
- )
242
-
243
- else:
244
- key_status[key]["fail"] += 1
245
 
246
- except Exception:
247
- key_status[key]["fail"] += 1
248
 
249
  finally:
250
  release_key(key)
251
 
252
- return JSONResponse(
253
- {"error": "All keys failed"},
254
- status_code=500
255
- )
256
 
257
  # ------------------------------------------------------
258
  # STREAM
@@ -264,71 +186,50 @@ async def chat(req: Request):
264
  key = get_key()
265
 
266
  if not key:
267
- await asyncio.sleep(0.3)
268
  continue
269
 
270
  try:
271
- timeout = httpx.Timeout(
272
- connect=15,
273
- read=None,
274
- write=15,
275
- pool=10
276
- )
277
 
278
- async with httpx.AsyncClient(
279
- timeout=timeout
280
- ) as client:
281
 
282
  async with client.stream(
283
  "POST",
284
  f"{BASE_URL}/v1/chat/completions",
285
  json=body,
286
- headers={
287
- "Authorization":
288
- f"Bearer {key}"
289
- }
290
  ) as r:
291
 
292
- if r.status_code != 200:
293
- key_status[key]["fail"] += 1
294
- continue
295
-
296
  async for chunk in r.aiter_bytes():
297
- if chunk:
298
- yield chunk
299
 
300
- key_status[key]["success"] += 1
301
  return
302
 
303
- except Exception:
304
- key_status[key]["fail"] += 1
305
 
306
  finally:
307
  release_key(key)
308
 
309
- yield b'data: {"error":"all keys failed"}\n\n'
310
-
311
- return StreamingResponse(
312
- gen(),
313
- media_type="text/event-stream"
314
- )
315
 
 
316
 
317
  # ==========================================================
318
- # ANTHROPIC CONVERTER
319
  # ==========================================================
320
- def to_anthropic(resp, original_model):
321
 
322
  text = ""
323
 
324
- if "choices" in resp:
325
- try:
326
- text = resp["choices"][0]["message"]["content"]
327
- except:
328
- text = ""
329
-
330
- elif "message" in resp:
331
- text = resp["message"].get("content", "")
332
 
333
  return {
334
  "id": f"msg_{uuid.uuid4().hex[:10]}",
@@ -340,7 +241,7 @@ def to_anthropic(resp, original_model):
340
  "text": text
341
  }
342
  ],
343
- "model": original_model,
344
  "stop_reason": "end_turn",
345
  "stop_sequence": None,
346
  "usage": {
@@ -349,17 +250,39 @@ def to_anthropic(resp, original_model):
349
  }
350
  }
351
 
352
-
353
  # ==========================================================
354
- # ANTHROPIC STREAM
355
  # ==========================================================
356
  async def anthropic_stream(lines, model):
357
 
358
  msg_id = f"msg_{uuid.uuid4().hex[:10]}"
359
 
360
- yield f"data: {json.dumps({'type':'message_start','message':{'id':msg_id,'type':'message','role':'assistant','model':model,'content':[],'stop_reason':None,'stop_sequence':None,'usage':{'input_tokens':0,'output_tokens':0}})}\n\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
361
 
362
- yield f"data: {json.dumps({'type':'content_block_start','index':0,'content_block':{'type':'text'}})}\n\n"
 
 
 
 
 
 
 
 
363
 
364
  async for line in lines:
365
 
@@ -381,66 +304,71 @@ async def anthropic_stream(lines, model):
381
 
382
  text = ""
383
 
384
- if "choices" in data:
385
- try:
386
  delta = data["choices"][0]["delta"]
387
  text = delta.get("content", "")
388
 
389
  if not text:
390
  text = delta.get("reasoning", "")
391
- except:
392
- pass
393
 
394
- elif "message" in data:
395
- text = data["message"].get(
396
- "content", ""
397
- )
398
 
399
  if text:
400
- yield f"data: {json.dumps({'type':'content_block_delta','index':0,'delta':{'type':'text_delta','text':text}})}\n\n"
 
 
 
 
 
 
 
401
 
402
- yield f"data: {json.dumps({'type':'content_block_stop','index':0})}\n\n"
403
 
404
- yield f"data: {json.dumps({'type':'message_delta','delta':{'stop_reason':'end_turn','stop_sequence':None},'usage':{'output_tokens':0}})}\n\n"
 
 
 
405
 
406
- yield f"data: {json.dumps({'type':'message_stop'})}\n\n"
 
 
 
 
 
 
 
 
 
407
 
 
 
 
408
 
409
  # ==========================================================
410
- # ANTHROPIC ENDPOINT
411
  # ==========================================================
412
  @app.post("/v1/messages")
413
- async def messages(req: Request):
414
 
415
- if not check_auth(req):
416
- return JSONResponse(
417
- {"error": "Unauthorized"},
418
- status_code=401
419
- )
420
 
421
  try:
422
  body = await req.json()
423
-
424
- except ClientDisconnect:
425
- return Response(status_code=499)
426
-
427
  except:
428
- return JSONResponse(
429
- {"error": "Invalid JSON"},
430
- status_code=400
431
- )
432
-
433
- original_model = body.get(
434
- "model",
435
- "claude-opus-4-7"
436
- )
437
 
438
- backend_model = map_model(original_model)
439
 
440
  payload = {
441
- "model": backend_model,
442
- "stream": body.get("stream", False),
443
- "messages": body.get("messages", [])
444
  }
445
 
446
  is_stream = body.get("stream", False)
@@ -455,33 +383,22 @@ async def messages(req: Request):
455
  key = get_key()
456
 
457
  if not key:
458
- await asyncio.sleep(0.3)
459
  continue
460
 
461
  try:
462
- async with httpx.AsyncClient(
463
- timeout=180
464
- ) as client:
465
-
466
  r = await client.post(
467
  f"{BASE_URL}/v1/chat/completions",
468
  json=payload,
469
- headers={
470
- "Authorization":
471
- f"Bearer {key}"
472
- }
473
  )
474
 
475
- if r.status_code == 200:
476
-
477
- data = r.json()
478
 
479
- return JSONResponse(
480
- to_anthropic(
481
- data,
482
- original_model
483
- )
484
- )
485
 
486
  except:
487
  pass
@@ -489,10 +406,7 @@ async def messages(req: Request):
489
  finally:
490
  release_key(key)
491
 
492
- return JSONResponse(
493
- {"error": "All keys failed"},
494
- status_code=500
495
- )
496
 
497
  # ------------------------------------------------------
498
  # STREAM
@@ -504,39 +418,26 @@ async def messages(req: Request):
504
  key = get_key()
505
 
506
  if not key:
507
- await asyncio.sleep(0.3)
508
  continue
509
 
510
  try:
511
- timeout = httpx.Timeout(
512
- connect=15,
513
- read=None,
514
- write=15,
515
- pool=10
516
- )
517
 
518
- async with httpx.AsyncClient(
519
- timeout=timeout
520
- ) as client:
521
 
522
  async with client.stream(
523
  "POST",
524
  f"{BASE_URL}/v1/chat/completions",
525
  json=payload,
526
- headers={
527
- "Authorization":
528
- f"Bearer {key}"
529
- }
530
  ) as r:
531
 
532
- if r.status_code != 200:
533
- continue
534
-
535
- async for x in anthropic_stream(
536
  r.aiter_lines(),
537
  original_model
538
  ):
539
- yield x
540
 
541
  return
542
 
@@ -546,9 +447,6 @@ async def messages(req: Request):
546
  finally:
547
  release_key(key)
548
 
549
- yield 'data: {"error":"all keys failed"}\n\n'
550
 
551
- return StreamingResponse(
552
- gen(),
553
- media_type="text/event-stream"
554
- )
 
1
+ # app.py
2
  # ==========================================================
3
+ # FULL FIXED VERSION
4
+ # OpenAI + Anthropic Proxy
5
+ # HuggingFace Spaces Ready
 
 
 
6
  # ==========================================================
7
 
8
  import os
 
9
  import json
10
+ import time
11
  import uuid
12
  import asyncio
13
  import httpx
14
 
15
  from fastapi import FastAPI, Request
16
+ from fastapi.responses import JSONResponse, Response, StreamingResponse
 
 
 
 
17
  from starlette.requests import ClientDisconnect
18
 
19
  app = FastAPI()
 
21
  # ==========================================================
22
  # CONFIG
23
  # ==========================================================
24
# Upstream Ollama-compatible proxy to forward all traffic to.
BASE_URL = os.getenv("BASE_URL", "https://elysiadev11-proxyollma.hf.space")

# Bearer token clients must present to this proxy.
# SECURITY NOTE(review): the fallback "olla" is a weak hardcoded default —
# set MASTER_API_KEY in the Space secrets for any real deployment.
MASTER_API_KEY = os.getenv("MASTER_API_KEY", "olla")
 
 
 
 
 
 
 
26
 
27
  # ==========================================================
28
  # MODEL MAP
 
34
  "claude-haiku-4-5": "minimax-m2.7:cloud",
35
  }
36
 
37
def map_model(name):
    """Translate a public model alias into its backend model id.

    Names without an entry in MODEL_MAP pass through unchanged.
    """
    try:
        return MODEL_MAP[name]
    except KeyError:
        return name
 
 
39
 
40
  # ==========================================================
41
  # LOAD KEYS
 
43
# Collect upstream API keys from OLLAMA_KEY_1 .. OLLAMA_KEY_100 env vars.
OLLAMA_KEYS = []

for slot in range(1, 101):
    candidate = os.getenv(f"OLLAMA_KEY_{slot}")
    if candidate:
        OLLAMA_KEYS.append(candidate)

# Keep the rotation code path alive even with no configured keys.
if not OLLAMA_KEYS:
    OLLAMA_KEYS.append("dummy")

# ==========================================================
# KEY MANAGER
# ==========================================================
# Cursor for round-robin rotation in get_key().
last_index = 0

# Per-key bookkeeping: busy = currently reserved by a request,
# ok = health flag, index = 1-based position for diagnostics.
key_status = {}

for position, api_key in enumerate(OLLAMA_KEYS, 1):
    key_status[api_key] = {"busy": False, "ok": True, "index": position}
66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
def get_key():
    """Reserve the next available upstream key, round-robin.

    Scans OLLAMA_KEYS starting just past the previously handed-out key,
    skipping keys that are busy or marked unhealthy.  A returned key is
    flagged busy and MUST be released with release_key().

    Returns:
        The reserved key string, or None when no key is available.
    """
    global last_index

    total = len(OLLAMA_KEYS)

    for offset in range(total):
        idx = (last_index + offset) % total
        key = OLLAMA_KEYS[idx]
        status = key_status[key]

        # Fix: honour the "ok" health flag stored in key_status; the
        # previous version only checked "busy", leaving the flag dead.
        if status["ok"] and not status["busy"]:
            status["busy"] = True
            last_index = idx + 1
            return key

    return None
82
 
 
83
def release_key(key):
    """Mark *key* as free again so get_key() may hand it out."""
    status = key_status.get(key)
    if status is not None:
        status["busy"] = False
86
 
87
+ # ==========================================================
88
+ # AUTH
89
+ # ==========================================================
90
def authorized(req: Request):
    """Return True when the request's Authorization header carries the
    master bearer token for this proxy."""
    header = req.headers.get("Authorization", "")
    return header.replace("Bearer ", "") == MASTER_API_KEY
94
 
95
  # ==========================================================
96
  # ROOT
 
99
def root():
    """Health/info endpoint: report status, key count, and upstream URL."""
    return {
        "status": "ok",
        "keys_loaded": len(OLLAMA_KEYS),
        "base_url": BASE_URL,
    }
105
 
 
106
  # ==========================================================
107
  # MODELS
108
  # ==========================================================
109
@app.get("/v1/models")
async def models(req: Request):
    """Proxy the upstream /v1/models listing.

    Requires the master key.  Uses the first configured key without
    rotation — listing models is cheap and read-only.
    """
    if not authorized(req):
        return JSONResponse({"error": "Unauthorized"}, status_code=401)

    key = OLLAMA_KEYS[0]

    try:
        async with httpx.AsyncClient(timeout=60) as client:
            r = await client.get(
                f"{BASE_URL}/v1/models",
                headers={"Authorization": f"Bearer {key}"},
            )

        return Response(
            content=r.content,
            media_type="application/json",
        )
    except httpx.HTTPError as e:
        # Fix: an unreachable upstream previously escaped as an unhandled
        # exception; surface it as a clean JSON 500 instead.
        return JSONResponse({"error": str(e)}, status_code=500)
126
 
127
  # ==========================================================
128
+ # OPENAI CHAT
129
  # ==========================================================
130
  @app.post("/v1/chat/completions")
131
+ async def openai_chat(req: Request):
132
+
133
+ if not authorized(req):
134
+ return JSONResponse({"error": "Unauthorized"}, status_code=401)
 
 
135
 
136
  try:
137
  body = await req.json()
 
 
 
 
138
  except:
139
+ return JSONResponse({"error": "Invalid JSON"}, status_code=400)
 
 
 
140
 
141
+ body["model"] = map_model(body.get("model", ""))
 
142
 
143
  is_stream = body.get("stream", False)
144
 
 
152
  key = get_key()
153
 
154
  if not key:
155
+ await asyncio.sleep(0.2)
156
  continue
157
 
158
  try:
159
+ async with httpx.AsyncClient(timeout=180) as client:
 
 
 
160
  r = await client.post(
161
  f"{BASE_URL}/v1/chat/completions",
162
  json=body,
163
+ headers={"Authorization": f"Bearer {key}"}
 
 
 
164
  )
165
 
166
+ return Response(
167
+ content=r.content,
168
+ media_type=r.headers.get("content-type", "application/json")
169
+ )
 
 
 
 
 
 
 
 
 
170
 
171
+ except:
172
+ pass
173
 
174
  finally:
175
  release_key(key)
176
 
177
+ return JSONResponse({"error": "All keys failed"}, status_code=500)
 
 
 
178
 
179
  # ------------------------------------------------------
180
  # STREAM
 
186
  key = get_key()
187
 
188
  if not key:
189
+ await asyncio.sleep(0.2)
190
  continue
191
 
192
  try:
193
+ timeout = httpx.Timeout(connect=15, read=None, write=15, pool=10)
 
 
 
 
 
194
 
195
+ async with httpx.AsyncClient(timeout=timeout) as client:
 
 
196
 
197
  async with client.stream(
198
  "POST",
199
  f"{BASE_URL}/v1/chat/completions",
200
  json=body,
201
+ headers={"Authorization": f"Bearer {key}"}
 
 
 
202
  ) as r:
203
 
 
 
 
 
204
  async for chunk in r.aiter_bytes():
205
+ yield chunk
 
206
 
 
207
  return
208
 
209
+ except:
210
+ pass
211
 
212
  finally:
213
  release_key(key)
214
 
215
+ yield b'data: {"error":"failed"}\n\n'
 
 
 
 
 
216
 
217
+ return StreamingResponse(gen(), media_type="text/event-stream")
218
 
219
  # ==========================================================
220
+ # ANTHROPIC RESPONSE CONVERTER
221
  # ==========================================================
222
+ def to_anthropic(data, model_name):
223
 
224
  text = ""
225
 
226
+ try:
227
+ if "choices" in data:
228
+ text = data["choices"][0]["message"]["content"]
229
+ elif "message" in data:
230
+ text = data["message"]["content"]
231
+ except:
232
+ pass
 
233
 
234
  return {
235
  "id": f"msg_{uuid.uuid4().hex[:10]}",
 
241
  "text": text
242
  }
243
  ],
244
+ "model": model_name,
245
  "stop_reason": "end_turn",
246
  "stop_sequence": None,
247
  "usage": {
 
250
  }
251
  }
252
 
 
253
  # ==========================================================
254
+ # ANTHROPIC STREAM CONVERTER
255
  # ==========================================================
256
  async def anthropic_stream(lines, model):
257
 
258
  msg_id = f"msg_{uuid.uuid4().hex[:10]}"
259
 
260
+ start_payload = {
261
+ "type": "message_start",
262
+ "message": {
263
+ "id": msg_id,
264
+ "type": "message",
265
+ "role": "assistant",
266
+ "model": model,
267
+ "content": [],
268
+ "stop_reason": None,
269
+ "stop_sequence": None,
270
+ "usage": {
271
+ "input_tokens": 0,
272
+ "output_tokens": 0
273
+ }
274
+ }
275
+ }
276
 
277
+ yield "data: " + json.dumps(start_payload) + "\n\n"
278
+
279
+ yield "data: " + json.dumps({
280
+ "type": "content_block_start",
281
+ "index": 0,
282
+ "content_block": {
283
+ "type": "text"
284
+ }
285
+ }) + "\n\n"
286
 
287
  async for line in lines:
288
 
 
304
 
305
  text = ""
306
 
307
+ try:
308
+ if "choices" in data:
309
  delta = data["choices"][0]["delta"]
310
  text = delta.get("content", "")
311
 
312
  if not text:
313
  text = delta.get("reasoning", "")
 
 
314
 
315
+ elif "message" in data:
316
+ text = data["message"].get("content", "")
317
+ except:
318
+ pass
319
 
320
  if text:
321
+ payload = {
322
+ "type": "content_block_delta",
323
+ "index": 0,
324
+ "delta": {
325
+ "type": "text_delta",
326
+ "text": text
327
+ }
328
+ }
329
 
330
+ yield "data: " + json.dumps(payload) + "\n\n"
331
 
332
+ yield "data: " + json.dumps({
333
+ "type": "content_block_stop",
334
+ "index": 0
335
+ }) + "\n\n"
336
 
337
+ yield "data: " + json.dumps({
338
+ "type": "message_delta",
339
+ "delta": {
340
+ "stop_reason": "end_turn",
341
+ "stop_sequence": None
342
+ },
343
+ "usage": {
344
+ "output_tokens": 0
345
+ }
346
+ }) + "\n\n"
347
 
348
+ yield "data: " + json.dumps({
349
+ "type": "message_stop"
350
+ }) + "\n\n"
351
 
352
  # ==========================================================
353
+ # ANTHROPIC CHAT
354
  # ==========================================================
355
  @app.post("/v1/messages")
356
+ async def anthropic_chat(req: Request):
357
 
358
+ if not authorized(req):
359
+ return JSONResponse({"error": "Unauthorized"}, status_code=401)
 
 
 
360
 
361
  try:
362
  body = await req.json()
 
 
 
 
363
  except:
364
+ return JSONResponse({"error": "Invalid JSON"}, status_code=400)
 
 
 
 
 
 
 
 
365
 
366
+ original_model = body.get("model", "claude-opus-4-7")
367
 
368
  payload = {
369
+ "model": map_model(original_model),
370
+ "messages": body.get("messages", []),
371
+ "stream": body.get("stream", False)
372
  }
373
 
374
  is_stream = body.get("stream", False)
 
383
  key = get_key()
384
 
385
  if not key:
386
+ await asyncio.sleep(0.2)
387
  continue
388
 
389
  try:
390
+ async with httpx.AsyncClient(timeout=180) as client:
 
 
 
391
  r = await client.post(
392
  f"{BASE_URL}/v1/chat/completions",
393
  json=payload,
394
+ headers={"Authorization": f"Bearer {key}"}
 
 
 
395
  )
396
 
397
+ data = r.json()
 
 
398
 
399
+ return JSONResponse(
400
+ to_anthropic(data, original_model)
401
+ )
 
 
 
402
 
403
  except:
404
  pass
 
406
  finally:
407
  release_key(key)
408
 
409
+ return JSONResponse({"error": "All keys failed"}, status_code=500)
 
 
 
410
 
411
  # ------------------------------------------------------
412
  # STREAM
 
418
  key = get_key()
419
 
420
  if not key:
421
+ await asyncio.sleep(0.2)
422
  continue
423
 
424
  try:
425
+ timeout = httpx.Timeout(connect=15, read=None, write=15, pool=10)
 
 
 
 
 
426
 
427
+ async with httpx.AsyncClient(timeout=timeout) as client:
 
 
428
 
429
  async with client.stream(
430
  "POST",
431
  f"{BASE_URL}/v1/chat/completions",
432
  json=payload,
433
+ headers={"Authorization": f"Bearer {key}"}
 
 
 
434
  ) as r:
435
 
436
+ async for chunk in anthropic_stream(
 
 
 
437
  r.aiter_lines(),
438
  original_model
439
  ):
440
+ yield chunk
441
 
442
  return
443
 
 
447
  finally:
448
  release_key(key)
449
 
450
+ yield 'data: {"error":"failed"}\n\n'
451
 
452
+ return StreamingResponse(gen(), media_type="text/event-stream")