Elysiadev11 committed on
Commit
f6549ae
·
verified ·
1 Parent(s): 8df83b1

Update proxy_cerebras.py

Browse files
Files changed (1) hide show
  1. proxy_cerebras.py +217 -280
proxy_cerebras.py CHANGED
@@ -1,9 +1,18 @@
1
  # app.py
2
- # ==========================================================
3
- # FULL FIXED VERSION
4
- # OpenAI + Anthropic Proxy
5
- # HuggingFace Spaces Ready
6
- # ==========================================================
 
 
 
 
 
 
 
 
 
7
 
8
  import os
9
  import json
@@ -18,28 +27,15 @@ from starlette.requests import ClientDisconnect
18
 
19
  app = FastAPI()
20
 
21
- # ==========================================================
22
  # CONFIG
23
- # ==========================================================
24
- BASE_URL = os.getenv("BASE_URL", "https://elysiadev11-proxyollma.hf.space")
25
  MASTER_API_KEY = os.getenv("MASTER_API_KEY", "olla")
26
 
27
- # ==========================================================
28
- # MODEL MAP
29
- # ==========================================================
30
- MODEL_MAP = {
31
- "claude-opus-4-7": "minimax-m2.7:cloud",
32
- "claude-opus-4-6": "minimax-m2.7:cloud",
33
- "claude-sonnet-4-6": "minimax-m2.7:cloud",
34
- "claude-haiku-4-5": "minimax-m2.7:cloud",
35
- }
36
-
37
- def map_model(name):
38
- return MODEL_MAP.get(name, name)
39
-
40
- # ==========================================================
41
  # LOAD KEYS
42
- # ==========================================================
43
  OLLAMA_KEYS = []
44
 
45
  for i in range(1, 101):
@@ -50,111 +46,148 @@ for i in range(1, 101):
50
  if not OLLAMA_KEYS:
51
  OLLAMA_KEYS.append("dummy")
52
 
53
- # ==========================================================
54
- # KEY MANAGER
55
- # ==========================================================
56
- last_index = 0
57
 
58
  key_status = {}
59
-
60
- for i, k in enumerate(OLLAMA_KEYS, 1):
61
  key_status[k] = {
62
- "busy": False,
63
- "ok": True,
64
- "index": i
 
 
 
65
  }
66
 
67
- def get_key():
68
- global last_index
 
 
 
 
 
 
 
 
 
 
 
 
69
 
70
- total = len(OLLAMA_KEYS)
 
 
 
 
 
71
 
72
- for x in range(total):
73
- idx = (last_index + x) % total
 
 
 
 
 
 
74
  key = OLLAMA_KEYS[idx]
 
75
 
76
- if not key_status[key]["busy"]:
77
- key_status[key]["busy"] = True
78
- last_index = idx + 1
79
  return key
80
 
81
  return None
82
 
83
  def release_key(key):
84
  if key in key_status:
85
- key_status[key]["busy"] = False
86
-
87
- # ==========================================================
88
- # AUTH
89
- # ==========================================================
90
- def authorized(req: Request):
91
- token = req.headers.get("Authorization", "")
92
- token = token.replace("Bearer ", "")
93
- return token == MASTER_API_KEY
94
 
95
- # ==========================================================
96
- # ROOT
97
- # ==========================================================
98
  @app.get("/")
99
  def root():
100
  return {
101
  "status": "ok",
102
- "keys_loaded": len(OLLAMA_KEYS),
103
- "base_url": BASE_URL
 
 
 
 
 
 
 
 
104
  }
105
 
106
- # ==========================================================
107
  # MODELS
108
- # ==========================================================
109
  @app.get("/v1/models")
110
  async def models(req: Request):
111
- if not authorized(req):
112
- return JSONResponse({"error": "Unauthorized"}, status_code=401)
113
 
114
  key = OLLAMA_KEYS[0]
115
 
116
  async with httpx.AsyncClient(timeout=60) as client:
117
  r = await client.get(
118
- f"{BASE_URL}/v1/models",
119
  headers={"Authorization": f"Bearer {key}"}
120
  )
121
 
122
- return Response(
123
- content=r.content,
124
- media_type="application/json"
125
- )
126
 
127
- # ==========================================================
128
- # OPENAI CHAT
129
- # ==========================================================
130
- @app.post("/v1/chat/completions")
131
- async def openai_chat(req: Request):
 
 
 
 
 
 
 
132
 
133
- if not authorized(req):
134
- return JSONResponse({"error": "Unauthorized"}, status_code=401)
 
 
 
 
 
 
 
135
 
136
  try:
137
  body = await req.json()
138
  except:
139
- return JSONResponse({"error": "Invalid JSON"}, status_code=400)
140
 
141
- body["model"] = map_model(body.get("model", ""))
142
 
143
- is_stream = body.get("stream", False)
144
 
145
- # ------------------------------------------------------
146
  # NON STREAM
147
- # ------------------------------------------------------
148
- if not is_stream:
149
-
150
  for _ in range(len(OLLAMA_KEYS)):
151
-
152
- key = get_key()
153
-
154
  if not key:
155
- await asyncio.sleep(0.2)
156
  continue
157
 
 
 
158
  try:
159
  async with httpx.AsyncClient(timeout=180) as client:
160
  r = await client.post(
@@ -163,37 +196,41 @@ async def openai_chat(req: Request):
163
  headers={"Authorization": f"Bearer {key}"}
164
  )
165
 
166
- return Response(
167
- content=r.content,
168
- media_type=r.headers.get("content-type", "application/json")
169
- )
 
 
 
 
 
170
 
171
- except:
172
- pass
 
 
 
173
 
174
  finally:
175
  release_key(key)
176
 
177
- return JSONResponse({"error": "All keys failed"}, status_code=500)
178
 
179
- # ------------------------------------------------------
180
  # STREAM
181
- # ------------------------------------------------------
182
  async def gen():
183
-
184
  for _ in range(len(OLLAMA_KEYS)):
185
-
186
  key = get_key()
187
-
188
  if not key:
189
- await asyncio.sleep(0.2)
190
  continue
191
 
192
  try:
193
  timeout = httpx.Timeout(connect=15, read=None, write=15, pool=10)
194
 
195
  async with httpx.AsyncClient(timeout=timeout) as client:
196
-
197
  async with client.stream(
198
  "POST",
199
  f"{BASE_URL}/v1/chat/completions",
@@ -201,252 +238,152 @@ async def openai_chat(req: Request):
201
  headers={"Authorization": f"Bearer {key}"}
202
  ) as r:
203
 
204
- async for chunk in r.aiter_bytes():
205
- yield chunk
 
 
 
 
 
206
 
 
207
  return
208
 
209
- except:
210
- pass
 
211
 
212
  finally:
213
  release_key(key)
214
 
215
- yield b'data: {"error":"failed"}\n\n'
216
 
217
  return StreamingResponse(gen(), media_type="text/event-stream")
218
 
219
- # ==========================================================
220
- # ANTHROPIC RESPONSE CONVERTER
221
- # ==========================================================
222
- def to_anthropic(data, model_name):
223
-
224
- text = ""
225
-
226
- try:
227
- if "choices" in data:
228
- text = data["choices"][0]["message"]["content"]
229
- elif "message" in data:
230
- text = data["message"]["content"]
231
- except:
232
- pass
233
-
234
- return {
235
- "id": f"msg_{uuid.uuid4().hex[:10]}",
236
- "type": "message",
237
- "role": "assistant",
238
- "content": [
239
- {
240
- "type": "text",
241
- "text": text
242
- }
243
- ],
244
- "model": model_name,
245
- "stop_reason": "end_turn",
246
- "stop_sequence": None,
247
- "usage": {
248
- "input_tokens": 0,
249
- "output_tokens": 0
250
- }
251
- }
252
-
253
- # ==========================================================
254
- # ANTHROPIC STREAM CONVERTER
255
- # ==========================================================
256
- async def anthropic_stream(lines, model):
257
-
258
- msg_id = f"msg_{uuid.uuid4().hex[:10]}"
259
-
260
- start_payload = {
261
- "type": "message_start",
262
- "message": {
263
- "id": msg_id,
264
- "type": "message",
265
- "role": "assistant",
266
- "model": model,
267
- "content": [],
268
- "stop_reason": None,
269
- "stop_sequence": None,
270
- "usage": {
271
- "input_tokens": 0,
272
- "output_tokens": 0
273
- }
274
- }
275
- }
276
-
277
- yield "data: " + json.dumps(start_payload) + "\n\n"
278
-
279
- yield "data: " + json.dumps({
280
- "type": "content_block_start",
281
- "index": 0,
282
- "content_block": {
283
- "type": "text"
284
- }
285
- }) + "\n\n"
286
-
287
- async for line in lines:
288
-
289
- if not line:
290
- continue
291
-
292
- if not line.startswith("data: "):
293
- continue
294
-
295
- raw = line[6:].strip()
296
-
297
- if raw == "[DONE]":
298
- break
299
-
300
- try:
301
- data = json.loads(raw)
302
- except:
303
- continue
304
-
305
- text = ""
306
-
307
- try:
308
- if "choices" in data:
309
- delta = data["choices"][0]["delta"]
310
- text = delta.get("content", "")
311
-
312
- if not text:
313
- text = delta.get("reasoning", "")
314
-
315
- elif "message" in data:
316
- text = data["message"].get("content", "")
317
- except:
318
- pass
319
-
320
- if text:
321
- payload = {
322
- "type": "content_block_delta",
323
- "index": 0,
324
- "delta": {
325
- "type": "text_delta",
326
- "text": text
327
- }
328
- }
329
-
330
- yield "data: " + json.dumps(payload) + "\n\n"
331
-
332
- yield "data: " + json.dumps({
333
- "type": "content_block_stop",
334
- "index": 0
335
- }) + "\n\n"
336
-
337
- yield "data: " + json.dumps({
338
- "type": "message_delta",
339
- "delta": {
340
- "stop_reason": "end_turn",
341
- "stop_sequence": None
342
- },
343
- "usage": {
344
- "output_tokens": 0
345
- }
346
- }) + "\n\n"
347
-
348
- yield "data: " + json.dumps({
349
- "type": "message_stop"
350
- }) + "\n\n"
351
-
352
- # ==========================================================
353
- # ANTHROPIC CHAT
354
- # ==========================================================
355
  @app.post("/v1/messages")
356
- async def anthropic_chat(req: Request):
357
-
358
- if not authorized(req):
359
- return JSONResponse({"error": "Unauthorized"}, status_code=401)
360
 
361
  try:
362
  body = await req.json()
363
  except:
364
- return JSONResponse({"error": "Invalid JSON"}, status_code=400)
365
 
366
- original_model = body.get("model", "claude-opus-4-7")
367
 
368
- payload = {
369
- "model": map_model(original_model),
370
  "messages": body.get("messages", []),
371
- "stream": body.get("stream", False)
372
  }
373
 
374
- is_stream = body.get("stream", False)
375
-
376
- # ------------------------------------------------------
377
  # NON STREAM
378
- # ------------------------------------------------------
379
- if not is_stream:
 
 
380
 
381
- for _ in range(len(OLLAMA_KEYS)):
382
-
383
- key = get_key()
384
 
 
 
385
  if not key:
386
- await asyncio.sleep(0.2)
387
  continue
388
 
 
 
389
  try:
390
  async with httpx.AsyncClient(timeout=180) as client:
391
  r = await client.post(
392
  f"{BASE_URL}/v1/chat/completions",
393
- json=payload,
394
  headers={"Authorization": f"Bearer {key}"}
395
  )
396
 
397
- data = r.json()
398
-
399
- return JSONResponse(
400
- to_anthropic(data, original_model)
401
- )
402
-
403
- except:
404
- pass
 
 
 
 
 
 
 
 
405
 
406
  finally:
407
  release_key(key)
408
 
409
- return JSONResponse({"error": "All keys failed"}, status_code=500)
410
 
411
- # ------------------------------------------------------
412
  # STREAM
413
- # ------------------------------------------------------
414
  async def gen():
 
415
 
416
- for _ in range(len(OLLAMA_KEYS)):
 
417
 
418
- key = get_key()
419
 
 
 
420
  if not key:
421
- await asyncio.sleep(0.2)
422
  continue
423
 
 
 
424
  try:
425
  timeout = httpx.Timeout(connect=15, read=None, write=15, pool=10)
426
 
427
  async with httpx.AsyncClient(timeout=timeout) as client:
428
-
429
  async with client.stream(
430
  "POST",
431
  f"{BASE_URL}/v1/chat/completions",
432
- json=payload,
433
  headers={"Authorization": f"Bearer {key}"}
434
  ) as r:
435
 
436
- async for chunk in anthropic_stream(
437
- r.aiter_lines(),
438
- original_model
439
- ):
440
- yield chunk
441
 
442
- return
 
 
 
 
 
 
 
 
 
 
 
 
443
 
444
- except:
445
- pass
446
 
447
  finally:
448
  release_key(key)
449
 
450
- yield 'data: {"error":"failed"}\n\n'
 
 
451
 
452
  return StreamingResponse(gen(), media_type="text/event-stream")
 
1
  # app.py
2
+ # FULL RESTORE VERSION
3
+ # Semua fitur utama dibalikin:
4
+ # Multi key rotate
5
+ # Round robin
6
+ # ✅ Key lock
7
+ # ✅ Dashboard /
8
+ # ✅ /v1/models
9
+ # ✅ /v1/chat/completions
10
+ # ✅ /v1/messages
11
+ # ✅ Stream OpenAI
12
+ # ✅ Stream Anthropic
13
+ # ✅ Claude -> Ollama model map
14
+ # ✅ Retry jika key limit
15
+ # ✅ Health monitor
16
 
17
  import os
18
  import json
 
27
 
28
  app = FastAPI()
29
 
30
# =========================================================
# CONFIG
# =========================================================
# Root URL of the upstream API. Request paths are appended as "/...", so a
# trailing slash supplied through the environment is stripped to avoid
# building "//v1/..." URLs.
BASE_URL = os.getenv("BASE_URL", "https://ollama.com").rstrip("/")

# Bearer token that clients must present to this proxy. The fallback value is
# only a placeholder; set MASTER_API_KEY in the environment when deploying.
MASTER_API_KEY = os.getenv("MASTER_API_KEY", "olla")
35
 
36
+ # =========================================================
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  # LOAD KEYS
38
+ # =========================================================
39
  OLLAMA_KEYS = []
40
 
41
  for i in range(1, 101):
 
46
  if not OLLAMA_KEYS:
47
  OLLAMA_KEYS.append("dummy")
48
 
49
+ # =========================================================
50
+ # STATUS
51
+ # =========================================================
52
# Round-robin cursor: position in OLLAMA_KEYS where the next key search starts.
last_used_index = 0

# Per-key bookkeeping, keyed by the full key string. "index" is the 1-based
# position in OLLAMA_KEYS and "prefix" is a shortened form of the key.
key_status = {
    api_key: {
        "index": position,
        "prefix": api_key[:8] + "...",
        "success": 0,
        "failures": 0,
        "healthy": True,
        "in_use": False,
    }
    for position, api_key in enumerate(OLLAMA_KEYS, start=1)
}
64
 
65
+ # =========================================================
66
+ # MODEL MAP
67
+ # =========================================================
68
# Claude model names recognised by the proxy; every one of them is routed to
# the same upstream model.
MODEL_MAP = dict.fromkeys(
    (
        "claude-opus-4-7",
        "claude-sonnet-4-6",
        "claude-haiku-4-5",
    ),
    "minimax-m2.7:cloud",
)
73
+
74
+ # =========================================================
75
+ # UTILS
76
+ # =========================================================
77
def log(msg):
    """Print ``msg`` to stdout, prefixed with the local time as ``[HH:MM:SS]``."""
    stamp = time.strftime("%H:%M:%S")
    print("[{}] {}".format(stamp, msg))
79
 
80
def auth_ok(req: Request):
    """Return True when the request carries the proxy's master API key.

    The key is read from the ``Authorization`` header, with or without a
    leading ``Bearer `` scheme.

    Args:
        req: Incoming request; only ``req.headers`` is read.

    Returns:
        bool: True if the presented token equals ``MASTER_API_KEY``.
    """
    import hmac  # local import: only this helper needs it

    token = req.headers.get("Authorization", "")

    # Strip the scheme only when it is a prefix. str.replace() removed
    # "Bearer " anywhere in the value, so a header such as "olBearer la"
    # was accepted as the key "olla".
    scheme = "Bearer "
    if token.startswith(scheme):
        token = token[len(scheme):]

    # Constant-time comparison so response timing does not reveal how much of
    # the key matched. Bytes are compared because compare_digest rejects
    # non-ASCII str arguments.
    return hmac.compare_digest(
        token.encode("utf-8"),
        MASTER_API_KEY.encode("utf-8"),
    )
83
+
84
def map_model(name):
    """Translate a client-supplied model name into the upstream model name.

    Names that are not listed in ``MODEL_MAP`` all resolve to
    ``"minimax-m2.7:cloud"``.
    """
    if name in MODEL_MAP:
        return MODEL_MAP[name]
    return "minimax-m2.7:cloud"
86
 
87
def get_key(exclude=None, cooldown=60.0):
    """Reserve and return the next usable upstream API key, or None.

    Keys are scanned round-robin starting at ``last_used_index``. A key is
    usable when it is healthy, not currently in use and not in ``exclude``.
    The returned key is flagged ``in_use``; hand it back with
    ``release_key`` once the upstream call has finished.

    Keys flagged unhealthy used to stay excluded for the life of the process,
    so after every key had been flagged once no request could succeed again.
    An unhealthy key is now given another chance ``cooldown`` seconds after
    this function first notices that it is unhealthy.

    Args:
        exclude: Optional collection of keys to skip, e.g. the keys already
            tried for the current request.
        cooldown: Seconds an unhealthy key is rested before it is retried.

    Returns:
        The reserved key string, or None when no key is usable right now.
    """
    global last_used_index

    skipped = () if exclude is None else exclude
    total = len(OLLAMA_KEYS)
    now = time.monotonic()

    for offset in range(total):
        idx = (last_used_index + offset) % total
        key = OLLAMA_KEYS[idx]
        st = key_status[key]

        if st["in_use"] or key in skipped:
            continue

        if not st["healthy"]:
            # Callers only flip "healthy" to False, so the moment a key is
            # first seen unhealthy here is recorded as the cooldown start.
            flagged_at = st.setdefault("unhealthy_since", now)
            if now - flagged_at < cooldown:
                continue
            st["healthy"] = True
            del st["unhealthy_since"]

        st["in_use"] = True
        # Resume the next search just after this key (wrapped by the modulo).
        last_used_index = idx + 1
        return key

    return None


def release_key(key):
    """Clear the ``in_use`` flag of ``key``; unknown keys are ignored."""
    if key in key_status:
        key_status[key]["in_use"] = False
 
 
 
 
 
 
 
 
108
 
109
# =========================================================
# ROOT DASHBOARD
# =========================================================
@app.get("/")
def root():
    """Return a status summary of the configured upstream keys.

    The response holds the number of configured keys and, for each key's
    display prefix, its health flag, busy flag and success/failure counters.
    """
    per_key = {}
    for state in key_status.values():
        per_key[state["prefix"]] = {
            "healthy": state["healthy"],
            "busy": state["in_use"],
            "success": state["success"],
            "failures": state["failures"],
        }

    return {
        "status": "ok",
        "total_keys": len(OLLAMA_KEYS),
        "keys": per_key,
    }
127
 
128
# =========================================================
# MODELS
# =========================================================
@app.get("/v1/models")
async def models(req: Request):
    """List the upstream models in OpenAI ``/v1/models`` list format.

    Fetches ``{BASE_URL}/api/tags`` with the first configured key (without
    going through ``get_key``) and converts each entry's ``name`` into an
    OpenAI-style model object.

    Returns:
        ``{"object": "list", "data": [...]}`` on success; a JSON error with
        status 401 when unauthorized, the upstream status when the upstream
        answers with a non-200 code, or 502 when the upstream cannot be
        reached or does not return JSON.
    """
    if not auth_ok(req):
        return JSONResponse({"error": "Unauthorized"}, 401)

    key = OLLAMA_KEYS[0]

    try:
        async with httpx.AsyncClient(timeout=60) as client:
            r = await client.get(
                f"{BASE_URL}/api/tags",
                headers={"Authorization": f"Bearer {key}"},
            )
    except httpx.HTTPError as e:
        # Connection/timeout problems become a JSON 502 instead of an
        # unhandled exception.
        log(f"models: upstream request failed: {e}")
        return JSONResponse({"error": "Upstream request failed"}, 502)

    if r.status_code != 200:
        return JSONResponse({"error": r.text}, r.status_code)

    try:
        data = r.json()
    except ValueError:
        return JSONResponse({"error": "Upstream returned invalid JSON"}, 502)

    tags = data.get("models", []) if isinstance(data, dict) else []
    now = int(time.time())

    # Entries without a "name" cannot be addressed by clients, so skip them.
    entries = [
        {
            "id": m["name"],
            "object": "model",
            "created": now,
            "owned_by": "ollama",
        }
        for m in tags
        if isinstance(m, dict) and "name" in m
    ]

    return {"object": "list", "data": entries}
161
+
162
+ # =========================================================
163
+ # OPENAI NORMAL + STREAM
164
+ # =========================================================
165
+ @app.post("/v1/chat/completions")
166
+ async def chat(req: Request):
167
+ if not auth_ok(req):
168
+ return JSONResponse({"error": "Unauthorized"}, 401)
169
 
170
  try:
171
  body = await req.json()
172
  except:
173
+ return JSONResponse({"error": "Invalid JSON"}, 400)
174
 
175
+ stream = body.get("stream", False)
176
 
177
+ tried = set()
178
 
179
+ # -----------------------------------------------------
180
  # NON STREAM
181
+ # -----------------------------------------------------
182
+ if not stream:
 
183
  for _ in range(len(OLLAMA_KEYS)):
184
+ key = get_key(tried)
 
 
185
  if not key:
186
+ await asyncio.sleep(1)
187
  continue
188
 
189
+ tried.add(key)
190
+
191
  try:
192
  async with httpx.AsyncClient(timeout=180) as client:
193
  r = await client.post(
 
196
  headers={"Authorization": f"Bearer {key}"}
197
  )
198
 
199
+ if r.status_code == 200:
200
+ key_status[key]["success"] += 1
201
+ return Response(
202
+ content=r.content,
203
+ media_type=r.headers.get("content-type")
204
+ )
205
+
206
+ if r.status_code == 429:
207
+ key_status[key]["healthy"] = False
208
 
209
+ key_status[key]["failures"] += 1
210
+
211
+ except Exception as e:
212
+ log(str(e))
213
+ key_status[key]["failures"] += 1
214
 
215
  finally:
216
  release_key(key)
217
 
218
+ return JSONResponse({"error": "All keys failed"}, 500)
219
 
220
+ # -----------------------------------------------------
221
  # STREAM
222
+ # -----------------------------------------------------
223
  async def gen():
 
224
  for _ in range(len(OLLAMA_KEYS)):
 
225
  key = get_key()
 
226
  if not key:
227
+ await asyncio.sleep(1)
228
  continue
229
 
230
  try:
231
  timeout = httpx.Timeout(connect=15, read=None, write=15, pool=10)
232
 
233
  async with httpx.AsyncClient(timeout=timeout) as client:
 
234
  async with client.stream(
235
  "POST",
236
  f"{BASE_URL}/v1/chat/completions",
 
238
  headers={"Authorization": f"Bearer {key}"}
239
  ) as r:
240
 
241
+ if r.status_code == 429:
242
+ key_status[key]["healthy"] = False
243
+ continue
244
+
245
+ async for line in r.aiter_lines():
246
+ if line:
247
+ yield line + "\n\n"
248
 
249
+ key_status[key]["success"] += 1
250
  return
251
 
252
+ except Exception as e:
253
+ log(str(e))
254
+ key_status[key]["failures"] += 1
255
 
256
  finally:
257
  release_key(key)
258
 
259
+ yield 'data: {"error":"All stream keys failed"}\n\n'
260
 
261
  return StreamingResponse(gen(), media_type="text/event-stream")
262
 
263
# =========================================================
# ANTHROPIC
# =========================================================
def _sse(event_type, payload):
    """Format one server-sent event with a named event line and JSON data."""
    return f"event: {event_type}\ndata: {json.dumps(payload)}\n\n"


def _delta_text(raw):
    """Return the text fragment of one OpenAI stream chunk, or "" if none."""
    try:
        chunk = json.loads(raw)
        return chunk["choices"][0]["delta"].get("content") or ""
    except (ValueError, KeyError, IndexError, TypeError, AttributeError):
        return ""


def _record_failure(key, status_code=None):
    """Count a failed attempt for ``key``; a 429 also marks it unhealthy."""
    if status_code == 429:
        key_status[key]["healthy"] = False
    key_status[key]["failures"] += 1


@app.post("/v1/messages")
async def claude(req: Request):
    """Serve an Anthropic-style ``/v1/messages`` call via the upstream
    OpenAI-compatible ``/v1/chat/completions`` endpoint.

    The request's ``model`` is translated with ``map_model`` and sent with
    ``messages`` and ``stream``. Upstream keys are tried one after another:
    a non-200 answer or an exception counts as a failure for that key (429
    also marks it unhealthy) and the next key is tried.

    Returns:
        Non-stream: an Anthropic-style message dict, or a JSON error with
        status 401 (unauthorized), 400 (body is not a JSON object) or 500
        (every key failed).
        Stream: a ``text/event-stream`` response of Anthropic-style events;
        an ``error`` event is sent when no key could complete the stream.
    """
    if not auth_ok(req):
        return JSONResponse({"error": "Unauthorized"}, 401)

    try:
        body = await req.json()
    except Exception:
        return JSONResponse({"error": "Invalid JSON"}, 400)

    if not isinstance(body, dict):
        return JSONResponse({"error": "Invalid JSON"}, 400)

    stream = body.get("stream", False)
    model_name = body.get("model")

    # NOTE(review): only model, messages and stream are forwarded; other
    # request fields (e.g. system, max_tokens) are not passed upstream.
    openai_body = {
        "model": map_model(model_name),
        "messages": body.get("messages", []),
        "stream": stream,
    }
    url = f"{BASE_URL}/v1/chat/completions"

    # -----------------------------------------------------
    # NON STREAM
    # -----------------------------------------------------
    if not stream:
        tried = set()

        for _ in range(len(OLLAMA_KEYS)):
            key = get_key(tried)
            if not key:
                await asyncio.sleep(1)
                continue

            tried.add(key)

            try:
                async with httpx.AsyncClient(timeout=180) as client:
                    r = await client.post(
                        url,
                        json=openai_body,
                        headers={"Authorization": f"Bearer {key}"},
                    )

                if r.status_code != 200:
                    _record_failure(key, r.status_code)
                    continue

                data = r.json()
                txt = data["choices"][0]["message"]["content"] or ""

                usage = data.get("usage")
                if not isinstance(usage, dict):
                    usage = {}

                key_status[key]["success"] += 1

                return {
                    "id": "msg_" + uuid.uuid4().hex[:10],
                    "type": "message",
                    "role": "assistant",
                    "content": [
                        {
                            "type": "text",
                            "text": txt,
                        }
                    ],
                    "model": model_name,
                    "stop_reason": "end_turn",
                    "stop_sequence": None,
                    "usage": {
                        "input_tokens": usage.get("prompt_tokens", 0),
                        "output_tokens": usage.get("completion_tokens", 0),
                    },
                }

            except Exception as e:
                # A network error or malformed reply on one key must not
                # abort the request; note it and move on to the next key.
                log(str(e))
                key_status[key]["failures"] += 1

            finally:
                release_key(key)

        return JSONResponse({"error": "All keys failed"}, 500)

    # -----------------------------------------------------
    # STREAM
    # -----------------------------------------------------
    async def gen():
        msg_id = "msg_" + uuid.uuid4().hex[:10]

        yield _sse("message_start", {
            "type": "message_start",
            "message": {
                "id": msg_id,
                "type": "message",
                "role": "assistant",
                "model": model_name,
                "content": [],
                "stop_reason": None,
                "stop_sequence": None,
                "usage": {"input_tokens": 0, "output_tokens": 0},
            },
        })
        yield _sse("content_block_start", {
            "type": "content_block_start",
            "index": 0,
            "content_block": {"type": "text", "text": ""},
        })

        tried = set()
        completed = False
        sent_text = False

        for _ in range(len(OLLAMA_KEYS)):
            key = get_key(tried)
            if not key:
                await asyncio.sleep(1)
                continue

            tried.add(key)

            try:
                timeout = httpx.Timeout(connect=15, read=None, write=15, pool=10)

                async with httpx.AsyncClient(timeout=timeout) as client:
                    async with client.stream(
                        "POST",
                        url,
                        json=openai_body,
                        headers={"Authorization": f"Bearer {key}"},
                    ) as r:

                        if r.status_code != 200:
                            _record_failure(key, r.status_code)
                            continue

                        async for line in r.aiter_lines():
                            if not line.startswith("data: "):
                                continue

                            raw = line[6:].strip()
                            if raw == "[DONE]":
                                break

                            # Parsing happens in a helper so that no broad
                            # except surrounds the yield below (it would
                            # swallow the generator's shutdown signal).
                            txt = _delta_text(raw)
                            if txt:
                                sent_text = True
                                yield _sse("content_block_delta", {
                                    "type": "content_block_delta",
                                    "index": 0,
                                    "delta": {"type": "text_delta", "text": txt},
                                })

                key_status[key]["success"] += 1
                completed = True
                break

            except Exception as e:
                log(str(e))
                key_status[key]["failures"] += 1
                if sent_text:
                    # Part of the answer is already with the client; asking
                    # another key would replay the text from the start.
                    break

            finally:
                release_key(key)

        if not completed:
            yield _sse("error", {
                "type": "error",
                "error": {"type": "api_error", "message": "All keys failed"},
            })
            return

        yield _sse("content_block_stop", {"type": "content_block_stop", "index": 0})
        yield _sse("message_delta", {
            "type": "message_delta",
            "delta": {"stop_reason": "end_turn", "stop_sequence": None},
            "usage": {"output_tokens": 0},
        })
        yield _sse("message_stop", {"type": "message_stop"})

    return StreamingResponse(gen(), media_type="text/event-stream")