Elysiadev11 commited on
Commit
f1c283c
Β·
verified Β·
1 Parent(s): f6549ae

Update proxy_cerebras.py

Browse files
Files changed (1) hide show
  1. proxy_cerebras.py +265 -178
proxy_cerebras.py CHANGED
@@ -1,19 +1,3 @@
1
- # app.py
2
- # FULL RESTORE VERSION
3
- # Semua fitur utama dibalikin:
4
- # βœ… Multi key rotate
5
- # βœ… Round robin
6
- # βœ… Key lock
7
- # βœ… Dashboard /
8
- # βœ… /v1/models
9
- # βœ… /v1/chat/completions
10
- # βœ… /v1/messages
11
- # βœ… Stream OpenAI
12
- # βœ… Stream Anthropic
13
- # βœ… Claude -> Ollama model map
14
- # βœ… Retry jika key limit
15
- # βœ… Health monitor
16
-
17
  import os
18
  import json
19
  import time
@@ -27,17 +11,16 @@ from starlette.requests import ClientDisconnect
27
 
28
  app = FastAPI()
29
 
30
- # =========================================================
31
  # CONFIG
32
- # =========================================================
33
  BASE_URL = os.getenv("BASE_URL", "https://ollama.com")
34
  MASTER_API_KEY = os.getenv("MASTER_API_KEY", "olla")
35
 
36
- # =========================================================
37
  # LOAD KEYS
38
- # =========================================================
39
  OLLAMA_KEYS = []
40
-
41
  for i in range(1, 101):
42
  k = os.getenv(f"OLLAMA_KEY_{i}")
43
  if k:
@@ -46,92 +29,89 @@ for i in range(1, 101):
46
  if not OLLAMA_KEYS:
47
  OLLAMA_KEYS.append("dummy")
48
 
49
- # =========================================================
50
- # STATUS
51
- # =========================================================
52
- last_used_index = 0
53
-
54
  key_status = {}
55
  for idx, k in enumerate(OLLAMA_KEYS, 1):
56
  key_status[k] = {
57
  "index": idx,
58
- "prefix": k[:8] + "...",
59
- "success": 0,
60
- "failures": 0,
61
  "healthy": True,
62
- "in_use": False
 
 
63
  }
64
 
65
- # =========================================================
66
- # MODEL MAP
67
- # =========================================================
68
- MODEL_MAP = {
69
- "claude-opus-4-7": "minimax-m2.7:cloud",
70
- "claude-sonnet-4-6": "minimax-m2.7:cloud",
71
- "claude-haiku-4-5": "minimax-m2.7:cloud"
72
- }
73
-
74
- # =========================================================
75
- # UTILS
76
- # =========================================================
77
- def log(msg):
78
- print(f"[{time.strftime('%H:%M:%S')}] {msg}")
79
 
80
  def auth_ok(req: Request):
81
  token = req.headers.get("Authorization", "").replace("Bearer ", "")
82
  return token == MASTER_API_KEY
83
 
84
- def map_model(name):
85
- return MODEL_MAP.get(name, "minimax-m2.7:cloud")
86
 
87
  def get_key(exclude=None):
88
- global last_used_index
89
 
90
  if exclude is None:
91
  exclude = set()
92
 
93
- for i in range(len(OLLAMA_KEYS)):
94
- idx = (last_used_index + i) % len(OLLAMA_KEYS)
95
- key = OLLAMA_KEYS[idx]
96
- st = key_status[key]
 
97
 
98
- if st["healthy"] and not st["in_use"] and key not in exclude:
99
- st["in_use"] = True
100
- last_used_index = idx + 1
101
- return key
102
 
103
  return None
104
 
105
- def release_key(key):
106
- if key in key_status:
107
- key_status[key]["in_use"] = False
108
 
109
- # =========================================================
110
- # ROOT DASHBOARD
111
- # =========================================================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
  @app.get("/")
113
- def root():
114
  return {
115
  "status": "ok",
116
- "total_keys": len(OLLAMA_KEYS),
117
- "keys": {
118
- v["prefix"]: {
119
- "healthy": v["healthy"],
120
- "busy": v["in_use"],
121
- "success": v["success"],
122
- "failures": v["failures"]
123
- }
124
- for v in key_status.values()
125
- }
126
  }
127
 
128
- # =========================================================
129
- # MODELS
130
- # =========================================================
 
131
  @app.get("/v1/models")
132
  async def models(req: Request):
133
  if not auth_ok(req):
134
- return JSONResponse({"error": "Unauthorized"}, 401)
135
 
136
  key = OLLAMA_KEYS[0]
137
 
@@ -142,48 +122,50 @@ async def models(req: Request):
142
  )
143
 
144
  if r.status_code != 200:
145
- return JSONResponse({"error": r.text}, r.status_code)
146
 
147
  data = r.json()
148
 
149
- models = []
150
  now = int(time.time())
151
 
152
  for m in data.get("models", []):
153
- models.append({
154
- "id": m["name"],
155
  "object": "model",
156
  "created": now,
157
  "owned_by": "ollama"
158
  })
159
 
160
- return {"object": "list", "data": models}
 
161
 
162
- # =========================================================
163
- # OPENAI NORMAL + STREAM
164
- # =========================================================
165
  @app.post("/v1/chat/completions")
166
  async def chat(req: Request):
167
  if not auth_ok(req):
168
- return JSONResponse({"error": "Unauthorized"}, 401)
169
 
170
  try:
171
  body = await req.json()
172
  except:
173
- return JSONResponse({"error": "Invalid JSON"}, 400)
174
-
175
- stream = body.get("stream", False)
176
 
177
- tried = set()
178
 
179
- # -----------------------------------------------------
180
  # NON STREAM
181
- # -----------------------------------------------------
182
- if not stream:
 
 
183
  for _ in range(len(OLLAMA_KEYS)):
184
  key = get_key(tried)
 
185
  if not key:
186
- await asyncio.sleep(1)
187
  continue
188
 
189
  tried.add(key)
@@ -196,41 +178,48 @@ async def chat(req: Request):
196
  headers={"Authorization": f"Bearer {key}"}
197
  )
198
 
199
- if r.status_code == 200:
200
- key_status[key]["success"] += 1
201
- return Response(
202
- content=r.content,
203
- media_type=r.headers.get("content-type")
204
- )
205
 
206
- if r.status_code == 429:
207
- key_status[key]["healthy"] = False
208
 
209
- key_status[key]["failures"] += 1
 
 
 
 
 
 
210
 
211
  except Exception as e:
212
- log(str(e))
213
- key_status[key]["failures"] += 1
214
 
215
  finally:
216
  release_key(key)
217
 
218
- return JSONResponse({"error": "All keys failed"}, 500)
219
 
220
- # -----------------------------------------------------
221
  # STREAM
222
- # -----------------------------------------------------
223
  async def gen():
 
 
224
  for _ in range(len(OLLAMA_KEYS)):
225
- key = get_key()
 
226
  if not key:
227
- await asyncio.sleep(1)
228
  continue
229
 
230
- try:
231
- timeout = httpx.Timeout(connect=15, read=None, write=15, pool=10)
232
 
233
- async with httpx.AsyncClient(timeout=timeout) as client:
 
234
  async with client.stream(
235
  "POST",
236
  f"{BASE_URL}/v1/chat/completions",
@@ -239,62 +228,84 @@ async def chat(req: Request):
239
  ) as r:
240
 
241
  if r.status_code == 429:
242
- key_status[key]["healthy"] = False
243
  continue
244
 
245
  async for line in r.aiter_lines():
246
  if line:
247
  yield line + "\n\n"
248
 
249
- key_status[key]["success"] += 1
250
  return
251
 
252
  except Exception as e:
253
- log(str(e))
254
- key_status[key]["failures"] += 1
255
 
256
  finally:
257
  release_key(key)
258
 
259
- yield 'data: {"error":"All stream keys failed"}\n\n'
 
260
 
261
  return StreamingResponse(gen(), media_type="text/event-stream")
262
 
263
- # =========================================================
264
- # ANTHROPIC
265
- # =========================================================
 
266
  @app.post("/v1/messages")
267
- async def claude(req: Request):
268
  if not auth_ok(req):
269
- return JSONResponse({"error": "Unauthorized"}, 401)
270
 
271
  try:
272
  body = await req.json()
273
- except:
274
- return JSONResponse({"error": "Invalid JSON"}, 400)
275
 
276
  stream = body.get("stream", False)
277
 
278
- openai_body = {
279
- "model": map_model(body.get("model")),
280
- "messages": body.get("messages", []),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
281
  "stream": stream
282
  }
283
 
284
- # -----------------------------------------------------
285
  # NON STREAM
286
- # -----------------------------------------------------
287
  if not stream:
288
- fake = Request(scope=req.scope)
289
- req._body = json.dumps(openai_body).encode()
290
-
291
- # manual call
292
  tried = set()
293
 
294
  for _ in range(len(OLLAMA_KEYS)):
295
  key = get_key(tried)
 
296
  if not key:
297
- await asyncio.sleep(1)
298
  continue
299
 
300
  tried.add(key)
@@ -303,87 +314,163 @@ async def claude(req: Request):
303
  async with httpx.AsyncClient(timeout=180) as client:
304
  r = await client.post(
305
  f"{BASE_URL}/v1/chat/completions",
306
- json=openai_body,
307
  headers={"Authorization": f"Bearer {key}"}
308
  )
309
 
310
- if r.status_code == 200:
311
- data = r.json()
312
- txt = data["choices"][0]["message"]["content"]
313
-
314
- return {
315
- "id": "msg_" + uuid.uuid4().hex[:10],
316
- "type": "message",
317
- "role": "assistant",
318
- "content": [
319
- {
320
- "type": "text",
321
- "text": txt
322
- }
323
- ],
324
- "model": body.get("model")
 
 
 
 
 
 
 
 
 
 
 
325
  }
 
 
 
 
 
 
 
 
326
 
327
  finally:
328
  release_key(key)
329
 
330
- return JSONResponse({"error": "All keys failed"}, 500)
331
 
332
- # -----------------------------------------------------
333
  # STREAM
334
- # -----------------------------------------------------
335
- async def gen():
 
336
  msg_id = "msg_" + uuid.uuid4().hex[:10]
337
 
338
- yield f'data: {json.dumps({"type":"message_start","message":{"id":msg_id,"type":"message","role":"assistant","model":body.get("model"),"content":[],"stop_reason":None,"stop_sequence":None,"usage":{"input_tokens":0,"output_tokens":0}})}\n\n'
339
- yield f'data: {json.dumps({"type":"content_block_start","index":0,"content_block":{"type":"text"}})}\n\n'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
340
 
341
- tried = set()
 
 
 
 
342
 
343
  for _ in range(len(OLLAMA_KEYS)):
344
  key = get_key(tried)
 
345
  if not key:
346
- await asyncio.sleep(1)
347
  continue
348
 
349
  tried.add(key)
350
 
351
  try:
352
- timeout = httpx.Timeout(connect=15, read=None, write=15, pool=10)
353
-
354
- async with httpx.AsyncClient(timeout=timeout) as client:
355
  async with client.stream(
356
  "POST",
357
  f"{BASE_URL}/v1/chat/completions",
358
- json=openai_body,
359
  headers={"Authorization": f"Bearer {key}"}
360
  ) as r:
361
 
 
 
 
 
362
  async for line in r.aiter_lines():
363
- if line.startswith("data: "):
364
- raw = line[6:]
365
 
366
- if raw == "[DONE]":
367
- break
368
 
369
- try:
370
- j = json.loads(raw)
371
- delta = j["choices"][0]["delta"]
372
- txt = delta.get("content", "")
373
 
374
- if txt:
375
- yield f'data: {json.dumps({"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":txt}})}\n\n'
 
 
376
 
377
- except:
378
- pass
379
 
 
 
 
 
 
 
 
 
 
 
 
380
  break
381
 
 
 
 
 
382
  finally:
383
  release_key(key)
384
 
385
- yield f'data: {json.dumps({"type":"content_block_stop","index":0})}\n\n'
386
- yield f'data: {json.dumps({"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":None},"usage":{"output_tokens":0}})}\n\n'
387
- yield f'data: {json.dumps({"type":"message_stop"})}\n\n'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
388
 
389
- return StreamingResponse(gen(), media_type="text/event-stream")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
  import json
3
  import time
 
11
 
12
  app = FastAPI()
13
 
14
+ # =====================================================
15
  # CONFIG
16
+ # =====================================================
17
  BASE_URL = os.getenv("BASE_URL", "https://ollama.com")
18
  MASTER_API_KEY = os.getenv("MASTER_API_KEY", "olla")
19
 
20
+ # =====================================================
21
  # LOAD KEYS
22
+ # =====================================================
23
  OLLAMA_KEYS = []
 
24
  for i in range(1, 101):
25
  k = os.getenv(f"OLLAMA_KEY_{i}")
26
  if k:
 
29
  if not OLLAMA_KEYS:
30
  OLLAMA_KEYS.append("dummy")
31
 
 
 
 
 
 
32
  key_status = {}
33
  for idx, k in enumerate(OLLAMA_KEYS, 1):
34
  key_status[k] = {
35
  "index": idx,
 
 
 
36
  "healthy": True,
37
+ "busy": False,
38
+ "success": 0,
39
+ "fail": 0,
40
  }
41
 
42
+ rr_index = 0
43
+
44
+
45
+ # =====================================================
46
+ # HELPERS
47
+ # =====================================================
48
+ def log(x):
49
+ print(f"[{time.strftime('%H:%M:%S')}] {x}")
50
+
51
+
52
+ def sse(obj):
53
+ return "data: " + json.dumps(obj, ensure_ascii=False) + "\n\n"
54
+
 
55
 
56
  def auth_ok(req: Request):
57
  token = req.headers.get("Authorization", "").replace("Bearer ", "")
58
  return token == MASTER_API_KEY
59
 
 
 
60
 
61
  def get_key(exclude=None):
62
+ global rr_index
63
 
64
  if exclude is None:
65
  exclude = set()
66
 
67
+ for _ in range(len(OLLAMA_KEYS)):
68
+ rr_index = (rr_index + 1) % len(OLLAMA_KEYS)
69
+ k = OLLAMA_KEYS[rr_index]
70
+
71
+ st = key_status[k]
72
 
73
+ if st["healthy"] and not st["busy"] and k not in exclude:
74
+ st["busy"] = True
75
+ return k
 
76
 
77
  return None
78
 
 
 
 
79
 
80
+ def release_key(k):
81
+ if k in key_status:
82
+ key_status[k]["busy"] = False
83
+
84
+
85
+ def mark_fail(k):
86
+ if k in key_status:
87
+ key_status[k]["fail"] += 1
88
+
89
+
90
+ def mark_ok(k):
91
+ if k in key_status:
92
+ key_status[k]["success"] += 1
93
+ key_status[k]["fail"] = 0
94
+
95
+
96
+ # =====================================================
97
+ # ROOT
98
+ # =====================================================
99
  @app.get("/")
100
+ async def root():
101
  return {
102
  "status": "ok",
103
+ "keys": len(OLLAMA_KEYS),
104
+ "detail": key_status
 
 
 
 
 
 
 
 
105
  }
106
 
107
+
108
+ # =====================================================
109
+ # /v1/models
110
+ # =====================================================
111
  @app.get("/v1/models")
112
  async def models(req: Request):
113
  if not auth_ok(req):
114
+ return JSONResponse({"error": "Unauthorized"}, status_code=401)
115
 
116
  key = OLLAMA_KEYS[0]
117
 
 
122
  )
123
 
124
  if r.status_code != 200:
125
+ return JSONResponse({"error": r.text}, status_code=r.status_code)
126
 
127
  data = r.json()
128
 
129
+ out = []
130
  now = int(time.time())
131
 
132
  for m in data.get("models", []):
133
+ out.append({
134
+ "id": m.get("name"),
135
  "object": "model",
136
  "created": now,
137
  "owned_by": "ollama"
138
  })
139
 
140
+ return {"object": "list", "data": out}
141
+
142
 
143
+ # =====================================================
144
+ # OPENAI CHAT
145
+ # =====================================================
146
  @app.post("/v1/chat/completions")
147
  async def chat(req: Request):
148
  if not auth_ok(req):
149
+ return JSONResponse({"error": "Unauthorized"}, status_code=401)
150
 
151
  try:
152
  body = await req.json()
153
  except:
154
+ return JSONResponse({"error": "Bad JSON"}, status_code=400)
 
 
155
 
156
+ is_stream = body.get("stream", False)
157
 
158
+ # -----------------------------------------
159
  # NON STREAM
160
+ # -----------------------------------------
161
+ if not is_stream:
162
+ tried = set()
163
+
164
  for _ in range(len(OLLAMA_KEYS)):
165
  key = get_key(tried)
166
+
167
  if not key:
168
+ await asyncio.sleep(0.3)
169
  continue
170
 
171
  tried.add(key)
 
178
  headers={"Authorization": f"Bearer {key}"}
179
  )
180
 
181
+ txt = r.text.lower()
182
+
183
+ if "weekly usage limit" in txt or r.status_code == 429:
184
+ mark_fail(key)
185
+ continue
 
186
 
187
+ mark_ok(key)
 
188
 
189
+ return Response(
190
+ content=r.content,
191
+ media_type=r.headers.get(
192
+ "content-type",
193
+ "application/json"
194
+ )
195
+ )
196
 
197
  except Exception as e:
198
+ log(e)
199
+ mark_fail(key)
200
 
201
  finally:
202
  release_key(key)
203
 
204
+ return JSONResponse({"error": "All keys failed"}, status_code=500)
205
 
206
+ # -----------------------------------------
207
  # STREAM
208
+ # -----------------------------------------
209
  async def gen():
210
+ tried = set()
211
+
212
  for _ in range(len(OLLAMA_KEYS)):
213
+ key = get_key(tried)
214
+
215
  if not key:
216
+ await asyncio.sleep(0.3)
217
  continue
218
 
219
+ tried.add(key)
 
220
 
221
+ try:
222
+ async with httpx.AsyncClient(timeout=None) as client:
223
  async with client.stream(
224
  "POST",
225
  f"{BASE_URL}/v1/chat/completions",
 
228
  ) as r:
229
 
230
  if r.status_code == 429:
231
+ mark_fail(key)
232
  continue
233
 
234
  async for line in r.aiter_lines():
235
  if line:
236
  yield line + "\n\n"
237
 
238
+ mark_ok(key)
239
  return
240
 
241
  except Exception as e:
242
+ log(e)
243
+ mark_fail(key)
244
 
245
  finally:
246
  release_key(key)
247
 
248
+ yield sse({"error": "All keys failed"})
249
+ yield "data: [DONE]\n\n"
250
 
251
  return StreamingResponse(gen(), media_type="text/event-stream")
252
 
253
+
254
+ # =====================================================
255
+ # ANTHROPIC /v1/messages
256
+ # =====================================================
257
  @app.post("/v1/messages")
258
+ async def anthropic(req: Request):
259
  if not auth_ok(req):
260
+ return JSONResponse({"error": "Unauthorized"}, status_code=401)
261
 
262
  try:
263
  body = await req.json()
264
+ except ClientDisconnect:
265
+ return Response(status_code=499)
266
 
267
  stream = body.get("stream", False)
268
 
269
+ messages = []
270
+
271
+ if body.get("system"):
272
+ messages.append({
273
+ "role": "system",
274
+ "content": body["system"]
275
+ })
276
+
277
+ for m in body.get("messages", []):
278
+ content = m.get("content", "")
279
+
280
+ if isinstance(content, list):
281
+ txt = ""
282
+ for x in content:
283
+ if x.get("type") == "text":
284
+ txt += x.get("text", "")
285
+ content = txt
286
+
287
+ messages.append({
288
+ "role": m["role"],
289
+ "content": content
290
+ })
291
+
292
+ proxy_body = {
293
+ "model": "minimax-m2.7:cloud",
294
+ "messages": messages,
295
  "stream": stream
296
  }
297
 
298
+ # -----------------------------------------
299
  # NON STREAM
300
+ # -----------------------------------------
301
  if not stream:
 
 
 
 
302
  tried = set()
303
 
304
  for _ in range(len(OLLAMA_KEYS)):
305
  key = get_key(tried)
306
+
307
  if not key:
308
+ await asyncio.sleep(0.3)
309
  continue
310
 
311
  tried.add(key)
 
314
  async with httpx.AsyncClient(timeout=180) as client:
315
  r = await client.post(
316
  f"{BASE_URL}/v1/chat/completions",
317
+ json=proxy_body,
318
  headers={"Authorization": f"Bearer {key}"}
319
  )
320
 
321
+ txt = r.text.lower()
322
+
323
+ if "weekly usage limit" in txt or r.status_code == 429:
324
+ mark_fail(key)
325
+ continue
326
+
327
+ data = r.json()
328
+
329
+ ans = data["choices"][0]["message"]["content"]
330
+
331
+ out = {
332
+ "id": "msg_" + uuid.uuid4().hex[:10],
333
+ "type": "message",
334
+ "role": "assistant",
335
+ "model": body.get("model", "claude-opus-4-7"),
336
+ "content": [
337
+ {
338
+ "type": "text",
339
+ "text": ans
340
+ }
341
+ ],
342
+ "stop_reason": "end_turn",
343
+ "stop_sequence": None,
344
+ "usage": {
345
+ "input_tokens": 0,
346
+ "output_tokens": 0
347
  }
348
+ }
349
+
350
+ mark_ok(key)
351
+ return JSONResponse(out)
352
+
353
+ except Exception as e:
354
+ log(e)
355
+ mark_fail(key)
356
 
357
  finally:
358
  release_key(key)
359
 
360
+ return JSONResponse({"error": "All keys failed"}, status_code=500)
361
 
362
+ # -----------------------------------------
363
  # STREAM
364
+ # -----------------------------------------
365
+ async def agen():
366
+ tried = set()
367
  msg_id = "msg_" + uuid.uuid4().hex[:10]
368
 
369
+ start_payload = {
370
+ "type": "message_start",
371
+ "message": {
372
+ "id": msg_id,
373
+ "type": "message",
374
+ "role": "assistant",
375
+ "model": body.get("model", "claude-opus-4-7"),
376
+ "content": [],
377
+ "stop_reason": None,
378
+ "stop_sequence": None,
379
+ "usage": {
380
+ "input_tokens": 0,
381
+ "output_tokens": 0
382
+ }
383
+ }
384
+ }
385
+
386
+ yield sse(start_payload)
387
 
388
+ yield sse({
389
+ "type": "content_block_start",
390
+ "index": 0,
391
+ "content_block": {"type": "text"}
392
+ })
393
 
394
  for _ in range(len(OLLAMA_KEYS)):
395
  key = get_key(tried)
396
+
397
  if not key:
398
+ await asyncio.sleep(0.3)
399
  continue
400
 
401
  tried.add(key)
402
 
403
  try:
404
+ async with httpx.AsyncClient(timeout=None) as client:
 
 
405
  async with client.stream(
406
  "POST",
407
  f"{BASE_URL}/v1/chat/completions",
408
+ json=proxy_body,
409
  headers={"Authorization": f"Bearer {key}"}
410
  ) as r:
411
 
412
+ if r.status_code == 429:
413
+ mark_fail(key)
414
+ continue
415
+
416
  async for line in r.aiter_lines():
417
+ if not line.startswith("data: "):
418
+ continue
419
 
420
+ raw = line[6:].strip()
 
421
 
422
+ if raw == "[DONE]":
423
+ break
 
 
424
 
425
+ try:
426
+ j = json.loads(raw)
427
+ except:
428
+ continue
429
 
430
+ delta = j["choices"][0]["delta"]
431
+ txt = delta.get("content", "")
432
 
433
+ if txt:
434
+ yield sse({
435
+ "type": "content_block_delta",
436
+ "index": 0,
437
+ "delta": {
438
+ "type": "text_delta",
439
+ "text": txt
440
+ }
441
+ })
442
+
443
+ mark_ok(key)
444
  break
445
 
446
+ except Exception as e:
447
+ log(e)
448
+ mark_fail(key)
449
+
450
  finally:
451
  release_key(key)
452
 
453
+ yield sse({
454
+ "type": "content_block_stop",
455
+ "index": 0
456
+ })
457
+
458
+ yield sse({
459
+ "type": "message_delta",
460
+ "delta": {
461
+ "stop_reason": "end_turn",
462
+ "stop_sequence": None
463
+ },
464
+ "usage": {
465
+ "output_tokens": 0
466
+ }
467
+ })
468
+
469
+ yield sse({
470
+ "type": "message_stop"
471
+ })
472
 
473
+ return StreamingResponse(
474
+ agen(),
475
+ media_type="text/event-stream"
476
+ )