| """ | |
| proxy_cerebras.py β Proxy ke Ollama backend dengan Anthropic + OpenAI compatible API | |
| FIXED: | |
| - Tool calling support penuh (Anthropic <-> OpenAI) | |
| - Non-stream tidak crash saat finish_reason=tool_calls | |
| - Stream handle delta.tool_calls | |
| - Model tidak hardcoded, diteruskan dari request | |
| - Infinite loop dengan smart cooldown (tidak hammering) | |
| """ | |
import os
import json
import time
import uuid
import asyncio
import httpx
from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse, Response, StreamingResponse
from starlette.requests import ClientDisconnect

app = FastAPI()
# =====================================================
# CONFIG
# =====================================================
BASE_URL = os.getenv("BASE_URL", "https://ollama.com")
MASTER_API_KEY = os.getenv("MASTER_API_KEY", "olla")
DEFAULT_MODEL = os.getenv("DEFAULT_MODEL", "minimax-m2.7:cloud")
RATE_LIMIT_COOLDOWN = int(os.getenv("RATE_LIMIT_COOLDOWN", "62"))

# Model mapping: Claude/GPT name → Ollama model
MODEL_MAP = {
    "claude-opus-4-7": DEFAULT_MODEL,
    "claude-opus-4-6": DEFAULT_MODEL,
    "claude-opus-4-5": DEFAULT_MODEL,
    "claude-opus-4-1": DEFAULT_MODEL,
    "claude-opus-4-20250514": DEFAULT_MODEL,
    "claude-sonnet-4-6": DEFAULT_MODEL,
    "claude-sonnet-4-5": DEFAULT_MODEL,
    "claude-sonnet-4-20250514": DEFAULT_MODEL,
    "claude-haiku-4-5": DEFAULT_MODEL,
    "claude-haiku-4-5-20251001": DEFAULT_MODEL,
    "gpt-4": DEFAULT_MODEL,
    "gpt-4o": DEFAULT_MODEL,
    "gpt-4o-mini": DEFAULT_MODEL,
    "gpt-4-turbo": DEFAULT_MODEL,
    "gpt-3.5-turbo": DEFAULT_MODEL,
}

def map_model(name: str) -> str:
    return MODEL_MAP.get(name, name)
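# Examples (illustrative): known aliases collapse to DEFAULT_MODEL, while
# unknown names pass through untouched, so callers can address Ollama models
# directly (the second model name below is hypothetical):
#   map_model("gpt-4o")           -> DEFAULT_MODEL
#   map_model("qwen2.5:7b-cloud") -> "qwen2.5:7b-cloud"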
# =====================================================
# LOAD KEYS
# =====================================================
OLLAMA_KEYS = []
for i in range(1, 101):
    k = os.getenv(f"OLLAMA_KEY_{i}")
    if k:
        OLLAMA_KEYS.append(k)
if not OLLAMA_KEYS:
    OLLAMA_KEYS.append("dummy")
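# Keys come from the numbered env vars OLLAMA_KEY_1 .. OLLAMA_KEY_100; gaps
# are skipped. Example shell setup (values are placeholders):
#   export OLLAMA_KEY_1="key-aaaa..."
#   export OLLAMA_KEY_2="key-bbbb..."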
key_status = {}
for idx, k in enumerate(OLLAMA_KEYS, 1):
    key_status[k] = {
        "index": idx,
        "prefix": k[:6] + "..." if len(k) > 6 else k,
        "busy": False,
        "success": 0,
        "fail": 0,
        "rate_limited_until": 0.0,
    }
rr_index = 0
_lock = asyncio.Lock()
# =====================================================
# HELPERS
# =====================================================
def log(x):
    print(f"[{time.strftime('%H:%M:%S')}] {x}", flush=True)

def sse(obj):
    return "data: " + json.dumps(obj, ensure_ascii=False) + "\n\n"
def auth_ok(req: Request):
    return req.headers.get("Authorization", "").replace("Bearer ", "") == MASTER_API_KEY

def is_rate_limited(status: int, body_text: str = "") -> bool:
    if status == 429:
        return True
    t = body_text.lower()
    return "weekly usage limit" in t or "rate limit" in t or "too many requests" in t
# =====================================================
# KEY MANAGEMENT
# =====================================================
def _pick_key(exclude: set):
    """Synchronous; call while holding _lock. Picks a ready key (not busy, not cooling down)."""
    global rr_index
    now = time.time()
    for _ in range(len(OLLAMA_KEYS)):
        rr_index = (rr_index + 1) % len(OLLAMA_KEYS)
        k = OLLAMA_KEYS[rr_index]
        st = key_status[k]
        if not st["busy"] and now >= st["rate_limited_until"] and k not in exclude:
            st["busy"] = True
            return k
    return None
def _next_ready_time() -> float:
    """Epoch time at which the first key leaves cooldown."""
    now = time.time()
    times = [st["rate_limited_until"] for st in key_status.values() if st["rate_limited_until"] > now]
    return min(times) if times else now
async def get_key(exclude=None):
    async with _lock:
        return _pick_key(exclude or set())

async def release_key(k):
    async with _lock:
        if k in key_status:
            key_status[k]["busy"] = False

async def mark_rate_limited(k):
    async with _lock:
        if k in key_status:
            until = time.time() + RATE_LIMIT_COOLDOWN
            key_status[k]["rate_limited_until"] = until
            key_status[k]["fail"] += 1
            idx = key_status[k]["index"]
            log(f"⏳ key#{idx} cooldown {RATE_LIMIT_COOLDOWN}s (ready {time.strftime('%H:%M:%S', time.localtime(until))})")

async def mark_fail(k):
    async with _lock:
        if k in key_status:
            key_status[k]["fail"] += 1

async def mark_ok(k):
    async with _lock:
        if k in key_status:
            key_status[k]["success"] += 1
            key_status[k]["fail"] = 0
            key_status[k]["rate_limited_until"] = 0.0
async def get_key_infinite(exclude=None):
    """
    Wait for a key indefinitely. If everything is cooling down, sleep EXACTLY
    until the earliest key becomes ready again.
    Returns: (key, exclude_set)
    """
    local_exclude = set(exclude) if exclude else set()
    cycle = 0
    while True:
        async with _lock:
            k = _pick_key(local_exclude)
            if k:
                return k, local_exclude
            now = time.time()
            next_ready = _next_ready_time()
            wait_sec = max(0.5, next_ready - now)
            all_cooldown = all(
                st["rate_limited_until"] > now or st["busy"]
                for st in key_status.values()
            )
        # Sleep OUTSIDE the lock so other tasks can release or mark keys meanwhile
        if all_cooldown:
            cycle += 1
            log(f"⏳ All keys cooling down. Waiting {wait_sec:.1f}s... (cycle #{cycle})")
            local_exclude.clear()
            await asyncio.sleep(wait_sec)
        else:
            await asyncio.sleep(0.3)
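# Example timeline (illustrative): with three keys all rate-limited and the
# earliest "rate_limited_until" 12.4s away, the loop logs one wait message and
# sleeps 12.4s instead of polling. If some keys are merely busy, it polls
# every 0.3s until one is released.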
# =====================================================
# TOOL CONVERSION: Anthropic → OpenAI
# =====================================================
def anthropic_tools_to_openai(tools: list) -> list:
    """
    Anthropic: {"name", "description", "input_schema"}
    OpenAI:    {"type": "function", "function": {"name", "description", "parameters"}}
    """
    return [
        {
            "type": "function",
            "function": {
                "name": t.get("name", ""),
                "description": t.get("description", ""),
                "parameters": t.get("input_schema", {"type": "object", "properties": {}}),
            }
        }
        for t in tools
    ]
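# Worked example (hypothetical "get_weather" tool):
#   anthropic_tools_to_openai([{
#       "name": "get_weather",
#       "description": "Look up current weather",
#       "input_schema": {"type": "object", "properties": {"city": {"type": "string"}}},
#   }])
# returns:
#   [{"type": "function", "function": {
#       "name": "get_weather",
#       "description": "Look up current weather",
#       "parameters": {"type": "object", "properties": {"city": {"type": "string"}}}}}]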
def anthropic_tool_choice_to_openai(tc):
    if tc is None:
        return None
    if isinstance(tc, str):
        return {"auto": "auto", "any": "required", "none": "none"}.get(tc, "auto")
    if isinstance(tc, dict):
        t = tc.get("type", "")
        if t == "tool":
            return {"type": "function", "function": {"name": tc.get("name", "")}}
        return {"auto": "auto", "any": "required", "none": "none"}.get(t, "auto")
    return "auto"
def convert_anthropic_messages(messages: list) -> list:
    """
    Convert an Anthropic message list → OpenAI message list.
    Handles: text, tool_use (assistant), tool_result (user).
    """
    out = []
    for m in messages:
        role = m.get("role", "user")
        content = m.get("content", "")
        # String content → pass through as-is
        if isinstance(content, str):
            out.append({"role": role, "content": content})
            continue
        if not isinstance(content, list):
            out.append({"role": role, "content": str(content)})
            continue
        tool_use_blocks = [b for b in content if b.get("type") == "tool_use"]
        tool_result_blocks = [b for b in content if b.get("type") == "tool_result"]
        text_blocks = [b for b in content if b.get("type") == "text"]
        # Assistant with tool_use → OpenAI assistant message + tool_calls
        if tool_use_blocks and role == "assistant":
            text_content = "".join(b.get("text", "") for b in text_blocks) or None
            tool_calls = [
                {
                    "id": b.get("id", "call_" + uuid.uuid4().hex[:8]),
                    "type": "function",
                    "function": {
                        "name": b.get("name", ""),
                        "arguments": json.dumps(b.get("input", {}))
                    }
                }
                for b in tool_use_blocks
            ]
            out.append({"role": "assistant", "content": text_content, "tool_calls": tool_calls})
            continue
        # User with tool_result → OpenAI role=tool messages
        if tool_result_blocks and role == "user":
            for b in tool_result_blocks:
                rc = b.get("content", "")
                if isinstance(rc, list):
                    rc = "".join(x.get("text", "") if isinstance(x, dict) else str(x) for x in rc)
                out.append({
                    "role": "tool",
                    "tool_call_id": b.get("tool_use_id", ""),
                    "content": str(rc),
                })
            if text_blocks:
                txt = "".join(b.get("text", "") for b in text_blocks)
                if txt:
                    out.append({"role": "user", "content": txt})
            continue
        # Default: join all text blocks
        out.append({"role": role, "content": "".join(b.get("text", "") for b in text_blocks)})
    return out
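# Worked example: one Anthropic tool round trip becomes two OpenAI messages
# (the ids and tool name below are illustrative):
#   [{"role": "assistant", "content": [
#        {"type": "tool_use", "id": "toolu_01", "name": "get_weather", "input": {"city": "Jakarta"}}]},
#    {"role": "user", "content": [
#        {"type": "tool_result", "tool_use_id": "toolu_01", "content": "31C, humid"}]}]
# becomes:
#   [{"role": "assistant", "content": None, "tool_calls": [
#        {"id": "toolu_01", "type": "function",
#         "function": {"name": "get_weather", "arguments": "{\"city\": \"Jakarta\"}"}}]},
#    {"role": "tool", "tool_call_id": "toolu_01", "content": "31C, humid"}]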
def openai_to_anthropic_response(data: dict, original_model: str) -> dict:
    """
    Convert an OpenAI non-stream response → Anthropic format.
    Handles text + tool_calls.
    """
    choice = data["choices"][0]
    message = choice.get("message", {})
    finish_reason = choice.get("finish_reason", "stop")
    usage = data.get("usage", {})
    stop_map = {"stop": "end_turn", "length": "max_tokens", "eos": "end_turn", "tool_calls": "tool_use"}
    stop_reason = stop_map.get(finish_reason, "end_turn")
    content_blocks = []
    text = message.get("content") or ""
    if text:
        content_blocks.append({"type": "text", "text": text})
    for tc in (message.get("tool_calls") or []):
        fn = tc.get("function", {})
        try:
            inp = json.loads(fn.get("arguments", "{}"))
        except Exception:
            inp = {"_raw": fn.get("arguments", "")}
        content_blocks.append({
            "type": "tool_use",
            "id": tc.get("id", "toolu_" + uuid.uuid4().hex[:10]),
            "name": fn.get("name", ""),
            "input": inp,
        })
    return {
        "id": "msg_" + uuid.uuid4().hex[:10],
        "type": "message",
        "role": "assistant",
        "model": original_model,
        "content": content_blocks,
        "stop_reason": stop_reason,
        "stop_sequence": None,
        "usage": {
            "input_tokens": usage.get("prompt_tokens", 0),
            "output_tokens": usage.get("completion_tokens", 0),
        }
    }
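# Worked example (illustrative ids): an OpenAI choice with
#   {"message": {"content": null, "tool_calls": [{"id": "call_1", "type": "function",
#       "function": {"name": "get_weather", "arguments": "{\"city\": \"Jakarta\"}"}}]},
#    "finish_reason": "tool_calls"}
# maps to Anthropic content
#   [{"type": "tool_use", "id": "call_1", "name": "get_weather", "input": {"city": "Jakarta"}}]
# with stop_reason "tool_use".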
# =====================================================
# ROOT
# =====================================================
@app.get("/")
async def root():
    now = time.time()
    async with _lock:
        safe = {}
        for k, v in key_status.items():
            cd = max(0, v["rate_limited_until"] - now)
            safe[v["prefix"]] = {
                "index": v["index"],
                "status": "BUSY" if v["busy"] else ("COOLDOWN" if cd > 0 else "IDLE"),
                "cooldown_sec": round(cd, 1) if cd > 0 else 0,
                "success": v["success"],
                "fail": v["fail"],
            }
    return {"status": "ok", "base_url": BASE_URL, "default_model": DEFAULT_MODEL, "keys": safe}
# =====================================================
# /v1/models
# =====================================================
@app.get("/v1/models")
async def models(req: Request):
    if not auth_ok(req):
        return JSONResponse({"error": "Unauthorized"}, status_code=401)
    key = OLLAMA_KEYS[0]
    try:
        async with httpx.AsyncClient(timeout=60) as client:
            r = await client.get(f"{BASE_URL}/api/tags", headers={"Authorization": f"Bearer {key}"})
            if r.status_code == 200:
                data = r.json()
                now = int(time.time())
                out = [{"id": m.get("name"), "object": "model", "created": now, "owned_by": "ollama"}
                       for m in data.get("models", [])]
                return {"object": "list", "data": out}
    except Exception as e:
        log(f"[/v1/models] {e}")
    return JSONResponse({"error": "Failed to fetch models"}, status_code=500)
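# Example request (assumes a local deployment on port 8000 and the default
# MASTER_API_KEY; adjust both to your setup):
#   curl -H "Authorization: Bearer olla" http://localhost:8000/v1/models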
# =====================================================
# /v1/chat/completions (OpenAI-compatible, pipe through)
# =====================================================
@app.post("/v1/chat/completions")
async def chat(req: Request):
    if not auth_ok(req):
        return JSONResponse({"error": "Unauthorized"}, status_code=401)
    try:
        body = await req.json()
    except Exception:
        return JSONResponse({"error": "Bad JSON"}, status_code=400)
    is_stream = body.get("stream", False)
    # -----------------------------------------
    # NON STREAM
    # -----------------------------------------
    if not is_stream:
        tried = set()
        for _ in range(len(OLLAMA_KEYS)):
            key = await get_key(tried)
            if not key:
                await asyncio.sleep(0.5)
                continue
            tried.add(key)
            try:
                async with httpx.AsyncClient(timeout=180) as client:
                    r = await client.post(
                        f"{BASE_URL}/v1/chat/completions",
                        json=body,
                        headers={"Authorization": f"Bearer {key}"}
                    )
                    if is_rate_limited(r.status_code, r.text if r.status_code != 200 else ""):
                        await mark_rate_limited(key)
                        continue
                    await mark_ok(key)
                    return Response(content=r.content, media_type=r.headers.get("content-type", "application/json"))
            except Exception as e:
                log(e)
                await mark_fail(key)
            finally:
                await release_key(key)
        return JSONResponse({"error": "All keys failed"}, status_code=500)
    # -----------------------------------------
    # STREAM → infinite retry loop
    # -----------------------------------------
    async def gen():
        exclude = set()
        while True:
            key, exclude = await get_key_infinite(exclude)
            try:
                async with httpx.AsyncClient(timeout=None) as client:
                    async with client.stream(
                        "POST", f"{BASE_URL}/v1/chat/completions",
                        json=body, headers={"Authorization": f"Bearer {key}"}
                    ) as r:
                        if is_rate_limited(r.status_code):
                            await mark_rate_limited(key)
                            continue
                        hit_limit = False
                        async for line in r.aiter_lines():
                            if not line:
                                continue
                            if line.strip() == "data: [DONE]":
                                break
                            raw = line[6:] if line.startswith("data: ") else line
                            try:
                                j = json.loads(raw)
                                if "error" in j and "choices" not in j and is_rate_limited(0, json.dumps(j)):
                                    hit_limit = True
                                    break
                            except Exception:
                                pass
                            yield line + "\n\n"
                        if hit_limit:
                            await mark_rate_limited(key)
                            continue
                        yield "data: [DONE]\n\n"
                        await mark_ok(key)
                        return
            except Exception as e:
                log(e)
                await mark_fail(key)
            finally:
                await release_key(key)
    return StreamingResponse(gen(), media_type="text/event-stream")
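# Example request (assumed local deployment; model and prompt are illustrative):
#   curl -N -X POST http://localhost:8000/v1/chat/completions \
#     -H "Authorization: Bearer olla" -H "Content-Type: application/json" \
#     -d '{"model": "gpt-4o", "stream": true,
#          "messages": [{"role": "user", "content": "Hello"}]}'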
# =====================================================
# /v1/messages (Anthropic-compatible)
# FIXED: Full tool calling support
# =====================================================
@app.post("/v1/messages")
async def anthropic(req: Request):
    if not auth_ok(req):
        return JSONResponse({"error": "Unauthorized"}, status_code=401)
    try:
        body = await req.json()
    except ClientDisconnect:
        return Response(status_code=499)
    except Exception:
        return JSONResponse({"error": "Bad JSON"}, status_code=400)
    stream = body.get("stream", False)
    original_model = body.get("model", "claude-opus-4-7")
    ollama_model = map_model(original_model)
    # Build OpenAI-format messages
    messages = []
    if body.get("system"):
        sys = body["system"]
        if isinstance(sys, list):
            sys = "".join(x.get("text", "") for x in sys if x.get("type") == "text")
        messages.append({"role": "system", "content": sys})
    # FIXED: full conversion, including tool_use and tool_result
    messages.extend(convert_anthropic_messages(body.get("messages", [])))
    proxy_body = {
        "model": ollama_model,
        "messages": messages,
        "stream": stream,
    }
    if "max_tokens" in body:
        proxy_body["max_tokens"] = body["max_tokens"]
    if "temperature" in body:
        proxy_body["temperature"] = body["temperature"]
    if "top_p" in body:
        proxy_body["top_p"] = body["top_p"]
    # FIXED: forward tools → OpenAI format
    if body.get("tools"):
        proxy_body["tools"] = anthropic_tools_to_openai(body["tools"])
    if body.get("tool_choice"):
        proxy_body["tool_choice"] = anthropic_tool_choice_to_openai(body["tool_choice"])
    # -----------------------------------------
    # NON STREAM
    # -----------------------------------------
    if not stream:
        tried = set()
        for _ in range(len(OLLAMA_KEYS)):
            key = await get_key(tried)
            if not key:
                await asyncio.sleep(0.5)
                continue
            tried.add(key)
            try:
                async with httpx.AsyncClient(timeout=180) as client:
                    r = await client.post(
                        f"{BASE_URL}/v1/chat/completions",
                        json=proxy_body,
                        headers={"Authorization": f"Bearer {key}"}
                    )
                    if is_rate_limited(r.status_code, r.text if r.status_code != 200 else ""):
                        await mark_rate_limited(key)
                        continue
                    if r.status_code != 200:
                        log(f"HTTP {r.status_code}: {r.text[:200]}")
                        await mark_fail(key)
                        continue
                    data = r.json()
                    # FIXED: convert OpenAI response → Anthropic (handles tool_calls too)
                    out = openai_to_anthropic_response(data, original_model)
                    await mark_ok(key)
                    return JSONResponse(out)
            except Exception as e:
                log(e)
                await mark_fail(key)
            finally:
                await release_key(key)
        return JSONResponse({"error": "All keys failed"}, status_code=500)
    # -----------------------------------------
    # STREAM → infinite retry loop, Anthropic SSE format
    # FIXED: handle tool_calls deltas from the stream
    # -----------------------------------------
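    # The generator below emits the standard Anthropic SSE event sequence:
    #   message_start → content_block_start (index 0, text)
    #   → content_block_delta (text_delta / input_json_delta)
    #   → content_block_stop (one per open block)
    #   → message_delta (stop_reason) → message_stop
    # Tool-use blocks are opened lazily at indexes 1+ as deltas arrive.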
    async def agen():
        exclude = set()
        msg_id = "msg_" + uuid.uuid4().hex[:10]
        sent_header = False
        while True:
            key, exclude = await get_key_infinite(exclude)
            try:
                async with httpx.AsyncClient(timeout=None) as client:
                    async with client.stream(
                        "POST", f"{BASE_URL}/v1/chat/completions",
                        json=proxy_body, headers={"Authorization": f"Bearer {key}"}
                    ) as r:
                        if is_rate_limited(r.status_code):
                            await mark_rate_limited(key)
                            continue
                        if r.status_code != 200:
                            log(f"STREAM HTTP {r.status_code}")
                            await mark_fail(key)
                            continue
                        # Send the Anthropic header events once
                        if not sent_header:
                            sent_header = True
                            yield sse({
                                "type": "message_start",
                                "message": {
                                    "id": msg_id, "type": "message", "role": "assistant",
                                    "model": original_model, "content": [],
                                    "stop_reason": None, "stop_sequence": None,
                                    "usage": {"input_tokens": 0, "output_tokens": 0}
                                }
                            })
                            yield sse({
                                "type": "content_block_start", "index": 0,
                                "content_block": {"type": "text", "text": ""}
                            })
                        hit_limit = False
                        finish_reason = None
                        # Track tool-call blocks: OpenAI tool_call index → Anthropic block index
                        tool_blocks = {}  # tc_idx → anthropic block_index
                        next_block = 1  # 0 = text block
                        async for line in r.aiter_lines():
                            if not line:
                                continue
                            if line.strip() == "data: [DONE]":
                                break
                            raw = line[6:] if line.startswith("data: ") else line
                            try:
                                j = json.loads(raw)
                            except Exception:
                                continue
                            # Check for an API error payload (not model output)
                            if "error" in j and "choices" not in j:
                                if is_rate_limited(0, json.dumps(j)):
                                    hit_limit = True
                                    break
                            choices = j.get("choices", [])
                            if not choices:
                                continue
                            choice = choices[0]
                            delta = choice.get("delta", {})
                            finish_reason = choice.get("finish_reason") or finish_reason
                            # -- TEXT content --
                            txt = delta.get("content") or ""
                            if txt:
                                yield sse({
                                    "type": "content_block_delta", "index": 0,
                                    "delta": {"type": "text_delta", "text": txt}
                                })
                            # -- TOOL CALLS -- FIXED
                            for tc in (delta.get("tool_calls") or []):
                                tc_idx = tc.get("index", 0)
                                # New tool call → open a content block
                                if tc.get("id") or tc.get("function", {}).get("name"):
                                    if tc_idx not in tool_blocks:
                                        block_idx = next_block
                                        next_block += 1
                                        tool_blocks[tc_idx] = block_idx
                                        yield sse({
                                            "type": "content_block_start",
                                            "index": block_idx,
                                            "content_block": {
                                                "type": "tool_use",
                                                "id": tc.get("id", "toolu_" + uuid.uuid4().hex[:10]),
                                                "name": tc.get("function", {}).get("name", ""),
                                                "input": {}
                                            }
                                        })
                                # Stream argument chunks
                                args_chunk = tc.get("function", {}).get("arguments", "")
                                if args_chunk and tc_idx in tool_blocks:
                                    yield sse({
                                        "type": "content_block_delta",
                                        "index": tool_blocks[tc_idx],
                                        "delta": {"type": "input_json_delta", "partial_json": args_chunk}
                                    })
                        if hit_limit:
                            await mark_rate_limited(key)
                            continue
                        # Close all content blocks
                        yield sse({"type": "content_block_stop", "index": 0})
                        for block_idx in tool_blocks.values():
                            yield sse({"type": "content_block_stop", "index": block_idx})
                        # Stop reason
                        if finish_reason == "tool_calls" or tool_blocks:
                            stop_reason = "tool_use"
                        elif finish_reason == "length":
                            stop_reason = "max_tokens"
                        else:
                            stop_reason = "end_turn"
                        yield sse({
                            "type": "message_delta",
                            "delta": {"stop_reason": stop_reason, "stop_sequence": None},
                            "usage": {"output_tokens": 0}
                        })
                        yield sse({"type": "message_stop"})
                        await mark_ok(key)
                        return  # success
            except Exception as e:
                log(e)
                await mark_fail(key)
            finally:
                await release_key(key)
            # Fallback if the attempt failed before the header was sent:
            # emit a complete empty message so the client does not hang.
            if not sent_header:
                yield sse({
                    "type": "message_start",
                    "message": {
                        "id": msg_id, "type": "message", "role": "assistant",
                        "model": original_model, "content": [], "stop_reason": None,
                        "stop_sequence": None, "usage": {"input_tokens": 0, "output_tokens": 0}
                    }
                })
                yield sse({"type": "content_block_start", "index": 0, "content_block": {"type": "text", "text": ""}})
                yield sse({"type": "content_block_stop", "index": 0})
                yield sse({"type": "message_delta", "delta": {"stop_reason": "end_turn", "stop_sequence": None}, "usage": {"output_tokens": 0}})
                yield sse({"type": "message_stop"})
                return  # message_stop has been sent; do not loop back and re-stream
    return StreamingResponse(agen(), media_type="text/event-stream")
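# Local entry point (an assumption for convenience; the original deployment
# may rely on an external ASGI runner instead; host/port are placeholders):
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)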