proxycf

Sleeping

App Files Files Community

Elysiadev11 committed 15 days ago

Commit

f6549ae

verified ·

1 Parent(s): 8df83b1

Update proxy_cerebras.py

Browse files

Files changed (1) hide show

proxy_cerebras.py +217 -280

proxy_cerebras.py CHANGED Viewed

@@ -1,9 +1,18 @@
 # app.py
-# ==========================================================
-# FULL FIXED VERSION
-# OpenAI + Anthropic Proxy
-# HuggingFace Spaces Ready
-# ==========================================================
 import os
 import json
@@ -18,28 +27,15 @@ from starlette.requests import ClientDisconnect
 app = FastAPI()
-# ==========================================================
 # CONFIG
-# ==========================================================
-BASE_URL = os.getenv("BASE_URL", "https://elysiadev11-proxyollma.hf.space")
 MASTER_API_KEY = os.getenv("MASTER_API_KEY", "olla")
-# ==========================================================
-# MODEL MAP
-# ==========================================================
-MODEL_MAP = {
-    "claude-opus-4-7": "minimax-m2.7:cloud",
-    "claude-opus-4-6": "minimax-m2.7:cloud",
-    "claude-sonnet-4-6": "minimax-m2.7:cloud",
-    "claude-haiku-4-5": "minimax-m2.7:cloud",
-}
-def map_model(name):
-    return MODEL_MAP.get(name, name)
-# ==========================================================
 # LOAD KEYS
-# ==========================================================
 OLLAMA_KEYS = []
 for i in range(1, 101):
@@ -50,111 +46,148 @@ for i in range(1, 101):
 if not OLLAMA_KEYS:
     OLLAMA_KEYS.append("dummy")
-# ==========================================================
-# KEY MANAGER
-# ==========================================================
-last_index = 0
 key_status = {}
-for i, k in enumerate(OLLAMA_KEYS, 1):
     key_status[k] = {
-        "busy": False,
-        "ok": True,
-        "index": i
     }
-def get_key():
-    global last_index
-    total = len(OLLAMA_KEYS)
-    for x in range(total):
-        idx = (last_index + x) % total
         key = OLLAMA_KEYS[idx]
-        if not key_status[key]["busy"]:
-            key_status[key]["busy"] = True
-            last_index = idx + 1
             return key
     return None
 def release_key(key):
     if key in key_status:
-        key_status[key]["busy"] = False
-# ==========================================================
-# AUTH
-# ==========================================================
-def authorized(req: Request):
-    token = req.headers.get("Authorization", "")
-    token = token.replace("Bearer ", "")
-    return token == MASTER_API_KEY
-# ==========================================================
-# ROOT
-# ==========================================================
 @app.get("/")
 def root():
     return {
         "status": "ok",
-        "keys_loaded": len(OLLAMA_KEYS),
-        "base_url": BASE_URL
     }
-# ==========================================================
 # MODELS
-# ==========================================================
 @app.get("/v1/models")
 async def models(req: Request):
-    if not authorized(req):
-        return JSONResponse({"error": "Unauthorized"}, status_code=401)
     key = OLLAMA_KEYS[0]
     async with httpx.AsyncClient(timeout=60) as client:
         r = await client.get(
-            f"{BASE_URL}/v1/models",
             headers={"Authorization": f"Bearer {key}"}
         )
-    return Response(
-        content=r.content,
-        media_type="application/json"
-    )
-# ==========================================================
-# OPENAI CHAT
-# ==========================================================
-@app.post("/v1/chat/completions")
-async def openai_chat(req: Request):
-    if not authorized(req):
-        return JSONResponse({"error": "Unauthorized"}, status_code=401)
     try:
         body = await req.json()
     except:
-        return JSONResponse({"error": "Invalid JSON"}, status_code=400)
-    body["model"] = map_model(body.get("model", ""))
-    is_stream = body.get("stream", False)
-    # ------------------------------------------------------
     # NON STREAM
-    # ------------------------------------------------------
-    if not is_stream:
         for _ in range(len(OLLAMA_KEYS)):
-            key = get_key()
             if not key:
-                await asyncio.sleep(0.2)
                 continue
             try:
                 async with httpx.AsyncClient(timeout=180) as client:
                     r = await client.post(
@@ -163,37 +196,41 @@ async def openai_chat(req: Request):
                         headers={"Authorization": f"Bearer {key}"}
                     )
-                return Response(
-                    content=r.content,
-                    media_type=r.headers.get("content-type", "application/json")
-                )
-            except:
-                pass
             finally:
                 release_key(key)
-        return JSONResponse({"error": "All keys failed"}, status_code=500)
-    # ------------------------------------------------------
     # STREAM
-    # ------------------------------------------------------
     async def gen():
         for _ in range(len(OLLAMA_KEYS)):
             key = get_key()
             if not key:
-                await asyncio.sleep(0.2)
                 continue
             try:
                 timeout = httpx.Timeout(connect=15, read=None, write=15, pool=10)
                 async with httpx.AsyncClient(timeout=timeout) as client:
                     async with client.stream(
                         "POST",
                         f"{BASE_URL}/v1/chat/completions",
@@ -201,252 +238,152 @@ async def openai_chat(req: Request):
                         headers={"Authorization": f"Bearer {key}"}
                     ) as r:
-                        async for chunk in r.aiter_bytes():
-                            yield chunk
                         return
-            except:
-                pass
             finally:
                 release_key(key)
-        yield b'data: {"error":"failed"}\n\n'
     return StreamingResponse(gen(), media_type="text/event-stream")
-# ==========================================================
-# ANTHROPIC RESPONSE CONVERTER
-# ==========================================================
-def to_anthropic(data, model_name):
-    text = ""
-    try:
-        if "choices" in data:
-            text = data["choices"][0]["message"]["content"]
-        elif "message" in data:
-            text = data["message"]["content"]
-    except:
-        pass
-    return {
-        "id": f"msg_{uuid.uuid4().hex[:10]}",
-        "type": "message",
-        "role": "assistant",
-        "content": [
-            {
-                "type": "text",
-                "text": text
-            }
-        ],
-        "model": model_name,
-        "stop_reason": "end_turn",
-        "stop_sequence": None,
-        "usage": {
-            "input_tokens": 0,
-            "output_tokens": 0
-        }
-    }
-# ==========================================================
-# ANTHROPIC STREAM CONVERTER
-# ==========================================================
-async def anthropic_stream(lines, model):
-    msg_id = f"msg_{uuid.uuid4().hex[:10]}"
-    start_payload = {
-        "type": "message_start",
-        "message": {
-            "id": msg_id,
-            "type": "message",
-            "role": "assistant",
-            "model": model,
-            "content": [],
-            "stop_reason": None,
-            "stop_sequence": None,
-            "usage": {
-                "input_tokens": 0,
-                "output_tokens": 0
-            }
-        }
-    }
-    yield "data: " + json.dumps(start_payload) + "\n\n"
-    yield "data: " + json.dumps({
-        "type": "content_block_start",
-        "index": 0,
-        "content_block": {
-            "type": "text"
-        }
-    }) + "\n\n"
-    async for line in lines:
-        if not line:
-            continue
-        if not line.startswith("data: "):
-            continue
-        raw = line[6:].strip()
-        if raw == "[DONE]":
-            break
-        try:
-            data = json.loads(raw)
-        except:
-            continue
-        text = ""
-        try:
-            if "choices" in data:
-                delta = data["choices"][0]["delta"]
-                text = delta.get("content", "")
-                if not text:
-                    text = delta.get("reasoning", "")
-            elif "message" in data:
-                text = data["message"].get("content", "")
-        except:
-            pass
-        if text:
-            payload = {
-                "type": "content_block_delta",
-                "index": 0,
-                "delta": {
-                    "type": "text_delta",
-                    "text": text
-                }
-            }
-            yield "data: " + json.dumps(payload) + "\n\n"
-    yield "data: " + json.dumps({
-        "type": "content_block_stop",
-        "index": 0
-    }) + "\n\n"
-    yield "data: " + json.dumps({
-        "type": "message_delta",
-        "delta": {
-            "stop_reason": "end_turn",
-            "stop_sequence": None
-        },
-        "usage": {
-            "output_tokens": 0
-        }
-    }) + "\n\n"
-    yield "data: " + json.dumps({
-        "type": "message_stop"
-    }) + "\n\n"
-# ==========================================================
-# ANTHROPIC CHAT
-# ==========================================================
 @app.post("/v1/messages")
-async def anthropic_chat(req: Request):
-    if not authorized(req):
-        return JSONResponse({"error": "Unauthorized"}, status_code=401)
     try:
         body = await req.json()
     except:
-        return JSONResponse({"error": "Invalid JSON"}, status_code=400)
-    original_model = body.get("model", "claude-opus-4-7")
-    payload = {
-        "model": map_model(original_model),
         "messages": body.get("messages", []),
-        "stream": body.get("stream", False)
     }
-    is_stream = body.get("stream", False)
-    # ------------------------------------------------------
     # NON STREAM
-    # ------------------------------------------------------
-    if not is_stream:
-        for _ in range(len(OLLAMA_KEYS)):
-            key = get_key()
             if not key:
-                await asyncio.sleep(0.2)
                 continue
             try:
                 async with httpx.AsyncClient(timeout=180) as client:
                     r = await client.post(
                         f"{BASE_URL}/v1/chat/completions",
-                        json=payload,
                         headers={"Authorization": f"Bearer {key}"}
                     )
-                data = r.json()
-                return JSONResponse(
-                    to_anthropic(data, original_model)
-                )
-            except:
-                pass
             finally:
                 release_key(key)
-        return JSONResponse({"error": "All keys failed"}, status_code=500)
-    # ------------------------------------------------------
     # STREAM
-    # ------------------------------------------------------
     async def gen():
-        for _ in range(len(OLLAMA_KEYS)):
-            key = get_key()
             if not key:
-                await asyncio.sleep(0.2)
                 continue
             try:
                 timeout = httpx.Timeout(connect=15, read=None, write=15, pool=10)
                 async with httpx.AsyncClient(timeout=timeout) as client:
                     async with client.stream(
                         "POST",
                         f"{BASE_URL}/v1/chat/completions",
-                        json=payload,
                         headers={"Authorization": f"Bearer {key}"}
                     ) as r:
-                        async for chunk in anthropic_stream(
-                            r.aiter_lines(),
-                            original_model
-                        ):
-                            yield chunk
-                        return
-            except:
-                pass
             finally:
                 release_key(key)
-        yield 'data: {"error":"failed"}\n\n'
     return StreamingResponse(gen(), media_type="text/event-stream")

 # app.py
+# FULL RESTORE VERSION
+# All main features restored:
+# ✅ Multi key rotate
+# ✅ Round robin
+# ✅ Key lock
+# ✅ Dashboard /
+# ✅ /v1/models
+# ✅ /v1/chat/completions
+# ✅ /v1/messages
+# ✅ Stream OpenAI
+# ✅ Stream Anthropic
+# ✅ Claude -> Ollama model map
+# ✅ Retry when a key hits its rate limit
+# ✅ Health monitor
 import os
 import json
 app = FastAPI()
+# =========================================================
 # CONFIG
+# =========================================================
+BASE_URL = os.getenv("BASE_URL", "https://ollama.com")
 MASTER_API_KEY = os.getenv("MASTER_API_KEY", "olla")
+# =========================================================
 # LOAD KEYS
+# =========================================================
 OLLAMA_KEYS = []
 for i in range(1, 101):
 if not OLLAMA_KEYS:
     OLLAMA_KEYS.append("dummy")
+# =========================================================
+# STATUS
+# =========================================================
+last_used_index = 0
 key_status = {}
+for idx, k in enumerate(OLLAMA_KEYS, 1):
     key_status[k] = {
+        "index": idx,
+        "prefix": k[:8] + "...",
+        "success": 0,
+        "failures": 0,
+        "healthy": True,
+        "in_use": False
     }
+# =========================================================
+# MODEL MAP
+# =========================================================
+MODEL_MAP = {
+    "claude-opus-4-7": "minimax-m2.7:cloud",
+    "claude-sonnet-4-6": "minimax-m2.7:cloud",
+    "claude-haiku-4-5": "minimax-m2.7:cloud"
+}
+# =========================================================
+# UTILS
+# =========================================================
+def log(msg):
+    print(f"[{time.strftime('%H:%M:%S')}] {msg}")
+def auth_ok(req: Request):
+    token = req.headers.get("Authorization", "").replace("Bearer ", "")
+    return token == MASTER_API_KEY
+def map_model(name):
+    return MODEL_MAP.get(name, "minimax-m2.7:cloud")
+def get_key(exclude=None):
+    global last_used_index
+    if exclude is None:
+        exclude = set()
+    for i in range(len(OLLAMA_KEYS)):
+        idx = (last_used_index + i) % len(OLLAMA_KEYS)
         key = OLLAMA_KEYS[idx]
+        st = key_status[key]
+        if st["healthy"] and not st["in_use"] and key not in exclude:
+            st["in_use"] = True
+            last_used_index = idx + 1
             return key
     return None
 def release_key(key):
     if key in key_status:
+        key_status[key]["in_use"] = False
+# =========================================================
+# ROOT DASHBOARD
+# =========================================================
 @app.get("/")
 def root():
     return {
         "status": "ok",
+        "total_keys": len(OLLAMA_KEYS),
+        "keys": {
+            v["prefix"]: {
+                "healthy": v["healthy"],
+                "busy": v["in_use"],
+                "success": v["success"],
+                "failures": v["failures"]
+            }
+            for v in key_status.values()
+        }
     }
+# =========================================================
 # MODELS
+# =========================================================
 @app.get("/v1/models")
 async def models(req: Request):
+    if not auth_ok(req):
+        return JSONResponse({"error": "Unauthorized"}, 401)
     key = OLLAMA_KEYS[0]
     async with httpx.AsyncClient(timeout=60) as client:
         r = await client.get(
+            f"{BASE_URL}/api/tags",
             headers={"Authorization": f"Bearer {key}"}
         )
+    if r.status_code != 200:
+        return JSONResponse({"error": r.text}, r.status_code)
+    data = r.json()
+    models = []
+    now = int(time.time())
+    for m in data.get("models", []):
+        models.append({
+            "id": m["name"],
+            "object": "model",
+            "created": now,
+            "owned_by": "ollama"
+        })
+    return {"object": "list", "data": models}
+# =========================================================
+# OPENAI NORMAL + STREAM
+# =========================================================
+@app.post("/v1/chat/completions")
+async def chat(req: Request):
+    if not auth_ok(req):
+        return JSONResponse({"error": "Unauthorized"}, 401)
     try:
         body = await req.json()
     except:
+        return JSONResponse({"error": "Invalid JSON"}, 400)
+    stream = body.get("stream", False)
+    tried = set()
+    # -----------------------------------------------------
     # NON STREAM
+    # -----------------------------------------------------
+    if not stream:
         for _ in range(len(OLLAMA_KEYS)):
+            key = get_key(tried)
             if not key:
+                await asyncio.sleep(1)
                 continue
+            tried.add(key)
             try:
                 async with httpx.AsyncClient(timeout=180) as client:
                     r = await client.post(
                         headers={"Authorization": f"Bearer {key}"}
                     )
+                if r.status_code == 200:
+                    key_status[key]["success"] += 1
+                    return Response(
+                        content=r.content,
+                        media_type=r.headers.get("content-type")
+                    )
+                if r.status_code == 429:
+                    key_status[key]["healthy"] = False
+                key_status[key]["failures"] += 1
+            except Exception as e:
+                log(str(e))
+                key_status[key]["failures"] += 1
             finally:
                 release_key(key)
+        return JSONResponse({"error": "All keys failed"}, 500)
+    # -----------------------------------------------------
     # STREAM
+    # -----------------------------------------------------
     async def gen():
         for _ in range(len(OLLAMA_KEYS)):
             key = get_key()
             if not key:
+                await asyncio.sleep(1)
                 continue
             try:
                 timeout = httpx.Timeout(connect=15, read=None, write=15, pool=10)
                 async with httpx.AsyncClient(timeout=timeout) as client:
                     async with client.stream(
                         "POST",
                         f"{BASE_URL}/v1/chat/completions",
                         headers={"Authorization": f"Bearer {key}"}
                     ) as r:
+                        if r.status_code == 429:
+                            key_status[key]["healthy"] = False
+                            continue
+                        async for line in r.aiter_lines():
+                            if line:
+                                yield line + "\n\n"
+                        key_status[key]["success"] += 1
                         return
+            except Exception as e:
+                log(str(e))
+                key_status[key]["failures"] += 1
             finally:
                 release_key(key)
+        yield 'data: {"error":"All stream keys failed"}\n\n'
     return StreamingResponse(gen(), media_type="text/event-stream")
+# =========================================================
+# ANTHROPIC
+# =========================================================
 @app.post("/v1/messages")
+async def claude(req: Request):
+    if not auth_ok(req):
+        return JSONResponse({"error": "Unauthorized"}, 401)
     try:
         body = await req.json()
     except:
+        return JSONResponse({"error": "Invalid JSON"}, 400)
+    stream = body.get("stream", False)
+    openai_body = {
+        "model": map_model(body.get("model")),
         "messages": body.get("messages", []),
+        "stream": stream
     }
+    # -----------------------------------------------------
     # NON STREAM
+    # -----------------------------------------------------
+    if not stream:
+        fake = Request(scope=req.scope)
+        req._body = json.dumps(openai_body).encode()
+        # manual call
+        tried = set()
+        for _ in range(len(OLLAMA_KEYS)):
+            key = get_key(tried)
             if not key:
+                await asyncio.sleep(1)
                 continue
+            tried.add(key)
             try:
                 async with httpx.AsyncClient(timeout=180) as client:
                     r = await client.post(
                         f"{BASE_URL}/v1/chat/completions",
+                        json=openai_body,
                         headers={"Authorization": f"Bearer {key}"}
                     )
+                if r.status_code == 200:
+                    data = r.json()
+                    txt = data["choices"][0]["message"]["content"]
+                    return {
+                        "id": "msg_" + uuid.uuid4().hex[:10],
+                        "type": "message",
+                        "role": "assistant",
+                        "content": [
+                            {
+                                "type": "text",
+                                "text": txt
+                            }
+                        ],
+                        "model": body.get("model")
+                    }
             finally:
                 release_key(key)
+        return JSONResponse({"error": "All keys failed"}, 500)
+    # -----------------------------------------------------
     # STREAM
+    # -----------------------------------------------------
     async def gen():
+        msg_id = "msg_" + uuid.uuid4().hex[:10]
+        yield f'data: {json.dumps({"type":"message_start","message":{"id":msg_id,"type":"message","role":"assistant","model":body.get("model"),"content":[],"stop_reason":None,"stop_sequence":None,"usage":{"input_tokens":0,"output_tokens":0}})}\n\n'
+        yield f'data: {json.dumps({"type":"content_block_start","index":0,"content_block":{"type":"text"}})}\n\n'
+        tried = set()
+        for _ in range(len(OLLAMA_KEYS)):
+            key = get_key(tried)
             if not key:
+                await asyncio.sleep(1)
                 continue
+            tried.add(key)
             try:
                 timeout = httpx.Timeout(connect=15, read=None, write=15, pool=10)
                 async with httpx.AsyncClient(timeout=timeout) as client:
                     async with client.stream(
                         "POST",
                         f"{BASE_URL}/v1/chat/completions",
+                        json=openai_body,
                         headers={"Authorization": f"Bearer {key}"}
                     ) as r:
+                        async for line in r.aiter_lines():
+                            if line.startswith("data: "):
+                                raw = line[6:]
+                                if raw == "[DONE]":
+                                    break
+                                try:
+                                    j = json.loads(raw)
+                                    delta = j["choices"][0]["delta"]
+                                    txt = delta.get("content", "")
+                                    if txt:
+                                        yield f'data: {json.dumps({"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":txt}})}\n\n'
+                                except:
+                                    pass
+                        break
             finally:
                 release_key(key)
+        yield f'data: {json.dumps({"type":"content_block_stop","index":0})}\n\n'
+        yield f'data: {json.dumps({"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":None},"usage":{"output_tokens":0}})}\n\n'
+        yield f'data: {json.dumps({"type":"message_stop"})}\n\n'
     return StreamingResponse(gen(), media_type="text/event-stream")