proxycf

Sleeping

App Files Community

Elysiadev11 committed 14 days ago

Commit

f1c283c

verified ·

1 Parent(s): f6549ae

Update proxy_cerebras.py

Browse files

Files changed (1) hide show

proxy_cerebras.py +265 -178

proxy_cerebras.py CHANGED Viewed

@@ -1,19 +1,3 @@
-# app.py
-# FULL RESTORE VERSION
-# Semua fitur utama dibalikin:
-# ✅ Multi key rotate
-# ✅ Round robin
-# ✅ Key lock
-# ✅ Dashboard /
-# ✅ /v1/models
-# ✅ /v1/chat/completions
-# ✅ /v1/messages
-# ✅ Stream OpenAI
-# ✅ Stream Anthropic
-# ✅ Claude -> Ollama model map
-# ✅ Retry jika key limit
-# ✅ Health monitor
 import os
 import json
 import time
@@ -27,17 +11,16 @@ from starlette.requests import ClientDisconnect
 app = FastAPI()
-# =========================================================
 # CONFIG
-# =========================================================
 BASE_URL = os.getenv("BASE_URL", "https://ollama.com")
 MASTER_API_KEY = os.getenv("MASTER_API_KEY", "olla")
-# =========================================================
 # LOAD KEYS
-# =========================================================
 OLLAMA_KEYS = []
 for i in range(1, 101):
     k = os.getenv(f"OLLAMA_KEY_{i}")
     if k:
@@ -46,92 +29,89 @@ for i in range(1, 101):
 if not OLLAMA_KEYS:
     OLLAMA_KEYS.append("dummy")
-# =========================================================
-# STATUS
-# =========================================================
-last_used_index = 0
 key_status = {}
 for idx, k in enumerate(OLLAMA_KEYS, 1):
     key_status[k] = {
         "index": idx,
-        "prefix": k[:8] + "...",
-        "success": 0,
-        "failures": 0,
         "healthy": True,
-        "in_use": False
     }
-# =========================================================
-# MODEL MAP
-# =========================================================
-MODEL_MAP = {
-    "claude-opus-4-7": "minimax-m2.7:cloud",
-    "claude-sonnet-4-6": "minimax-m2.7:cloud",
-    "claude-haiku-4-5": "minimax-m2.7:cloud"
-}
-# =========================================================
-# UTILS
-# =========================================================
-def log(msg):
-    print(f"[{time.strftime('%H:%M:%S')}] {msg}")
 def auth_ok(req: Request):
     token = req.headers.get("Authorization", "").replace("Bearer ", "")
     return token == MASTER_API_KEY
-def map_model(name):
-    return MODEL_MAP.get(name, "minimax-m2.7:cloud")
 def get_key(exclude=None):
-    global last_used_index
     if exclude is None:
         exclude = set()
-    for i in range(len(OLLAMA_KEYS)):
-        idx = (last_used_index + i) % len(OLLAMA_KEYS)
-        key = OLLAMA_KEYS[idx]
-        st = key_status[key]
-        if st["healthy"] and not st["in_use"] and key not in exclude:
-            st["in_use"] = True
-            last_used_index = idx + 1
-            return key
     return None
-def release_key(key):
-    if key in key_status:
-        key_status[key]["in_use"] = False
-# =========================================================
-# ROOT DASHBOARD
-# =========================================================
 @app.get("/")
-def root():
     return {
         "status": "ok",
-        "total_keys": len(OLLAMA_KEYS),
-        "keys": {
-            v["prefix"]: {
-                "healthy": v["healthy"],
-                "busy": v["in_use"],
-                "success": v["success"],
-                "failures": v["failures"]
-            }
-            for v in key_status.values()
-        }
     }
-# =========================================================
-# MODELS
-# =========================================================
 @app.get("/v1/models")
 async def models(req: Request):
     if not auth_ok(req):
-        return JSONResponse({"error": "Unauthorized"}, 401)
     key = OLLAMA_KEYS[0]
@@ -142,48 +122,50 @@ async def models(req: Request):
         )
     if r.status_code != 200:
-        return JSONResponse({"error": r.text}, r.status_code)
     data = r.json()
-    models = []
     now = int(time.time())
     for m in data.get("models", []):
-        models.append({
-            "id": m["name"],
             "object": "model",
             "created": now,
             "owned_by": "ollama"
         })
-    return {"object": "list", "data": models}
-# =========================================================
-# OPENAI NORMAL + STREAM
-# =========================================================
 @app.post("/v1/chat/completions")
 async def chat(req: Request):
     if not auth_ok(req):
-        return JSONResponse({"error": "Unauthorized"}, 401)
     try:
         body = await req.json()
     except:
-        return JSONResponse({"error": "Invalid JSON"}, 400)
-    stream = body.get("stream", False)
-    tried = set()
-    # -----------------------------------------------------
     # NON STREAM
-    # -----------------------------------------------------
-    if not stream:
         for _ in range(len(OLLAMA_KEYS)):
             key = get_key(tried)
             if not key:
-                await asyncio.sleep(1)
                 continue
             tried.add(key)
@@ -196,41 +178,48 @@ async def chat(req: Request):
                         headers={"Authorization": f"Bearer {key}"}
                     )
-                if r.status_code == 200:
-                    key_status[key]["success"] += 1
-                    return Response(
-                        content=r.content,
-                        media_type=r.headers.get("content-type")
-                    )
-                if r.status_code == 429:
-                    key_status[key]["healthy"] = False
-                key_status[key]["failures"] += 1
             except Exception as e:
-                log(str(e))
-                key_status[key]["failures"] += 1
             finally:
                 release_key(key)
-        return JSONResponse({"error": "All keys failed"}, 500)
-    # -----------------------------------------------------
     # STREAM
-    # -----------------------------------------------------
     async def gen():
         for _ in range(len(OLLAMA_KEYS)):
-            key = get_key()
             if not key:
-                await asyncio.sleep(1)
                 continue
-            try:
-                timeout = httpx.Timeout(connect=15, read=None, write=15, pool=10)
-                async with httpx.AsyncClient(timeout=timeout) as client:
                     async with client.stream(
                         "POST",
                         f"{BASE_URL}/v1/chat/completions",
@@ -239,62 +228,84 @@ async def chat(req: Request):
                     ) as r:
                         if r.status_code == 429:
-                            key_status[key]["healthy"] = False
                             continue
                         async for line in r.aiter_lines():
                             if line:
                                 yield line + "\n\n"
-                        key_status[key]["success"] += 1
                         return
             except Exception as e:
-                log(str(e))
-                key_status[key]["failures"] += 1
             finally:
                 release_key(key)
-        yield 'data: {"error":"All stream keys failed"}\n\n'
     return StreamingResponse(gen(), media_type="text/event-stream")
-# =========================================================
-# ANTHROPIC
-# =========================================================
 @app.post("/v1/messages")
-async def claude(req: Request):
     if not auth_ok(req):
-        return JSONResponse({"error": "Unauthorized"}, 401)
     try:
         body = await req.json()
-    except:
-        return JSONResponse({"error": "Invalid JSON"}, 400)
     stream = body.get("stream", False)
-    openai_body = {
-        "model": map_model(body.get("model")),
-        "messages": body.get("messages", []),
         "stream": stream
     }
-    # -----------------------------------------------------
     # NON STREAM
-    # -----------------------------------------------------
     if not stream:
-        fake = Request(scope=req.scope)
-        req._body = json.dumps(openai_body).encode()
-        # manual call
         tried = set()
         for _ in range(len(OLLAMA_KEYS)):
             key = get_key(tried)
             if not key:
-                await asyncio.sleep(1)
                 continue
             tried.add(key)
@@ -303,87 +314,163 @@ async def claude(req: Request):
                 async with httpx.AsyncClient(timeout=180) as client:
                     r = await client.post(
                         f"{BASE_URL}/v1/chat/completions",
-                        json=openai_body,
                         headers={"Authorization": f"Bearer {key}"}
                     )
-                if r.status_code == 200:
-                    data = r.json()
-                    txt = data["choices"][0]["message"]["content"]
-                    return {
-                        "id": "msg_" + uuid.uuid4().hex[:10],
-                        "type": "message",
-                        "role": "assistant",
-                        "content": [
-                            {
-                                "type": "text",
-                                "text": txt
-                            }
-                        ],
-                        "model": body.get("model")
                     }
             finally:
                 release_key(key)
-        return JSONResponse({"error": "All keys failed"}, 500)
-    # -----------------------------------------------------
     # STREAM
-    # -----------------------------------------------------
-    async def gen():
         msg_id = "msg_" + uuid.uuid4().hex[:10]
-        yield f'data: {json.dumps({"type":"message_start","message":{"id":msg_id,"type":"message","role":"assistant","model":body.get("model"),"content":[],"stop_reason":None,"stop_sequence":None,"usage":{"input_tokens":0,"output_tokens":0}})}\n\n'
-        yield f'data: {json.dumps({"type":"content_block_start","index":0,"content_block":{"type":"text"}})}\n\n'
-        tried = set()
         for _ in range(len(OLLAMA_KEYS)):
             key = get_key(tried)
             if not key:
-                await asyncio.sleep(1)
                 continue
             tried.add(key)
             try:
-                timeout = httpx.Timeout(connect=15, read=None, write=15, pool=10)
-                async with httpx.AsyncClient(timeout=timeout) as client:
                     async with client.stream(
                         "POST",
                         f"{BASE_URL}/v1/chat/completions",
-                        json=openai_body,
                         headers={"Authorization": f"Bearer {key}"}
                     ) as r:
                         async for line in r.aiter_lines():
-                            if line.startswith("data: "):
-                                raw = line[6:]
-                                if raw == "[DONE]":
-                                    break
-                                try:
-                                    j = json.loads(raw)
-                                    delta = j["choices"][0]["delta"]
-                                    txt = delta.get("content", "")
-                                    if txt:
-                                        yield f'data: {json.dumps({"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":txt}})}\n\n'
-                                except:
-                                    pass
                         break
             finally:
                 release_key(key)
-        yield f'data: {json.dumps({"type":"content_block_stop","index":0})}\n\n'
-        yield f'data: {json.dumps({"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":None},"usage":{"output_tokens":0}})}\n\n'
-        yield f'data: {json.dumps({"type":"message_stop"})}\n\n'
-    return StreamingResponse(gen(), media_type="text/event-stream")

 import os
 import json
 import time
 app = FastAPI()
+# =====================================================
 # CONFIG
+# =====================================================
 BASE_URL = os.getenv("BASE_URL", "https://ollama.com")
 MASTER_API_KEY = os.getenv("MASTER_API_KEY", "olla")
+# =====================================================
 # LOAD KEYS
+# =====================================================
 OLLAMA_KEYS = []
 for i in range(1, 101):
     k = os.getenv(f"OLLAMA_KEY_{i}")
     if k:
 if not OLLAMA_KEYS:
     OLLAMA_KEYS.append("dummy")
 key_status = {}
 for idx, k in enumerate(OLLAMA_KEYS, 1):
     key_status[k] = {
         "index": idx,
         "healthy": True,
+        "busy": False,
+        "success": 0,
+        "fail": 0,
     }
+rr_index = 0
+# =====================================================
+# HELPERS
+# =====================================================
+def log(x):
+    # Print x to stdout, prefixed with the current local time as [HH:MM:SS].
+    print(f"[{time.strftime('%H:%M:%S')}] {x}")
+def sse(obj):
+    # Format obj as a single "data: ..." event-stream frame: the JSON text of
+    # obj (non-ASCII characters left unescaped) terminated by a blank line.
+    return "data: " + json.dumps(obj, ensure_ascii=False) + "\n\n"
 def auth_ok(req: Request):
+    # Return True when the request's Authorization header carries
+    # MASTER_API_KEY, either as "Bearer <key>" or as the bare key.
+    #
+    # Only a leading "Bearer " scheme is stripped. The previous
+    # str.replace() removed that substring anywhere in the header, which
+    # mangled tokens containing it and accepted malformed headers such as
+    # "Bearer Bearer <key>".
+    import hmac  # local import: the file's import block is not touched here
+
+    header = req.headers.get("Authorization", "")
+    scheme = "Bearer "
+    token = header[len(scheme):] if header.startswith(scheme) else header
+    # Constant-time comparison so response timing does not leak how much of
+    # the key matched. Both sides are encoded because compare_digest()
+    # rejects non-ASCII str arguments.
+    return hmac.compare_digest(
+        token.encode("utf-8"),
+        MASTER_API_KEY.encode("utf-8"),
+    )
 def get_key(exclude=None):
+    # Pick the next usable upstream key in round-robin order and mark it busy.
+    #
+    # exclude: optional set of keys to skip; the request handlers pass the
+    #          keys they have already tried.
+    # Returns the key string, or None when no key is healthy, idle and not
+    # excluded. The handlers pair each successful call with release_key().
+    global rr_index
     if exclude is None:
         exclude = set()
+    for _ in range(len(OLLAMA_KEYS)):
+        # Advance the shared cursor before reading it, so the scan resumes
+        # just after the slot examined by the previous call.
+        rr_index = (rr_index + 1) % len(OLLAMA_KEYS)
+        k = OLLAMA_KEYS[rr_index]
+        st = key_status[k]
+        if st["healthy"] and not st["busy"] and k not in exclude:
+            st["busy"] = True
+            return k
     return None
+def release_key(k):
+    # Clear the busy flag that get_key() set for k; unknown keys are ignored.
+    if k in key_status:
+        key_status[k]["busy"] = False
+def mark_fail(k):
+    # Increment the failure counter for key k; unknown keys are ignored.
+    # Note: this does not change the key's "healthy" flag.
+    if k in key_status:
+        key_status[k]["fail"] += 1
+def mark_ok(k):
+    # Record a successful upstream call for key k; unknown keys are ignored.
+    if k in key_status:
+        key_status[k]["success"] += 1
+        # Reset on success, so "fail" counts failures since the last success.
+        key_status[k]["fail"] = 0
+# =====================================================
+# ROOT
+# =====================================================
 @app.get("/")
+async def root():
+    # Status endpoint: reports how many upstream keys are loaded and the
+    # per-key counters. This route performs no auth_ok() check.
+    #
+    # key_status is keyed by the raw upstream API key strings, so returning
+    # it directly would publish every secret to any caller. Re-key the
+    # entries by their 1-based index; the per-key dicts themselves hold only
+    # index, health, busy and counter fields.
+    detail = {
+        f"key_{st['index']}": st
+        for st in key_status.values()
+    }
     return {
         "status": "ok",
+        "keys": len(OLLAMA_KEYS),
+        "detail": detail
     }
+# =====================================================
+# /v1/models
+# =====================================================
 @app.get("/v1/models")
 async def models(req: Request):
     if not auth_ok(req):
+        return JSONResponse({"error": "Unauthorized"}, status_code=401)
     key = OLLAMA_KEYS[0]
         )
     if r.status_code != 200:
+        return JSONResponse({"error": r.text}, status_code=r.status_code)
     data = r.json()
+    out = []
     now = int(time.time())
     for m in data.get("models", []):
+        out.append({
+            "id": m.get("name"),
             "object": "model",
             "created": now,
             "owned_by": "ollama"
         })
+    return {"object": "list", "data": out}
+# =====================================================
+# OPENAI CHAT
+# =====================================================
 @app.post("/v1/chat/completions")
 async def chat(req: Request):
     if not auth_ok(req):
+        return JSONResponse({"error": "Unauthorized"}, status_code=401)
     try:
         body = await req.json()
     except:
+        return JSONResponse({"error": "Bad JSON"}, status_code=400)
+    is_stream = body.get("stream", False)
+    # -----------------------------------------
     # NON STREAM
+    # -----------------------------------------
+    if not is_stream:
+        tried = set()
         for _ in range(len(OLLAMA_KEYS)):
             key = get_key(tried)
             if not key:
+                await asyncio.sleep(0.3)
                 continue
             tried.add(key)
                         headers={"Authorization": f"Bearer {key}"}
                     )
+                txt = r.text.lower()
+                if "weekly usage limit" in txt or r.status_code == 429:
+                    mark_fail(key)
+                    continue
+                mark_ok(key)
+                return Response(
+                    content=r.content,
+                    media_type=r.headers.get(
+                        "content-type",
+                        "application/json"
+                    )
+                )
             except Exception as e:
+                log(e)
+                mark_fail(key)
             finally:
                 release_key(key)
+        return JSONResponse({"error": "All keys failed"}, status_code=500)
+    # -----------------------------------------
     # STREAM
+    # -----------------------------------------
     async def gen():
+        tried = set()
         for _ in range(len(OLLAMA_KEYS)):
+            key = get_key(tried)
             if not key:
+                await asyncio.sleep(0.3)
                 continue
+            tried.add(key)
+            try:
+                async with httpx.AsyncClient(timeout=None) as client:
                     async with client.stream(
                         "POST",
                         f"{BASE_URL}/v1/chat/completions",
                     ) as r:
                         if r.status_code == 429:
+                            mark_fail(key)
                             continue
                         async for line in r.aiter_lines():
                             if line:
                                 yield line + "\n\n"
+                        mark_ok(key)
                         return
             except Exception as e:
+                log(e)
+                mark_fail(key)
             finally:
                 release_key(key)
+        yield sse({"error": "All keys failed"})
+        yield "data: [DONE]\n\n"
     return StreamingResponse(gen(), media_type="text/event-stream")
+# =====================================================
+# ANTHROPIC /v1/messages
+# =====================================================
 @app.post("/v1/messages")
+async def anthropic(req: Request):
     if not auth_ok(req):
+        return JSONResponse({"error": "Unauthorized"}, status_code=401)
     try:
         body = await req.json()
+    except ClientDisconnect:
+        return Response(status_code=499)
     stream = body.get("stream", False)
+    messages = []
+    if body.get("system"):
+        messages.append({
+            "role": "system",
+            "content": body["system"]
+        })
+    for m in body.get("messages", []):
+        content = m.get("content", "")
+        if isinstance(content, list):
+            txt = ""
+            for x in content:
+                if x.get("type") == "text":
+                    txt += x.get("text", "")
+            content = txt
+        messages.append({
+            "role": m["role"],
+            "content": content
+        })
+    proxy_body = {
+        "model": "minimax-m2.7:cloud",
+        "messages": messages,
         "stream": stream
     }
+    # -----------------------------------------
     # NON STREAM
+    # -----------------------------------------
     if not stream:
         tried = set()
         for _ in range(len(OLLAMA_KEYS)):
             key = get_key(tried)
             if not key:
+                await asyncio.sleep(0.3)
                 continue
             tried.add(key)
                 async with httpx.AsyncClient(timeout=180) as client:
                     r = await client.post(
                         f"{BASE_URL}/v1/chat/completions",
+                        json=proxy_body,
                         headers={"Authorization": f"Bearer {key}"}
                     )
+                txt = r.text.lower()
+                if "weekly usage limit" in txt or r.status_code == 429:
+                    mark_fail(key)
+                    continue
+                data = r.json()
+                ans = data["choices"][0]["message"]["content"]
+                out = {
+                    "id": "msg_" + uuid.uuid4().hex[:10],
+                    "type": "message",
+                    "role": "assistant",
+                    "model": body.get("model", "claude-opus-4-7"),
+                    "content": [
+                        {
+                            "type": "text",
+                            "text": ans
+                        }
+                    ],
+                    "stop_reason": "end_turn",
+                    "stop_sequence": None,
+                    "usage": {
+                        "input_tokens": 0,
+                        "output_tokens": 0
                     }
+                }
+                mark_ok(key)
+                return JSONResponse(out)
+            except Exception as e:
+                log(e)
+                mark_fail(key)
             finally:
                 release_key(key)
+        return JSONResponse({"error": "All keys failed"}, status_code=500)
+    # -----------------------------------------
     # STREAM
+    # -----------------------------------------
+    async def agen():
+        tried = set()
         msg_id = "msg_" + uuid.uuid4().hex[:10]
+        start_payload = {
+            "type": "message_start",
+            "message": {
+                "id": msg_id,
+                "type": "message",
+                "role": "assistant",
+                "model": body.get("model", "claude-opus-4-7"),
+                "content": [],
+                "stop_reason": None,
+                "stop_sequence": None,
+                "usage": {
+                    "input_tokens": 0,
+                    "output_tokens": 0
+                }
+            }
+        }
+        yield sse(start_payload)
+        yield sse({
+            "type": "content_block_start",
+            "index": 0,
+            "content_block": {"type": "text"}
+        })
         for _ in range(len(OLLAMA_KEYS)):
             key = get_key(tried)
             if not key:
+                await asyncio.sleep(0.3)
                 continue
             tried.add(key)
             try:
+                async with httpx.AsyncClient(timeout=None) as client:
                     async with client.stream(
                         "POST",
                         f"{BASE_URL}/v1/chat/completions",
+                        json=proxy_body,
                         headers={"Authorization": f"Bearer {key}"}
                     ) as r:
+                        if r.status_code == 429:
+                            mark_fail(key)
+                            continue
                         async for line in r.aiter_lines():
+                            if not line.startswith("data: "):
+                                continue
+                            raw = line[6:].strip()
+                            if raw == "[DONE]":
+                                break
+                            try:
+                                j = json.loads(raw)
+                            except:
+                                continue
+                            delta = j["choices"][0]["delta"]
+                            txt = delta.get("content", "")
+                            if txt:
+                                yield sse({
+                                    "type": "content_block_delta",
+                                    "index": 0,
+                                    "delta": {
+                                        "type": "text_delta",
+                                        "text": txt
+                                    }
+                                })
+                        mark_ok(key)
                         break
+            except Exception as e:
+                log(e)
+                mark_fail(key)
             finally:
                 release_key(key)
+        yield sse({
+            "type": "content_block_stop",
+            "index": 0
+        })
+        yield sse({
+            "type": "message_delta",
+            "delta": {
+                "stop_reason": "end_turn",
+                "stop_sequence": None
+            },
+            "usage": {
+                "output_tokens": 0
+            }
+        })
+        yield sse({
+            "type": "message_stop"
+        })
+    return StreamingResponse(
+        agen(),
+        media_type="text/event-stream"
+    )