Elysiadev11 commited on
Commit
8df83b1
·
verified ·
1 Parent(s): 7076d96

Update proxy_cerebras.py

Browse files
Files changed (1) hide show
  1. proxy_cerebras.py +163 -265
proxy_cerebras.py CHANGED
@@ -1,25 +1,19 @@
 
1
  # ==========================================================
2
- # FULL CODE APP.PY
3
- # Proxy 2 endpoint:
4
- # /v1/chat/completions (OpenAI)
5
- # /v1/messages (Anthropic)
6
- # Multi key rotation
7
- # HuggingFace Spaces ready
8
  # ==========================================================
9
 
10
  import os
11
- import time
12
  import json
 
13
  import uuid
14
  import asyncio
15
  import httpx
16
 
17
  from fastapi import FastAPI, Request
18
- from fastapi.responses import (
19
- JSONResponse,
20
- Response,
21
- StreamingResponse
22
- )
23
  from starlette.requests import ClientDisconnect
24
 
25
  app = FastAPI()
@@ -27,15 +21,8 @@ app = FastAPI()
27
  # ==========================================================
28
  # CONFIG
29
  # ==========================================================
30
- BASE_URL = os.getenv(
31
- "BASE_URL",
32
- "https://elysiadev11-proxyollma.hf.space"
33
- )
34
-
35
- MASTER_API_KEY = os.getenv(
36
- "MASTER_API_KEY",
37
- "olla"
38
- )
39
 
40
  # ==========================================================
41
  # MODEL MAP
@@ -47,10 +34,8 @@ MODEL_MAP = {
47
  "claude-haiku-4-5": "minimax-m2.7:cloud",
48
  }
49
 
50
-
51
- def map_model(model: str) -> str:
52
- return MODEL_MAP.get(model, model)
53
-
54
 
55
  # ==========================================================
56
  # LOAD KEYS
@@ -58,74 +43,54 @@ def map_model(model: str) -> str:
58
  OLLAMA_KEYS = []
59
 
60
  for i in range(1, 101):
61
- key = os.getenv(f"OLLAMA_KEY_{i}")
62
- if key:
63
- OLLAMA_KEYS.append(key)
64
 
65
  if not OLLAMA_KEYS:
66
- OLLAMA_KEYS.append("dummy-key")
67
-
68
 
69
  # ==========================================================
70
- # KEY STATUS
71
  # ==========================================================
72
- last_used_index = 0
73
 
74
  key_status = {}
75
 
76
- for idx, key in enumerate(OLLAMA_KEYS, 1):
77
- key_status[key] = {
78
- "index": idx,
79
  "busy": False,
80
- "healthy": True,
81
- "success": 0,
82
- "fail": 0
83
  }
84
 
85
-
86
- # ==========================================================
87
- # LOG
88
- # ==========================================================
89
- def log(msg):
90
- print(f"[{time.strftime('%H:%M:%S')}] {msg}")
91
-
92
-
93
- # ==========================================================
94
- # AUTH
95
- # ==========================================================
96
- def check_auth(req: Request):
97
- token = req.headers.get(
98
- "Authorization", ""
99
- ).replace("Bearer ", "")
100
-
101
- return token == MASTER_API_KEY
102
-
103
-
104
- # ==========================================================
105
- # PICK KEY
106
- # ==========================================================
107
  def get_key():
108
- global last_used_index
109
 
110
  total = len(OLLAMA_KEYS)
111
 
112
- for i in range(total):
113
- idx = (last_used_index + i) % total
114
  key = OLLAMA_KEYS[idx]
115
- st = key_status[key]
116
 
117
- if st["healthy"] and not st["busy"]:
118
- st["busy"] = True
119
- last_used_index = idx + 1
120
  return key
121
 
122
  return None
123
 
124
-
125
  def release_key(key):
126
  if key in key_status:
127
  key_status[key]["busy"] = False
128
 
 
 
 
 
 
 
 
129
 
130
  # ==========================================================
131
  # ROOT
@@ -134,71 +99,46 @@ def release_key(key):
134
  def root():
135
  return {
136
  "status": "ok",
137
- "keys": len(OLLAMA_KEYS),
138
  "base_url": BASE_URL
139
  }
140
 
141
-
142
  # ==========================================================
143
  # MODELS
144
  # ==========================================================
145
  @app.get("/v1/models")
146
  async def models(req: Request):
147
- if not check_auth(req):
148
- return JSONResponse(
149
- {"error": "Unauthorized"},
150
- status_code=401
151
- )
152
 
153
  key = OLLAMA_KEYS[0]
154
 
155
- try:
156
- async with httpx.AsyncClient(timeout=60) as client:
157
- r = await client.get(
158
- f"{BASE_URL}/v1/models",
159
- headers={
160
- "Authorization":
161
- f"Bearer {key}"
162
- }
163
- )
164
-
165
- return Response(
166
- content=r.content,
167
- media_type="application/json"
168
- )
169
-
170
- except Exception as e:
171
- return JSONResponse(
172
- {"error": str(e)},
173
- status_code=500
174
  )
175
 
 
 
 
 
176
 
177
  # ==========================================================
178
- # OPENAI ENDPOINT
179
  # ==========================================================
180
  @app.post("/v1/chat/completions")
181
- async def chat(req: Request):
182
- if not check_auth(req):
183
- return JSONResponse(
184
- {"error": "Unauthorized"},
185
- status_code=401
186
- )
187
 
188
  try:
189
  body = await req.json()
190
-
191
- except ClientDisconnect:
192
- return Response(status_code=499)
193
-
194
  except:
195
- return JSONResponse(
196
- {"error": "Invalid JSON"},
197
- status_code=400
198
- )
199
 
200
- model = body.get("model", "")
201
- body["model"] = map_model(model)
202
 
203
  is_stream = body.get("stream", False)
204
 
@@ -212,47 +152,29 @@ async def chat(req: Request):
212
  key = get_key()
213
 
214
  if not key:
215
- await asyncio.sleep(0.3)
216
  continue
217
 
218
  try:
219
- async with httpx.AsyncClient(
220
- timeout=180
221
- ) as client:
222
-
223
  r = await client.post(
224
  f"{BASE_URL}/v1/chat/completions",
225
  json=body,
226
- headers={
227
- "Authorization":
228
- f"Bearer {key}"
229
- }
230
  )
231
 
232
- if r.status_code == 200:
233
- key_status[key]["success"] += 1
234
-
235
- return Response(
236
- content=r.content,
237
- media_type=r.headers.get(
238
- "content-type",
239
- "application/json"
240
- )
241
- )
242
-
243
- else:
244
- key_status[key]["fail"] += 1
245
 
246
- except Exception:
247
- key_status[key]["fail"] += 1
248
 
249
  finally:
250
  release_key(key)
251
 
252
- return JSONResponse(
253
- {"error": "All keys failed"},
254
- status_code=500
255
- )
256
 
257
  # ------------------------------------------------------
258
  # STREAM
@@ -264,71 +186,50 @@ async def chat(req: Request):
264
  key = get_key()
265
 
266
  if not key:
267
- await asyncio.sleep(0.3)
268
  continue
269
 
270
  try:
271
- timeout = httpx.Timeout(
272
- connect=15,
273
- read=None,
274
- write=15,
275
- pool=10
276
- )
277
 
278
- async with httpx.AsyncClient(
279
- timeout=timeout
280
- ) as client:
281
 
282
  async with client.stream(
283
  "POST",
284
  f"{BASE_URL}/v1/chat/completions",
285
  json=body,
286
- headers={
287
- "Authorization":
288
- f"Bearer {key}"
289
- }
290
  ) as r:
291
 
292
- if r.status_code != 200:
293
- key_status[key]["fail"] += 1
294
- continue
295
-
296
  async for chunk in r.aiter_bytes():
297
- if chunk:
298
- yield chunk
299
 
300
- key_status[key]["success"] += 1
301
  return
302
 
303
- except Exception:
304
- key_status[key]["fail"] += 1
305
 
306
  finally:
307
  release_key(key)
308
 
309
- yield b'data: {"error":"all keys failed"}\n\n'
310
-
311
- return StreamingResponse(
312
- gen(),
313
- media_type="text/event-stream"
314
- )
315
 
 
316
 
317
  # ==========================================================
318
- # ANTHROPIC CONVERTER
319
  # ==========================================================
320
- def to_anthropic(resp, original_model):
321
 
322
  text = ""
323
 
324
- if "choices" in resp:
325
- try:
326
- text = resp["choices"][0]["message"]["content"]
327
- except:
328
- text = ""
329
-
330
- elif "message" in resp:
331
- text = resp["message"].get("content", "")
332
 
333
  return {
334
  "id": f"msg_{uuid.uuid4().hex[:10]}",
@@ -340,7 +241,7 @@ def to_anthropic(resp, original_model):
340
  "text": text
341
  }
342
  ],
343
- "model": original_model,
344
  "stop_reason": "end_turn",
345
  "stop_sequence": None,
346
  "usage": {
@@ -349,17 +250,39 @@ def to_anthropic(resp, original_model):
349
  }
350
  }
351
 
352
-
353
  # ==========================================================
354
- # ANTHROPIC STREAM
355
  # ==========================================================
356
  async def anthropic_stream(lines, model):
357
 
358
  msg_id = f"msg_{uuid.uuid4().hex[:10]}"
359
 
360
- yield f"data: {json.dumps({'type':'message_start','message':{'id':msg_id,'type':'message','role':'assistant','model':model,'content':[],'stop_reason':None,'stop_sequence':None,'usage':{'input_tokens':0,'output_tokens':0}})}\n\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
361
 
362
- yield f"data: {json.dumps({'type':'content_block_start','index':0,'content_block':{'type':'text'}})}\n\n"
 
 
 
 
 
 
 
 
363
 
364
  async for line in lines:
365
 
@@ -381,66 +304,71 @@ async def anthropic_stream(lines, model):
381
 
382
  text = ""
383
 
384
- if "choices" in data:
385
- try:
386
  delta = data["choices"][0]["delta"]
387
  text = delta.get("content", "")
388
 
389
  if not text:
390
  text = delta.get("reasoning", "")
391
- except:
392
- pass
393
 
394
- elif "message" in data:
395
- text = data["message"].get(
396
- "content", ""
397
- )
398
 
399
  if text:
400
- yield f"data: {json.dumps({'type':'content_block_delta','index':0,'delta':{'type':'text_delta','text':text}})}\n\n"
 
 
 
 
 
 
 
401
 
402
- yield f"data: {json.dumps({'type':'content_block_stop','index':0})}\n\n"
403
 
404
- yield f"data: {json.dumps({'type':'message_delta','delta':{'stop_reason':'end_turn','stop_sequence':None},'usage':{'output_tokens':0}})}\n\n"
 
 
 
405
 
406
- yield f"data: {json.dumps({'type':'message_stop'})}\n\n"
 
 
 
 
 
 
 
 
 
407
 
 
 
 
408
 
409
  # ==========================================================
410
- # ANTHROPIC ENDPOINT
411
  # ==========================================================
412
  @app.post("/v1/messages")
413
- async def messages(req: Request):
414
 
415
- if not check_auth(req):
416
- return JSONResponse(
417
- {"error": "Unauthorized"},
418
- status_code=401
419
- )
420
 
421
  try:
422
  body = await req.json()
423
-
424
- except ClientDisconnect:
425
- return Response(status_code=499)
426
-
427
  except:
428
- return JSONResponse(
429
- {"error": "Invalid JSON"},
430
- status_code=400
431
- )
432
-
433
- original_model = body.get(
434
- "model",
435
- "claude-opus-4-7"
436
- )
437
 
438
- backend_model = map_model(original_model)
439
 
440
  payload = {
441
- "model": backend_model,
442
- "stream": body.get("stream", False),
443
- "messages": body.get("messages", [])
444
  }
445
 
446
  is_stream = body.get("stream", False)
@@ -455,33 +383,22 @@ async def messages(req: Request):
455
  key = get_key()
456
 
457
  if not key:
458
- await asyncio.sleep(0.3)
459
  continue
460
 
461
  try:
462
- async with httpx.AsyncClient(
463
- timeout=180
464
- ) as client:
465
-
466
  r = await client.post(
467
  f"{BASE_URL}/v1/chat/completions",
468
  json=payload,
469
- headers={
470
- "Authorization":
471
- f"Bearer {key}"
472
- }
473
  )
474
 
475
- if r.status_code == 200:
476
-
477
- data = r.json()
478
 
479
- return JSONResponse(
480
- to_anthropic(
481
- data,
482
- original_model
483
- )
484
- )
485
 
486
  except:
487
  pass
@@ -489,10 +406,7 @@ async def messages(req: Request):
489
  finally:
490
  release_key(key)
491
 
492
- return JSONResponse(
493
- {"error": "All keys failed"},
494
- status_code=500
495
- )
496
 
497
  # ------------------------------------------------------
498
  # STREAM
@@ -504,39 +418,26 @@ async def messages(req: Request):
504
  key = get_key()
505
 
506
  if not key:
507
- await asyncio.sleep(0.3)
508
  continue
509
 
510
  try:
511
- timeout = httpx.Timeout(
512
- connect=15,
513
- read=None,
514
- write=15,
515
- pool=10
516
- )
517
 
518
- async with httpx.AsyncClient(
519
- timeout=timeout
520
- ) as client:
521
 
522
  async with client.stream(
523
  "POST",
524
  f"{BASE_URL}/v1/chat/completions",
525
  json=payload,
526
- headers={
527
- "Authorization":
528
- f"Bearer {key}"
529
- }
530
  ) as r:
531
 
532
- if r.status_code != 200:
533
- continue
534
-
535
- async for x in anthropic_stream(
536
  r.aiter_lines(),
537
  original_model
538
  ):
539
- yield x
540
 
541
  return
542
 
@@ -546,9 +447,6 @@ async def messages(req: Request):
546
  finally:
547
  release_key(key)
548
 
549
- yield 'data: {"error":"all keys failed"}\n\n'
550
 
551
- return StreamingResponse(
552
- gen(),
553
- media_type="text/event-stream"
554
- )
 
1
+ # app.py
2
  # ==========================================================
3
+ # FULL FIXED VERSION
4
+ # OpenAI + Anthropic Proxy
5
+ # HuggingFace Spaces Ready
 
 
 
6
  # ==========================================================
7
 
8
  import os
 
9
  import json
10
+ import time
11
  import uuid
12
  import asyncio
13
  import httpx
14
 
15
  from fastapi import FastAPI, Request
16
+ from fastapi.responses import JSONResponse, Response, StreamingResponse
 
 
 
 
17
  from starlette.requests import ClientDisconnect
18
 
19
  app = FastAPI()
 
21
  # ==========================================================
22
  # CONFIG
23
  # ==========================================================
24
# Upstream Ollama-compatible proxy to forward all traffic to.
BASE_URL = os.getenv("BASE_URL", "https://elysiadev11-proxyollma.hf.space")

# Bearer token clients must present to this proxy.
# SECURITY NOTE(review): the fallback "olla" is a weak hardcoded default —
# set MASTER_API_KEY in the Space secrets for any real deployment.
MASTER_API_KEY = os.getenv("MASTER_API_KEY", "olla")
 
 
 
 
 
 
 
26
 
27
  # ==========================================================
28
  # MODEL MAP
 
34
  "claude-haiku-4-5": "minimax-m2.7:cloud",
35
  }
36
 
37
def map_model(name):
    """Translate a public model alias into its backend model id.

    Names without an entry in MODEL_MAP pass through unchanged.
    """
    try:
        return MODEL_MAP[name]
    except KeyError:
        return name
 
 
39
 
40
  # ==========================================================
41
  # LOAD KEYS
 
43
# Collect upstream API keys from OLLAMA_KEY_1 .. OLLAMA_KEY_100 env vars.
OLLAMA_KEYS = []

for slot in range(1, 101):
    candidate = os.getenv(f"OLLAMA_KEY_{slot}")
    if candidate:
        OLLAMA_KEYS.append(candidate)

# Keep the rotation code path alive even with no configured keys.
if not OLLAMA_KEYS:
    OLLAMA_KEYS.append("dummy")

# ==========================================================
# KEY MANAGER
# ==========================================================
# Cursor for round-robin rotation in get_key().
last_index = 0

# Per-key bookkeeping: busy = currently reserved by a request,
# ok = health flag, index = 1-based position for diagnostics.
key_status = {}

for position, api_key in enumerate(OLLAMA_KEYS, 1):
    key_status[api_key] = {"busy": False, "ok": True, "index": position}
66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
def get_key():
    """Reserve the next available upstream key, round-robin.

    Scans OLLAMA_KEYS starting just past the previously handed-out key,
    skipping keys that are busy or marked unhealthy.  A returned key is
    flagged busy and MUST be released with release_key().

    Returns:
        The reserved key string, or None when no key is available.
    """
    global last_index

    total = len(OLLAMA_KEYS)

    for offset in range(total):
        idx = (last_index + offset) % total
        key = OLLAMA_KEYS[idx]
        status = key_status[key]

        # Fix: honour the "ok" health flag stored in key_status; the
        # previous version only checked "busy", leaving the flag dead.
        if status["ok"] and not status["busy"]:
            status["busy"] = True
            last_index = idx + 1
            return key

    return None
82
 
 
83
def release_key(key):
    """Mark *key* as free again so get_key() may hand it out."""
    status = key_status.get(key)
    if status is not None:
        status["busy"] = False
86
 
87
+ # ==========================================================
88
+ # AUTH
89
+ # ==========================================================
90
def authorized(req: Request):
    """Return True when the request's Authorization header carries the
    master bearer token for this proxy."""
    header = req.headers.get("Authorization", "")
    return header.replace("Bearer ", "") == MASTER_API_KEY
94
 
95
  # ==========================================================
96
  # ROOT
 
99
def root():
    """Health/info endpoint: report status, key count, and upstream URL."""
    return {
        "status": "ok",
        "keys_loaded": len(OLLAMA_KEYS),
        "base_url": BASE_URL,
    }
105
 
 
106
  # ==========================================================
107
  # MODELS
108
  # ==========================================================
109
@app.get("/v1/models")
async def models(req: Request):
    """Proxy the upstream /v1/models listing.

    Requires the master key.  Uses the first configured key without
    rotation — listing models is cheap and read-only.
    """
    if not authorized(req):
        return JSONResponse({"error": "Unauthorized"}, status_code=401)

    key = OLLAMA_KEYS[0]

    try:
        async with httpx.AsyncClient(timeout=60) as client:
            r = await client.get(
                f"{BASE_URL}/v1/models",
                headers={"Authorization": f"Bearer {key}"},
            )

        return Response(
            content=r.content,
            media_type="application/json",
        )
    except httpx.HTTPError as e:
        # Fix: an unreachable upstream previously escaped as an unhandled
        # exception; surface it as a clean JSON 500 instead.
        return JSONResponse({"error": str(e)}, status_code=500)
126
 
127
  # ==========================================================
128
+ # OPENAI CHAT
129
  # ==========================================================
130
  @app.post("/v1/chat/completions")
131
+ async def openai_chat(req: Request):
132
+
133
+ if not authorized(req):
134
+ return JSONResponse({"error": "Unauthorized"}, status_code=401)
 
 
135
 
136
  try:
137
  body = await req.json()
 
 
 
 
138
  except:
139
+ return JSONResponse({"error": "Invalid JSON"}, status_code=400)
 
 
 
140
 
141
+ body["model"] = map_model(body.get("model", ""))
 
142
 
143
  is_stream = body.get("stream", False)
144
 
 
152
  key = get_key()
153
 
154
  if not key:
155
+ await asyncio.sleep(0.2)
156
  continue
157
 
158
  try:
159
+ async with httpx.AsyncClient(timeout=180) as client:
 
 
 
160
  r = await client.post(
161
  f"{BASE_URL}/v1/chat/completions",
162
  json=body,
163
+ headers={"Authorization": f"Bearer {key}"}
 
 
 
164
  )
165
 
166
+ return Response(
167
+ content=r.content,
168
+ media_type=r.headers.get("content-type", "application/json")
169
+ )
 
 
 
 
 
 
 
 
 
170
 
171
+ except:
172
+ pass
173
 
174
  finally:
175
  release_key(key)
176
 
177
+ return JSONResponse({"error": "All keys failed"}, status_code=500)
 
 
 
178
 
179
  # ------------------------------------------------------
180
  # STREAM
 
186
  key = get_key()
187
 
188
  if not key:
189
+ await asyncio.sleep(0.2)
190
  continue
191
 
192
  try:
193
+ timeout = httpx.Timeout(connect=15, read=None, write=15, pool=10)
 
 
 
 
 
194
 
195
+ async with httpx.AsyncClient(timeout=timeout) as client:
 
 
196
 
197
  async with client.stream(
198
  "POST",
199
  f"{BASE_URL}/v1/chat/completions",
200
  json=body,
201
+ headers={"Authorization": f"Bearer {key}"}
 
 
 
202
  ) as r:
203
 
 
 
 
 
204
  async for chunk in r.aiter_bytes():
205
+ yield chunk
 
206
 
 
207
  return
208
 
209
+ except:
210
+ pass
211
 
212
  finally:
213
  release_key(key)
214
 
215
+ yield b'data: {"error":"failed"}\n\n'
 
 
 
 
 
216
 
217
+ return StreamingResponse(gen(), media_type="text/event-stream")
218
 
219
  # ==========================================================
220
+ # ANTHROPIC RESPONSE CONVERTER
221
  # ==========================================================
222
+ def to_anthropic(data, model_name):
223
 
224
  text = ""
225
 
226
+ try:
227
+ if "choices" in data:
228
+ text = data["choices"][0]["message"]["content"]
229
+ elif "message" in data:
230
+ text = data["message"]["content"]
231
+ except:
232
+ pass
 
233
 
234
  return {
235
  "id": f"msg_{uuid.uuid4().hex[:10]}",
 
241
  "text": text
242
  }
243
  ],
244
+ "model": model_name,
245
  "stop_reason": "end_turn",
246
  "stop_sequence": None,
247
  "usage": {
 
250
  }
251
  }
252
 
 
253
  # ==========================================================
254
+ # ANTHROPIC STREAM CONVERTER
255
  # ==========================================================
256
  async def anthropic_stream(lines, model):
257
 
258
  msg_id = f"msg_{uuid.uuid4().hex[:10]}"
259
 
260
+ start_payload = {
261
+ "type": "message_start",
262
+ "message": {
263
+ "id": msg_id,
264
+ "type": "message",
265
+ "role": "assistant",
266
+ "model": model,
267
+ "content": [],
268
+ "stop_reason": None,
269
+ "stop_sequence": None,
270
+ "usage": {
271
+ "input_tokens": 0,
272
+ "output_tokens": 0
273
+ }
274
+ }
275
+ }
276
 
277
+ yield "data: " + json.dumps(start_payload) + "\n\n"
278
+
279
+ yield "data: " + json.dumps({
280
+ "type": "content_block_start",
281
+ "index": 0,
282
+ "content_block": {
283
+ "type": "text"
284
+ }
285
+ }) + "\n\n"
286
 
287
  async for line in lines:
288
 
 
304
 
305
  text = ""
306
 
307
+ try:
308
+ if "choices" in data:
309
  delta = data["choices"][0]["delta"]
310
  text = delta.get("content", "")
311
 
312
  if not text:
313
  text = delta.get("reasoning", "")
 
 
314
 
315
+ elif "message" in data:
316
+ text = data["message"].get("content", "")
317
+ except:
318
+ pass
319
 
320
  if text:
321
+ payload = {
322
+ "type": "content_block_delta",
323
+ "index": 0,
324
+ "delta": {
325
+ "type": "text_delta",
326
+ "text": text
327
+ }
328
+ }
329
 
330
+ yield "data: " + json.dumps(payload) + "\n\n"
331
 
332
+ yield "data: " + json.dumps({
333
+ "type": "content_block_stop",
334
+ "index": 0
335
+ }) + "\n\n"
336
 
337
+ yield "data: " + json.dumps({
338
+ "type": "message_delta",
339
+ "delta": {
340
+ "stop_reason": "end_turn",
341
+ "stop_sequence": None
342
+ },
343
+ "usage": {
344
+ "output_tokens": 0
345
+ }
346
+ }) + "\n\n"
347
 
348
+ yield "data: " + json.dumps({
349
+ "type": "message_stop"
350
+ }) + "\n\n"
351
 
352
  # ==========================================================
353
+ # ANTHROPIC CHAT
354
  # ==========================================================
355
  @app.post("/v1/messages")
356
+ async def anthropic_chat(req: Request):
357
 
358
+ if not authorized(req):
359
+ return JSONResponse({"error": "Unauthorized"}, status_code=401)
 
 
 
360
 
361
  try:
362
  body = await req.json()
 
 
 
 
363
  except:
364
+ return JSONResponse({"error": "Invalid JSON"}, status_code=400)
 
 
 
 
 
 
 
 
365
 
366
+ original_model = body.get("model", "claude-opus-4-7")
367
 
368
  payload = {
369
+ "model": map_model(original_model),
370
+ "messages": body.get("messages", []),
371
+ "stream": body.get("stream", False)
372
  }
373
 
374
  is_stream = body.get("stream", False)
 
383
  key = get_key()
384
 
385
  if not key:
386
+ await asyncio.sleep(0.2)
387
  continue
388
 
389
  try:
390
+ async with httpx.AsyncClient(timeout=180) as client:
 
 
 
391
  r = await client.post(
392
  f"{BASE_URL}/v1/chat/completions",
393
  json=payload,
394
+ headers={"Authorization": f"Bearer {key}"}
 
 
 
395
  )
396
 
397
+ data = r.json()
 
 
398
 
399
+ return JSONResponse(
400
+ to_anthropic(data, original_model)
401
+ )
 
 
 
402
 
403
  except:
404
  pass
 
406
  finally:
407
  release_key(key)
408
 
409
+ return JSONResponse({"error": "All keys failed"}, status_code=500)
 
 
 
410
 
411
  # ------------------------------------------------------
412
  # STREAM
 
418
  key = get_key()
419
 
420
  if not key:
421
+ await asyncio.sleep(0.2)
422
  continue
423
 
424
  try:
425
+ timeout = httpx.Timeout(connect=15, read=None, write=15, pool=10)
 
 
 
 
 
426
 
427
+ async with httpx.AsyncClient(timeout=timeout) as client:
 
 
428
 
429
  async with client.stream(
430
  "POST",
431
  f"{BASE_URL}/v1/chat/completions",
432
  json=payload,
433
+ headers={"Authorization": f"Bearer {key}"}
 
 
 
434
  ) as r:
435
 
436
+ async for chunk in anthropic_stream(
 
 
 
437
  r.aiter_lines(),
438
  original_model
439
  ):
440
+ yield chunk
441
 
442
  return
443
 
 
447
  finally:
448
  release_key(key)
449
 
450
+ yield 'data: {"error":"failed"}\n\n'
451
 
452
+ return StreamingResponse(gen(), media_type="text/event-stream")