Spaces:

bahi-bh
/

Duck

Running

App Files Files Community

bahi-bh committed 12 days ago

Commit

88ea102

verified ·

1 Parent(s): e2031cf

Update app.py

Browse files

Files changed (1) hide show

app.py +327 -97

app.py CHANGED Viewed

@@ -1,13 +1,13 @@
 """
 ╔══════════════════════════════════════════════════════════════════╗
-║         Universal AI Gateway  –  Production v6.0                ║
-║                                                                  ║
-║  • كل نموذج مربوط بـ provider يعمل فعلاً (لا auto عشوائي)       ║
-║  • عائلة Cohere كاملة عبر HuggingSpace (بدون auth)              ║
-║  • Kimi عبر Jmuz                                                 ║
-║  • GPT-4 عبر Jmuz / Liaobots / PollinationsAI                   ║
-║  • Streaming حقيقي بدون MissingAuthError                         ║
-║  • Fallback chain مرتب لكل نموذج                                ║
 ╚══════════════════════════════════════════════════════════════════╝
 """
@@ -18,44 +18,49 @@ import json
 import logging
 import time
 import uuid
 from contextlib import asynccontextmanager
 from typing import Any, Dict, List, Optional
 import g4f
 from fastapi import FastAPI, HTTPException, Request
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse, StreamingResponse
 from g4f import Provider
 from g4f.client import Client
 from pydantic import BaseModel
 # ──────────────────────────────────────────────────────────────────
 # LOGGING
 # ──────────────────────────────────────────────────────────────────
 logging.basicConfig(
     level=logging.INFO,
     format="%(asctime)s | %(levelname)-7s | %(message)s",
     datefmt="%H:%M:%S",
 )
 log = logging.getLogger("gw")
 # ──────────────────────────────────────────────────────────────────
 # CONFIG
 # ──────────────────────────────────────────────────────────────────
-API_KEY       = "sk-your-secret-key"
-CALL_TIMEOUT  = 60   # seconds – non-streaming
-STREAM_TIMEOUT = 90  # seconds – streaming
 # ══════════════════════════════════════════════════════════════════
-#  MODEL → PROVIDER ROUTING TABLE
-#
-#  قائمة مرتبة: أول provider ينجح يُستخدم، الباقي fallback.
-#  لا يوجد أي provider يحتاج API key هنا.
 # ══════════════════════════════════════════════════════════════════
 ROUTING: Dict[str, List[Any]] = {
-    # ── Cohere family (HuggingSpace يستضيف spaces رسمية لـ Cohere) ─
     "command-r":            [Provider.HuggingSpace, Provider.Jmuz],
     "command-r-plus":       [Provider.HuggingSpace, Provider.Jmuz],
     "command-r7b":          [Provider.HuggingSpace],
@@ -67,56 +72,56 @@ ROUTING: Dict[str, List[Any]] = {
     "c4ai-aya-expanse-8b":  [Provider.HuggingSpace],
     "c4ai-aya-expanse-32b": [Provider.HuggingSpace],
-    # ── Kimi / Moonshot ───────────────────────────────────────────
     "kimi":             [Provider.Jmuz],
     "moonshot-v1-8k":   [Provider.Jmuz],
     "moonshot-v1-32k":  [Provider.Jmuz],
     "moonshot-v1-128k": [Provider.Jmuz],
-    # ── GPT-4 family ──────────────────────────────────────────────
     "gpt-4":         [Provider.Jmuz, Provider.Liaobots, Provider.PollinationsAI],
     "gpt-4-turbo":   [Provider.Jmuz, Provider.Liaobots, Provider.PollinationsAI],
     "gpt-4o":        [Provider.PollinationsAI, Provider.Jmuz, Provider.Liaobots],
     "gpt-4o-mini":   [Provider.PollinationsAI, Provider.DDG, Provider.Jmuz],
     "gpt-3.5-turbo": [Provider.DDG, Provider.Jmuz, Provider.PollinationsAI],
-    # ── DeepSeek ──────────────────────────────────────────────────
     "deepseek-chat": [Provider.PollinationsAI, Provider.Jmuz],
     "deepseek-r1":   [Provider.PollinationsAI, Provider.Jmuz, Provider.DDG],
     "deepseek-v3":   [Provider.PollinationsAI, Provider.Jmuz],
-    # ── Llama ─────────────────────────────────────────────────────
-    "llama-3.1-8b":    [Provider.PollinationsAI, Provider.Jmuz, Provider.DDG],
-    "llama-3.1-70b":   [Provider.PollinationsAI, Provider.Jmuz],
-    "llama-3.3-70b":   [Provider.PollinationsAI, Provider.Jmuz],
-    "llama-3.2-11b":   [Provider.PollinationsAI],
-    "llama-4-scout":   [Provider.PollinationsAI],
-    "llama-4-maverick":[Provider.PollinationsAI],
-    # ── Mistral ───────────────────────────────────────────────────
     "mistral-7b":    [Provider.PollinationsAI, Provider.Jmuz],
     "mixtral-8x7b":  [Provider.PollinationsAI, Provider.Jmuz],
     "mistral-small": [Provider.PollinationsAI],
     "mistral-large": [Provider.PollinationsAI, Provider.Jmuz],
-    # ── Gemini ────────────────────────────────────────────────────
     "gemini-2.0-flash": [Provider.PollinationsAI, Provider.Jmuz],
     "gemini-1.5-flash": [Provider.PollinationsAI, Provider.Jmuz],
     "gemini-1.5-pro":   [Provider.PollinationsAI, Provider.Jmuz],
     "gemini-pro":       [Provider.PollinationsAI, Provider.Jmuz],
-    # ── Qwen ──────────────────────────────────────────────────────
     "qwen-2.5-72b":       [Provider.PollinationsAI, Provider.Jmuz],
     "qwen-2.5-coder-32b": [Provider.PollinationsAI],
     "qwq-32b":            [Provider.PollinationsAI, Provider.Jmuz],
-    # ── Claude (عبر Jmuz proxy — لا حاجة لـ Anthropic key) ───────
-    "claude-3-haiku":   [Provider.Jmuz, Provider.Liaobots],
-    "claude-3-sonnet":  [Provider.Jmuz, Provider.Liaobots],
-    "claude-3-opus":    [Provider.Jmuz, Provider.Liaobots],
-    "claude-3.5-sonnet":[Provider.Jmuz, Provider.Liaobots],
-    # ── Other ─────────────────────────────────────────────────────
     "phi-4":     [Provider.PollinationsAI],
     "sonar-pro": [Provider.PollinationsAI, Provider.Jmuz],
     "sonar":     [Provider.PollinationsAI, Provider.Jmuz],
@@ -124,194 +129,419 @@ ROUTING: Dict[str, List[Any]] = {
 ALL_MODELS: List[str] = sorted(ROUTING.keys())
 # ══════════════════════════════════════════════════════════════════
-#  CORE CALL  (sync – runs in thread)
 # ══════════════════════════════════════════════════════════════════
 def _call(provider_cls: Any, model: str, messages: list, stream: bool) -> Any:
     client = Client(provider=provider_cls)
     return client.chat.completions.create(
         model=model,
         messages=messages,
         stream=stream,
     )
 # ══════════════════════════════════════════════════════════════════
-#  SMART COMPLETION  (async wrapper with fallback chain)
 # ══════════════════════════════════════════════════════════════════
-async def smart_completion(model: str, messages: list, stream: bool = False) -> Any:
     chain = ROUTING.get(model)
     if not chain:
-        log.warning(f"Unknown model '{model}' – using generic fallback chain")
-        chain = [Provider.PollinationsAI, Provider.Jmuz, Provider.DDG]
     timeout = STREAM_TIMEOUT if stream else CALL_TIMEOUT
     errors: List[str] = []
     for provider_cls in chain:
-        pname = getattr(provider_cls, "__name__", str(provider_cls))
         try:
-            log.info(f"  -> {pname} | model={model} | stream={stream}")
             resp = await asyncio.wait_for(
-                asyncio.to_thread(_call, provider_cls, model, messages, stream),
                 timeout=timeout,
             )
             log.info(f"  OK {pname}")
             return resp
         except asyncio.TimeoutError:
             msg = f"{pname}: timeout after {timeout}s"
         except Exception as exc:
             msg = f"{pname}: {type(exc).__name__}: {exc}"
         log.warning(f"  FAIL {msg}")
-        errors.append(msg)
-    raise RuntimeError(f"All providers failed for '{model}':\n" + "\n".join(errors))
 # ══════════════════════════════════════════════════════════════════
-#  STREAMING GENERATOR
 # ══════════════════════════════════════════════════════════════════
 async def sse_generator(model: str, messages: list):
-    cid     = f"chatcmpl-{uuid.uuid4().hex}"
     created = int(time.time())
-    sent    = False
     try:
-        response = await smart_completion(model, messages, stream=True)
         for chunk in response:
             try:
-                content = chunk.choices[0].delta.content or ""
             except Exception:
                 content = ""
             if not content:
                 continue
             sent = True
-            yield "data: " + json.dumps({
-                "id": cid, "object": "chat.completion.chunk",
-                "created": created, "model": model,
-                "choices": [{"index": 0, "delta": {"content": content}, "finish_reason": None}],
-            }, ensure_ascii=False) + "\n\n"
             await asyncio.sleep(0)
         if not sent:
-            yield "data: " + json.dumps(
-                {"error": {"message": "Provider returned empty stream", "type": "empty_stream"}}
-            ) + "\n\n"
         # stop chunk
-        yield "data: " + json.dumps({
-            "id": cid, "object": "chat.completion.chunk",
-            "created": created, "model": model,
-            "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}],
-        }) + "\n\n"
         yield "data: [DONE]\n\n"
     except Exception as exc:
         log.error(f"Stream error: {exc}")
-        yield "data: " + json.dumps(
-            {"error": {"message": str(exc), "type": "server_error"}}
-        ) + "\n\n"
-        yield "data: [DONE]\n\n"
 # ══════════════════════════════════════════════════════════════════
-#  FASTAPI
 # ══════════════════════════════════════════════════════════════════
 @asynccontextmanager
 async def lifespan(app: FastAPI):
-    log.info(f"Gateway ready — {len(ALL_MODELS)} models | providers pinned (no auto)")
     yield
-app = FastAPI(title="Universal AI Gateway", version="6.0.0", lifespan=lifespan)
 app.add_middleware(
     CORSMiddleware,
-    allow_origins=["*"], allow_credentials=True,
-    allow_methods=["*"], allow_headers=["*"],
 )
 class Message(BaseModel):
     role: str
     content: str
 class ChatRequest(BaseModel):
     model: str
     messages: List[Message]
     stream: bool = False
     temperature: Optional[float] = 0.7
-    max_tokens:  Optional[int]   = 4096
 def _auth(req: Request):
     auth = req.headers.get("Authorization", "")
     if not auth:
         return
     if not auth.startswith("Bearer "):
-        raise HTTPException(401, "Invalid Authorization format")
     if auth.removeprefix("Bearer ").strip() != API_KEY:
-        raise HTTPException(403, "Invalid API key")
 @app.get("/")
 async def root():
     return {
         "service": "Universal AI Gateway",
         "version": "6.0.0",
-        "models":  len(ALL_MODELS),
-        "docs":    "/docs",
     }
 @app.get("/v1/models")
 async def get_models(req: Request):
     _auth(req)
     now = int(time.time())
     return {
         "object": "list",
         "data": [
             {
-                "id": m, "object": "model", "created": now, "owned_by": "g4f",
-                "providers": [getattr(p, "__name__", str(p)) for p in ROUTING.get(m, [])],
             }
             for m in ALL_MODELS
         ],
     }
 @app.post("/v1/chat/completions")
-async def chat_completions(req: Request, body: ChatRequest):
     _auth(req)
-    messages = [{"role": m.role, "content": m.content} for m in body.messages]
-    log.info(f"Request model={body.model} stream={body.stream}")
     if body.stream:
         return StreamingResponse(
             sse_generator(body.model, messages),
             media_type="text/event-stream",
-            headers={"Cache-Control": "no-cache", "Connection": "keep-alive", "X-Accel-Buffering": "no"},
         )
     try:
-        response = await smart_completion(body.model, messages, stream=False)
         try:
-            content = response.choices[0].message.content
         except Exception:
             content = str(response)
-        return JSONResponse({
-            "id": f"chatcmpl-{uuid.uuid4().hex}",
-            "object": "chat.completion",
-            "created": int(time.time()),
-            "model": body.model,
-            "choices": [{"index": 0, "message": {"role": "assistant", "content": content}, "finish_reason": "stop"}],
-            "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0},
-        })
     except Exception as exc:
         log.error(f"Error: {exc}")
-        raise HTTPException(500, str(exc))
 if __name__ == "__main__":
     import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=7860, log_level="info")

 """
 ╔══════════════════════════════════════════════════════════════════╗
+║         Universal AI Gateway  –  Production v6.0               ║
+║                                                                ║
+║  • كل نموذج مربوط بـ provider يعمل فعلاً (لا auto عشوائي)     ║
+║  • عائلة Cohere كاملة عبر HuggingSpace (بدون auth)            ║
+║  • Kimi عبر Jmuz                                               ║
+║  • GPT-4 عبر Jmuz / Liaobots / PollinationsAI                 ║
+║  • Streaming حقيقي بدون MissingAuthError                      ║
+║  • Fallback chain مرتب لكل نموذج                              ║
 ╚══════════════════════════════════════════════════════════════════╝
 """
 import logging
 import time
 import uuid
 from contextlib import asynccontextmanager
 from typing import Any, Dict, List, Optional
 import g4f
 from fastapi import FastAPI, HTTPException, Request
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse, StreamingResponse
 from g4f import Provider
 from g4f.client import Client
 from pydantic import BaseModel
 # ──────────────────────────────────────────────────────────────────
 # LOGGING
 # ──────────────────────────────────────────────────────────────────
 logging.basicConfig(
     level=logging.INFO,
     format="%(asctime)s | %(levelname)-7s | %(message)s",
     datefmt="%H:%M:%S",
 )
 log = logging.getLogger("gw")
 # ──────────────────────────────────────────────────────────────────
 # CONFIG
 # ──────────────────────────────────────────────────────────────────
+API_KEY = "sk-your-secret-key"
+CALL_TIMEOUT = 60
+STREAM_TIMEOUT = 90
 # ══════════════════════════════════════════════════════════════════
+# MODEL → PROVIDER ROUTING TABLE
 # ══════════════════════════════════════════════════════════════════
 ROUTING: Dict[str, List[Any]] = {
+    # ── Cohere ────────────────────────────────────────────────
     "command-r":            [Provider.HuggingSpace, Provider.Jmuz],
     "command-r-plus":       [Provider.HuggingSpace, Provider.Jmuz],
     "command-r7b":          [Provider.HuggingSpace],
     "c4ai-aya-expanse-8b":  [Provider.HuggingSpace],
     "c4ai-aya-expanse-32b": [Provider.HuggingSpace],
+    # ── Kimi / Moonshot ──────────────────────────────────────
     "kimi":             [Provider.Jmuz],
     "moonshot-v1-8k":   [Provider.Jmuz],
     "moonshot-v1-32k":  [Provider.Jmuz],
     "moonshot-v1-128k": [Provider.Jmuz],
+    # ── GPT ──────────────────────────────────────────────────
     "gpt-4":         [Provider.Jmuz, Provider.Liaobots, Provider.PollinationsAI],
     "gpt-4-turbo":   [Provider.Jmuz, Provider.Liaobots, Provider.PollinationsAI],
     "gpt-4o":        [Provider.PollinationsAI, Provider.Jmuz, Provider.Liaobots],
     "gpt-4o-mini":   [Provider.PollinationsAI, Provider.DDG, Provider.Jmuz],
     "gpt-3.5-turbo": [Provider.DDG, Provider.Jmuz, Provider.PollinationsAI],
+    # ── DeepSeek ─────────────────────────────────────────────
     "deepseek-chat": [Provider.PollinationsAI, Provider.Jmuz],
     "deepseek-r1":   [Provider.PollinationsAI, Provider.Jmuz, Provider.DDG],
     "deepseek-v3":   [Provider.PollinationsAI, Provider.Jmuz],
+    # ── Llama ────────────────────────────────────────────────
+    "llama-3.1-8b":     [Provider.PollinationsAI, Provider.Jmuz, Provider.DDG],
+    "llama-3.1-70b":    [Provider.PollinationsAI, Provider.Jmuz],
+    "llama-3.3-70b":    [Provider.PollinationsAI, Provider.Jmuz],
+    "llama-3.2-11b":    [Provider.PollinationsAI],
+    "llama-4-scout":    [Provider.PollinationsAI],
+    "llama-4-maverick": [Provider.PollinationsAI],
+    # ── Mistral ──────────────────────────────────────────────
     "mistral-7b":    [Provider.PollinationsAI, Provider.Jmuz],
     "mixtral-8x7b":  [Provider.PollinationsAI, Provider.Jmuz],
     "mistral-small": [Provider.PollinationsAI],
     "mistral-large": [Provider.PollinationsAI, Provider.Jmuz],
+    # ── Gemini ───────────────────────────────────────────────
     "gemini-2.0-flash": [Provider.PollinationsAI, Provider.Jmuz],
     "gemini-1.5-flash": [Provider.PollinationsAI, Provider.Jmuz],
     "gemini-1.5-pro":   [Provider.PollinationsAI, Provider.Jmuz],
     "gemini-pro":       [Provider.PollinationsAI, Provider.Jmuz],
+    # ── Qwen ─────────────────────────────────────────────────
     "qwen-2.5-72b":       [Provider.PollinationsAI, Provider.Jmuz],
     "qwen-2.5-coder-32b": [Provider.PollinationsAI],
     "qwq-32b":            [Provider.PollinationsAI, Provider.Jmuz],
+    # ── Claude ───────────────────────────────────────────────
+    "claude-3-haiku":    [Provider.Jmuz, Provider.Liaobots],
+    "claude-3-sonnet":   [Provider.Jmuz, Provider.Liaobots],
+    "claude-3-opus":     [Provider.Jmuz, Provider.Liaobots],
+    "claude-3.5-sonnet": [Provider.Jmuz, Provider.Liaobots],
+    # ── Other ────────────────────────────────────────────────
     "phi-4":     [Provider.PollinationsAI],
     "sonar-pro": [Provider.PollinationsAI, Provider.Jmuz],
     "sonar":     [Provider.PollinationsAI, Provider.Jmuz],
 ALL_MODELS: List[str] = sorted(ROUTING.keys())
 # ══════════════════════════════════════════════════════════════════
+# CORE CALL
 # ══════════════════════════════════════════════════════════════════
 def _call(provider_cls: Any, model: str, messages: list, stream: bool) -> Any:
     client = Client(provider=provider_cls)
     return client.chat.completions.create(
         model=model,
         messages=messages,
         stream=stream,
     )
 # ══════════════════════════════════════════════════════════════════
+# SMART COMPLETION
 # ══════════════════════════════════════════════════════════════════
+async def smart_completion(
+    model: str,
+    messages: list,
+    stream: bool = False
+) -> Any:
     chain = ROUTING.get(model)
     if not chain:
+        log.warning(
+            f"Unknown model '{model}' – using generic fallback chain"
+        )
+        chain = [
+            Provider.PollinationsAI,
+            Provider.Jmuz,
+            Provider.DDG,
+        ]
     timeout = STREAM_TIMEOUT if stream else CALL_TIMEOUT
     errors: List[str] = []
     for provider_cls in chain:
+        pname = getattr(
+            provider_cls,
+            "__name__",
+            str(provider_cls)
+        )
         try:
+            log.info(
+                f"  -> {pname} | model={model} | stream={stream}"
+            )
             resp = await asyncio.wait_for(
+                asyncio.to_thread(
+                    _call,
+                    provider_cls,
+                    model,
+                    messages,
+                    stream,
+                ),
                 timeout=timeout,
             )
             log.info(f"  OK {pname}")
             return resp
         except asyncio.TimeoutError:
             msg = f"{pname}: timeout after {timeout}s"
         except Exception as exc:
             msg = f"{pname}: {type(exc).__name__}: {exc}"
         log.warning(f"  FAIL {msg}")
+        errors.append(msg)
+    raise RuntimeError(
+        f"All providers failed for '{model}':\n"
+        + "\n".join(errors)
+    )
 # ══════════════════════════════════════════════════════════════════
+# STREAMING GENERATOR
 # ══════════════════════════════════════════════════════════════════
 async def sse_generator(model: str, messages: list):
+    cid = f"chatcmpl-{uuid.uuid4().hex}"
     created = int(time.time())
+    sent = False
     try:
+        response = await smart_completion(
+            model,
+            messages,
+            stream=True,
+        )
         for chunk in response:
             try:
+                content = (
+                    chunk.choices[0].delta.content or ""
+                )
             except Exception:
                 content = ""
             if not content:
                 continue
             sent = True
+            yield (
+                "data: "
+                + json.dumps(
+                    {
+                        "id": cid,
+                        "object": "chat.completion.chunk",
+                        "created": created,
+                        "model": model,
+                        "choices": [
+                            {
+                                "index": 0,
+                                "delta": {
+                                    "content": content
+                                },
+                                "finish_reason": None,
+                            }
+                        ],
+                    },
+                    ensure_ascii=False,
+                )
+                + "\n\n"
+            )
             await asyncio.sleep(0)
         if not sent:
+            yield (
+                "data: "
+                + json.dumps(
+                    {
+                        "error": {
+                            "message": "Provider returned empty stream",
+                            "type": "empty_stream",
+                        }
+                    }
+                )
+                + "\n\n"
+            )
         # stop chunk
+        yield (
+            "data: "
+            + json.dumps(
+                {
+                    "id": cid,
+                    "object": "chat.completion.chunk",
+                    "created": created,
+                    "model": model,
+                    "choices": [
+                        {
+                            "index": 0,
+                            "delta": {},
+                            "finish_reason": "stop",
+                        }
+                    ],
+                }
+            )
+            + "\n\n"
+        )
         yield "data: [DONE]\n\n"
     except Exception as exc:
         log.error(f"Stream error: {exc}")
+        yield (
+            "data: "
+            + json.dumps(
+                {
+                    "error": {
+                        "message": str(exc),
+                        "type": "server_error",
+                    }
+                }
+            )
+            + "\n\n"
+        )
+        yield "data: [DONE]\n\n"
 # ══════════════════════════════════════════════════════════════════
+# FASTAPI
 # ══════════════════════════════════════════════════════════════════
 @asynccontextmanager
 async def lifespan(app: FastAPI):
+    log.info(
+        f"Gateway ready — {len(ALL_MODELS)} models | providers pinned (no auto)"
+    )
     yield
+app = FastAPI(
+    title="Universal AI Gateway",
+    version="6.0.0",
+    lifespan=lifespan,
+)
 app.add_middleware(
     CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
 )
+# ══════════════════════════════════════════════════════════════════
+# Pydantic Models
+# ══════════════════════════════════════════════════════════════════
 class Message(BaseModel):
     role: str
     content: str
 class ChatRequest(BaseModel):
     model: str
     messages: List[Message]
     stream: bool = False
     temperature: Optional[float] = 0.7
+    max_tokens: Optional[int] = 4096
+# ══════════════════════════════════════════════════════════════════
+# AUTH
+# ══════════════════════════════════════════════════════════════════
 def _auth(req: Request):
     auth = req.headers.get("Authorization", "")
     if not auth:
         return
     if not auth.startswith("Bearer "):
+        raise HTTPException(
+            401,
+            "Invalid Authorization format"
+        )
     if auth.removeprefix("Bearer ").strip() != API_KEY:
+        raise HTTPException(
+            403,
+            "Invalid API key"
+        )
+# ══════════════════════════════════════════════════════════════════
+# ROOT
+# ══════════════════════════════════════════════════════════════════
 @app.get("/")
 async def root():
     return {
         "service": "Universal AI Gateway",
         "version": "6.0.0",
+        "models": len(ALL_MODELS),
+        "docs": "/docs",
     }
+# ══════════════════════════════════════════════════════════════════
+# MODELS ENDPOINT
+# ══════════════════════════════════════════════════════════════════
 @app.get("/v1/models")
 async def get_models(req: Request):
     _auth(req)
     now = int(time.time())
     return {
         "object": "list",
         "data": [
             {
+                "id": m,
+                "object": "model",
+                "created": now,
+                "owned_by": "g4f",
+                "providers": [
+                    getattr(p, "__name__", str(p))
+                    for p in ROUTING.get(m, [])
+                ],
             }
             for m in ALL_MODELS
         ],
     }
+# ══════════════════════════════════════════════════════════════════
+# CHAT COMPLETIONS
+# ══════════════════════════════════════════════════════════════════
 @app.post("/v1/chat/completions")
+async def chat_completions(
+    req: Request,
+    body: ChatRequest
+):
     _auth(req)
+    messages = [
+        {
+            "role": m.role,
+            "content": m.content,
+        }
+        for m in body.messages
+    ]
+    log.info(
+        f"Request model={body.model} stream={body.stream}"
+    )
+    # STREAMING
     if body.stream:
         return StreamingResponse(
             sse_generator(body.model, messages),
             media_type="text/event-stream",
+            headers={
+                "Cache-Control": "no-cache",
+                "Connection": "keep-alive",
+                "X-Accel-Buffering": "no",
+            },
         )
+    # NORMAL RESPONSE
     try:
+        response = await smart_completion(
+            body.model,
+            messages,
+            stream=False,
+        )
         try:
+            content = (
+                response.choices[0].message.content
+            )
         except Exception:
             content = str(response)
+        return JSONResponse(
+            {
+                "id": f"chatcmpl-{uuid.uuid4().hex}",
+                "object": "chat.completion",
+                "created": int(time.time()),
+                "model": body.model,
+                "choices": [
+                    {
+                        "index": 0,
+                        "message": {
+                            "role": "assistant",
+                            "content": content,
+                        },
+                        "finish_reason": "stop",
+                    }
+                ],
+                "usage": {
+                    "prompt_tokens": 0,
+                    "completion_tokens": 0,
+                    "total_tokens": 0,
+                },
+            }
+        )
     except Exception as exc:
         log.error(f"Error: {exc}")
+        raise HTTPException(
+            500,
+            str(exc)
+        )
+# ══════════════════════════════════════════════════════════════════
+# RUN
+# ══════════════════════════════════════════════════════════════════
 if __name__ == "__main__":
     import uvicorn
+    uvicorn.run(
+        app,
+        host="0.0.0.0",
+        port=7860,
+        log_level="info",
+    )