Spaces:

bahi-bh
/

Duck

Running

App Files Files Community

bahi-bh committed on 11 days ago

Commit

d589d39

verified ·

1 Parent(s): 88ea102

Update app.py

Browse files

Files changed (1) hide show

app.py +273 -393

app.py CHANGED Viewed

@@ -1,70 +1,48 @@
-"""
-╔══════════════════════════════════════════════════════════════════╗
-║         Universal AI Gateway  –  Production v6.0               ║
-║                                                                ║
-║  • كل نموذج مربوط بـ provider يعمل فعلاً (لا auto عشوائي)     ║
-║  • عائلة Cohere كاملة عبر HuggingSpace (بدون auth)            ║
-║  • Kimi عبر Jmuz                                               ║
-║  • GPT-4 عبر Jmuz / Liaobots / PollinationsAI                 ║
-║  • Streaming حقيقي بدون MissingAuthError                      ║
-║  • Fallback chain مرتب لكل نموذج                              ║
-╚══════════════════════════════════════════════════════════════════╝
-"""
-from __future__ import annotations
 import asyncio
 import json
-import logging
 import time
 import uuid
-from contextlib import asynccontextmanager
-from typing import Any, Dict, List, Optional
 import g4f
-from fastapi import FastAPI, HTTPException, Request
-from fastapi.middleware.cors import CORSMiddleware
-from fastapi.responses import JSONResponse, StreamingResponse
-from g4f import Provider
 from g4f.client import Client
-from pydantic import BaseModel
-# ──────────────────────────────────────────────────────────────────
 # LOGGING
-# ──────────────────────────────────────────────────────────────────
-logging.basicConfig(
-    level=logging.INFO,
-    format="%(asctime)s | %(levelname)-7s | %(message)s",
-    datefmt="%H:%M:%S",
-)
-log = logging.getLogger("gw")
-# ──────────────────────────────────────────────────────────────────
 # CONFIG
-# ──────────────────────────────────────────────────────────────────
 API_KEY = "sk-your-secret-key"
-CALL_TIMEOUT = 60
-STREAM_TIMEOUT = 90
-# ══════════════════════════════════════════════════════════════════
-# MODEL → PROVIDER ROUTING TABLE
-# ══════════════════════════════════════════════════════════════════
-ROUTING: Dict[str, List[Any]] = {
-    # ── Cohere ────────────────────────────────────────────────
     "command-r":            [Provider.HuggingSpace, Provider.Jmuz],
     "command-r-plus":       [Provider.HuggingSpace, Provider.Jmuz],
     "command-r7b":          [Provider.HuggingSpace],
-    "command-r7b-arabic":   [Provider.HuggingSpace],
     "command-a":            [Provider.HuggingSpace],
     "command":              [Provider.HuggingSpace, Provider.Jmuz],
     "command-light":        [Provider.HuggingSpace, Provider.Jmuz],
@@ -72,476 +50,378 @@ ROUTING: Dict[str, List[Any]] = {
     "c4ai-aya-expanse-8b":  [Provider.HuggingSpace],
     "c4ai-aya-expanse-32b": [Provider.HuggingSpace],
-    # ── Kimi / Moonshot ──────────────────────────────────────
     "kimi":             [Provider.Jmuz],
     "moonshot-v1-8k":   [Provider.Jmuz],
     "moonshot-v1-32k":  [Provider.Jmuz],
-    "moonshot-v1-128k": [Provider.Jmuz],
-    # ── GPT ──────────────────────────────────────────────────
     "gpt-4":         [Provider.Jmuz, Provider.Liaobots, Provider.PollinationsAI],
     "gpt-4-turbo":   [Provider.Jmuz, Provider.Liaobots, Provider.PollinationsAI],
     "gpt-4o":        [Provider.PollinationsAI, Provider.Jmuz, Provider.Liaobots],
     "gpt-4o-mini":   [Provider.PollinationsAI, Provider.DDG, Provider.Jmuz],
     "gpt-3.5-turbo": [Provider.DDG, Provider.Jmuz, Provider.PollinationsAI],
-    # ── DeepSeek ─────────────────────────────────────────────
     "deepseek-chat": [Provider.PollinationsAI, Provider.Jmuz],
-    "deepseek-r1":   [Provider.PollinationsAI, Provider.Jmuz, Provider.DDG],
     "deepseek-v3":   [Provider.PollinationsAI, Provider.Jmuz],
-    # ── Llama ────────────────────────────────────────────────
-    "llama-3.1-8b":     [Provider.PollinationsAI, Provider.Jmuz, Provider.DDG],
     "llama-3.1-70b":    [Provider.PollinationsAI, Provider.Jmuz],
     "llama-3.3-70b":    [Provider.PollinationsAI, Provider.Jmuz],
-    "llama-3.2-11b":    [Provider.PollinationsAI],
     "llama-4-scout":    [Provider.PollinationsAI],
     "llama-4-maverick": [Provider.PollinationsAI],
-    # ── Mistral ──────────────────────────────────────────────
-    "mistral-7b":    [Provider.PollinationsAI, Provider.Jmuz],
-    "mixtral-8x7b":  [Provider.PollinationsAI, Provider.Jmuz],
-    "mistral-small": [Provider.PollinationsAI],
-    "mistral-large": [Provider.PollinationsAI, Provider.Jmuz],
-    # ── Gemini ───────────────────────────────────────────────
     "gemini-2.0-flash": [Provider.PollinationsAI, Provider.Jmuz],
-    "gemini-1.5-flash": [Provider.PollinationsAI, Provider.Jmuz],
-    "gemini-1.5-pro":   [Provider.PollinationsAI, Provider.Jmuz],
-    "gemini-pro":       [Provider.PollinationsAI, Provider.Jmuz],
-    # ── Qwen ─────────────────────────────────────────────────
     "qwen-2.5-72b":       [Provider.PollinationsAI, Provider.Jmuz],
     "qwen-2.5-coder-32b": [Provider.PollinationsAI],
     "qwq-32b":            [Provider.PollinationsAI, Provider.Jmuz],
-    # ── Claude ───────────────────────────────────────────────
     "claude-3-haiku":    [Provider.Jmuz, Provider.Liaobots],
     "claude-3-sonnet":   [Provider.Jmuz, Provider.Liaobots],
     "claude-3-opus":     [Provider.Jmuz, Provider.Liaobots],
     "claude-3.5-sonnet": [Provider.Jmuz, Provider.Liaobots],
-    # ── Other ────────────────────────────────────────────────
     "phi-4":     [Provider.PollinationsAI],
     "sonar-pro": [Provider.PollinationsAI, Provider.Jmuz],
     "sonar":     [Provider.PollinationsAI, Provider.Jmuz],
 }
-ALL_MODELS: List[str] = sorted(ROUTING.keys())
-# ══════════════════════════════════════════════════════════════════
-# CORE CALL
-# ══════════════════════════════════════════════════════════════════
-def _call(provider_cls: Any, model: str, messages: list, stream: bool) -> Any:
-    client = Client(provider=provider_cls)
-    return client.chat.completions.create(
-        model=model,
-        messages=messages,
-        stream=stream,
-    )
-# ══════════════════════════════════════════════════════════════════
-# SMART COMPLETION
-# ══════════════════════════════════════════════════════════════════
-async def smart_completion(
-    model: str,
-    messages: list,
     stream: bool = False
-) -> Any:
-    chain = ROUTING.get(model)
-    if not chain:
-        log.warning(
-            f"Unknown model '{model}' – using generic fallback chain"
-        )
-        chain = [
-            Provider.PollinationsAI,
-            Provider.Jmuz,
-            Provider.DDG,
-        ]
-    timeout = STREAM_TIMEOUT if stream else CALL_TIMEOUT
-    errors: List[str] = []
-    for provider_cls in chain:
-        pname = getattr(
-            provider_cls,
-            "__name__",
-            str(provider_cls)
-        )
         try:
-            log.info(
-                f"  -> {pname} | model={model} | stream={stream}"
-            )
-            resp = await asyncio.wait_for(
                 asyncio.to_thread(
-                    _call,
-                    provider_cls,
-                    model,
-                    messages,
-                    stream,
                 ),
-                timeout=timeout,
             )
-            log.info(f"  OK {pname}")
-            return resp
         except asyncio.TimeoutError:
-            msg = f"{pname}: timeout after {timeout}s"
-        except Exception as exc:
-            msg = f"{pname}: {type(exc).__name__}: {exc}"
-        log.warning(f"  FAIL {msg}")
-        errors.append(msg)
-    raise RuntimeError(
-        f"All providers failed for '{model}':\n"
-        + "\n".join(errors)
-    )
-# ══════════════════════════════════════════════════════════════════
-# STREAMING GENERATOR
-# ══════════════════════════════════════════════════════════════════
-async def sse_generator(model: str, messages: list):
-    cid = f"chatcmpl-{uuid.uuid4().hex}"
-    created = int(time.time())
-    sent = False
-    try:
-        response = await smart_completion(
-            model,
-            messages,
-            stream=True,
-        )
-        for chunk in response:
             try:
-                content = (
-                    chunk.choices[0].delta.content or ""
                 )
-            except Exception:
-                content = ""
-            if not content:
-                continue
-            sent = True
-            yield (
-                "data: "
-                + json.dumps(
-                    {
-                        "id": cid,
-                        "object": "chat.completion.chunk",
-                        "created": created,
-                        "model": model,
-                        "choices": [
-                            {
-                                "index": 0,
-                                "delta": {
-                                    "content": content
-                                },
-                                "finish_reason": None,
                             }
-                        ],
-                    },
-                    ensure_ascii=False,
-                )
-                + "\n\n"
-            )
-            await asyncio.sleep(0)
-        if not sent:
-            yield (
-                "data: "
-                + json.dumps(
-                    {
                         "error": {
                             "message": "Provider returned empty stream",
-                            "type": "empty_stream",
                         }
                     }
-                )
-                + "\n\n"
-            )
-        # stop chunk
-        yield (
-            "data: "
-            + json.dumps(
-                {
-                    "id": cid,
                     "object": "chat.completion.chunk",
-                    "created": created,
-                    "model": model,
                     "choices": [
                         {
                             "index": 0,
                             "delta": {},
-                            "finish_reason": "stop",
                         }
-                    ],
                 }
-            )
-            + "\n\n"
-        )
-        yield "data: [DONE]\n\n"
-    except Exception as exc:
-        log.error(f"Stream error: {exc}")
-        yield (
-            "data: "
-            + json.dumps(
-                {
                     "error": {
-                        "message": str(exc),
-                        "type": "server_error",
                     }
                 }
-            )
-            + "\n\n"
-        )
-        yield "data: [DONE]\n\n"
-# ══════════════════════════════════════════════════════════════════
-# FASTAPI
-# ══════════════════════════════════════════════════════════════════
-@asynccontextmanager
-async def lifespan(app: FastAPI):
-    log.info(
-        f"Gateway ready — {len(ALL_MODELS)} models | providers pinned (no auto)"
-    )
-    yield
-app = FastAPI(
-    title="Universal AI Gateway",
-    version="6.0.0",
-    lifespan=lifespan,
-)
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=["*"],
-    allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
-)
-# ══════════════════════════════════════════════════════════════════
-# Pydantic Models
-# ══════════════════════════════════════════════════════════════════
-class Message(BaseModel):
-    role: str
-    content: str
-class ChatRequest(BaseModel):
-    model: str
-    messages: List[Message]
-    stream: bool = False
-    temperature: Optional[float] = 0.7
-    max_tokens: Optional[int] = 4096
-# ══════════════════════════════════════════════════════════════════
-# AUTH
-# ══════════════════════════════════════════════════════════════════
-def _auth(req: Request):
-    auth = req.headers.get("Authorization", "")
-    if not auth:
-        return
-    if not auth.startswith("Bearer "):
-        raise HTTPException(
-            401,
-            "Invalid Authorization format"
-        )
-    if auth.removeprefix("Bearer ").strip() != API_KEY:
-        raise HTTPException(
-            403,
-            "Invalid API key"
-        )
-# ══════════════════════════════════════════════════════════════════
-# ROOT
-# ══════════════════════════════════════════════════════════════════
-@app.get("/")
-async def root():
-    return {
-        "service": "Universal AI Gateway",
-        "version": "6.0.0",
-        "models": len(ALL_MODELS),
-        "docs": "/docs",
-    }
-# ══════════════════════════════════════════════════════════════════
-# MODELS ENDPOINT
-# ══════════════════════════════════════════════════════════════════
-@app.get("/v1/models")
-async def get_models(req: Request):
-    _auth(req)
-    now = int(time.time())
-    return {
-        "object": "list",
-        "data": [
-            {
-                "id": m,
-                "object": "model",
-                "created": now,
-                "owned_by": "g4f",
-                "providers": [
-                    getattr(p, "__name__", str(p))
-                    for p in ROUTING.get(m, [])
-                ],
-            }
-            for m in ALL_MODELS
-        ],
-    }
-# ══════════════════════════════════════════════════════════════════
-# CHAT COMPLETIONS
-# ══════════════════════════════════════════════════════════════════
-@app.post("/v1/chat/completions")
-async def chat_completions(
-    req: Request,
-    body: ChatRequest
-):
-    _auth(req)
-    messages = [
-        {
-            "role": m.role,
-            "content": m.content,
-        }
-        for m in body.messages
-    ]
-    log.info(
-        f"Request model={body.model} stream={body.stream}"
-    )
-    # STREAMING
-    if body.stream:
         return StreamingResponse(
-            sse_generator(body.model, messages),
             media_type="text/event-stream",
             headers={
                 "Cache-Control": "no-cache",
                 "Connection": "keep-alive",
-                "X-Accel-Buffering": "no",
-            },
         )
     # NORMAL RESPONSE
     try:
-        response = await smart_completion(
-            body.model,
-            messages,
-            stream=False,
         )
-        try:
-            content = (
-                response.choices[0].message.content
-            )
         except Exception:
-            content = str(response)
-        return JSONResponse(
-            {
-                "id": f"chatcmpl-{uuid.uuid4().hex}",
-                "object": "chat.completion",
-                "created": int(time.time()),
-                "model": body.model,
-                "choices": [
-                    {
-                        "index": 0,
-                        "message": {
-                            "role": "assistant",
-                            "content": content,
-                        },
-                        "finish_reason": "stop",
-                    }
-                ],
-                "usage": {
-                    "prompt_tokens": 0,
-                    "completion_tokens": 0,
-                    "total_tokens": 0,
-                },
             }
-        )
-    except Exception as exc:
-        log.error(f"Error: {exc}")
         raise HTTPException(
-            500,
-            str(exc)
         )
-# ══════════════════════════════════════════════════════════════════
 # RUN
-# ══════════════════════════════════════════════════════════════════
 if __name__ == "__main__":
     import uvicorn
-    uvicorn.run(
-        app,
-        host="0.0.0.0",
-        port=7860,
-        log_level="info",
-    )

+from fastapi import FastAPI, Request, HTTPException
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import StreamingResponse, JSONResponse
+from pydantic import BaseModel
+from typing import List, Optional
 import asyncio
 import json
 import time
 import uuid
+import logging
 import g4f
 from g4f.client import Client
+from g4f import Provider
+# =====================================================
 # LOGGING
+# =====================================================
+# Configure the root logger at INFO; the rest of this module logs via `logger`.
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+# =====================================================
 # CONFIG
+# =====================================================
+# Bearer token that verify_api_key() compares incoming requests against.
+# NOTE(review): hard-coded placeholder value -- presumably meant to be replaced
+# or loaded from the environment before deployment; confirm.
 API_KEY = "sk-your-secret-key"
+# Seconds safe_completion() waits for one provider call before moving on.
+REQUEST_TIMEOUT = 45
+# NOTE(review): not referenced anywhere in the code visible in this diff.
+MAX_RETRIES = 2
+# =====================================================
+# MODEL → PROVIDER MAP
+# The only fix: every model is bound to a provider that works
+# without an API key, instead of the random "auto" selection
+# =====================================================
+# Maps a model name to the ordered list of g4f providers that
+# safe_completion() tries for it, first entry first.
+MODEL_PROVIDERS = {
+    # Cohere - served through HuggingSpace
     "command-r":            [Provider.HuggingSpace, Provider.Jmuz],
     "command-r-plus":       [Provider.HuggingSpace, Provider.Jmuz],
     "command-r7b":          [Provider.HuggingSpace],
     "command-a":            [Provider.HuggingSpace],
     "command":              [Provider.HuggingSpace, Provider.Jmuz],
     "command-light":        [Provider.HuggingSpace, Provider.Jmuz],
     "c4ai-aya-expanse-8b":  [Provider.HuggingSpace],
     "c4ai-aya-expanse-32b": [Provider.HuggingSpace],
+    # Kimi
     "kimi":             [Provider.Jmuz],
     "moonshot-v1-8k":   [Provider.Jmuz],
     "moonshot-v1-32k":  [Provider.Jmuz],
+    # GPT-4
     "gpt-4":         [Provider.Jmuz, Provider.Liaobots, Provider.PollinationsAI],
     "gpt-4-turbo":   [Provider.Jmuz, Provider.Liaobots, Provider.PollinationsAI],
     "gpt-4o":        [Provider.PollinationsAI, Provider.Jmuz, Provider.Liaobots],
     "gpt-4o-mini":   [Provider.PollinationsAI, Provider.DDG, Provider.Jmuz],
     "gpt-3.5-turbo": [Provider.DDG, Provider.Jmuz, Provider.PollinationsAI],
+    # DeepSeek
     "deepseek-chat": [Provider.PollinationsAI, Provider.Jmuz],
+    "deepseek-r1":   [Provider.PollinationsAI, Provider.Jmuz],
     "deepseek-v3":   [Provider.PollinationsAI, Provider.Jmuz],
+    # Llama
+    "llama-3.1-8b":     [Provider.PollinationsAI, Provider.DDG],
     "llama-3.1-70b":    [Provider.PollinationsAI, Provider.Jmuz],
     "llama-3.3-70b":    [Provider.PollinationsAI, Provider.Jmuz],
     "llama-4-scout":    [Provider.PollinationsAI],
     "llama-4-maverick": [Provider.PollinationsAI],
+    # Mistral
+    "mistral-7b":   [Provider.PollinationsAI, Provider.Jmuz],
+    "mixtral-8x7b": [Provider.PollinationsAI, Provider.Jmuz],
+    "mistral-large":[Provider.PollinationsAI, Provider.Jmuz],
+    # Gemini
     "gemini-2.0-flash": [Provider.PollinationsAI, Provider.Jmuz],
+    "gemini-1.5-flash":  [Provider.PollinationsAI, Provider.Jmuz],
+    "gemini-1.5-pro":    [Provider.PollinationsAI, Provider.Jmuz],
+    "gemini-pro":        [Provider.PollinationsAI, Provider.Jmuz],
+    # Qwen
     "qwen-2.5-72b":       [Provider.PollinationsAI, Provider.Jmuz],
     "qwen-2.5-coder-32b": [Provider.PollinationsAI],
     "qwq-32b":            [Provider.PollinationsAI, Provider.Jmuz],
+    # Claude (via the Jmuz proxy)
     "claude-3-haiku":    [Provider.Jmuz, Provider.Liaobots],
     "claude-3-sonnet":   [Provider.Jmuz, Provider.Liaobots],
     "claude-3-opus":     [Provider.Jmuz, Provider.Liaobots],
     "claude-3.5-sonnet": [Provider.Jmuz, Provider.Liaobots],
+    # Other
     "phi-4":     [Provider.PollinationsAI],
     "sonar-pro": [Provider.PollinationsAI, Provider.Jmuz],
     "sonar":     [Provider.PollinationsAI, Provider.Jmuz],
 }
+# =====================================================
+# FASTAPI
+# =====================================================
+# ASGI application object; handed to uvicorn.run() in the __main__ guard.
+app = FastAPI(
+    title="Universal AI Gateway",
+    version="4.2.0"
+)
+# =====================================================
+# CORS
+# =====================================================
+# NOTE(review): wildcard origins/methods/headers are combined with
+# allow_credentials=True here. The FastAPI CORS documentation advises against
+# that combination -- confirm whether credentialed cross-origin requests are
+# actually required.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# =====================================================
+# MODELS
+# =====================================================
+# One chat message: a role label plus its text content.
+# chat_completions() converts each instance into a {"role", "content"} dict.
+class Message(BaseModel):
+    role: str
+    content: str
+# Request body accepted by POST /v1/chat/completions.
+class ChatRequest(BaseModel):
+    model: str
+    messages: List[Message]
     stream: bool = False
+    # NOTE(review): temperature and max_tokens are validated but never read by
+    # the handlers visible in this diff, so they are not forwarded to providers.
+    temperature: Optional[float] = 0.7
+    max_tokens: Optional[int] = 4096
+# =====================================================
+# AUTH
+# =====================================================
+def verify_api_key(req: Request):
+    """Validate the optional ``Authorization: Bearer <key>`` request header.
+
+    Args:
+        req: Incoming request whose headers are inspected.
+
+    Returns:
+        True when the request may proceed.
+
+    Raises:
+        HTTPException: 401 if the header is present but does not use the
+            ``Bearer`` scheme; 403 if the bearer token is not ``API_KEY``.
+    """
+    import hmac
+
+    auth = req.headers.get("Authorization")
+    # NOTE(review): a request without any Authorization header is accepted
+    # (behaviour kept as-is) -- confirm that anonymous access is intended.
+    if not auth:
+        return True
+    prefix = "Bearer "
+    if not auth.startswith(prefix):
+        raise HTTPException(status_code=401, detail="Invalid Authorization Format")
+    # Strip only the leading scheme. str.replace() would also delete any later
+    # "Bearer " occurrence inside the token, letting a mangled key match.
+    token = auth[len(prefix):].strip()
+    # Constant-time comparison so response timing does not leak key contents.
+    if not hmac.compare_digest(token.encode("utf-8"), API_KEY.encode("utf-8")):
+        raise HTTPException(status_code=403, detail="Invalid API Key")
+    return True
+# =====================================================
+# ROOT
+# =====================================================
+@app.get("/")
+async def root():
+    # Static status payload returned for GET /.
+    info = dict(
+        status="online",
+        service="Universal AI Gateway",
+        version="4.2.0",
+    )
+    return info
+# =====================================================
+# MODELS
+# =====================================================
+@app.get("/v1/models")
+async def get_models():
+    # Returns one entry per key of MODEL_PROVIDERS; all entries share a single
+    # "created" timestamp taken when the request is handled.
+    # NOTE(review): unlike chat_completions(), this route never calls
+    # verify_api_key() -- confirm that is intentional.
+    created_at = int(time.time())
+    entries = [
+        {
+            "id": name,
+            "object": "model",
+            "created": created_at,
+            "owned_by": "g4f",
+        }
+        for name in MODEL_PROVIDERS
+    ]
+    return {"object": "list", "data": entries}
+# =====================================================
+# SAFE COMPLETION
+# Tries each provider in the list, one after another
+# =====================================================
+async def safe_completion(model, messages, stream=False):
+    """Run a chat completion against the providers mapped to ``model``.
+
+    Providers are tried in list order; the first response obtained is returned.
+
+    Args:
+        model: Model name, looked up in ``MODEL_PROVIDERS``.
+        messages: Chat messages passed unchanged to the g4f client.
+        stream: Forwarded to ``client.chat.completions.create``.
+
+    Returns:
+        Whatever ``client.chat.completions.create`` returned for the first
+        provider that neither raised nor exceeded ``REQUEST_TIMEOUT`` seconds.
+
+    Raises:
+        RuntimeError: Every provider failed. The message holds one line per
+            attempted provider and the last provider exception (if any) is
+            chained as the cause.
+    """
+    # Fetch the provider list for this model.
+    providers = MODEL_PROVIDERS.get(model)
+    # Model missing from the table -> generic fallback chain.
+    if not providers:
+        logger.warning(f"Model '{model}' not in table, using fallback providers")
+        providers = [Provider.PollinationsAI, Provider.Jmuz, Provider.DDG]
+    failures = []
+    last_exc = None
+    for provider_cls in providers:
+        pname = getattr(provider_cls, "__name__", str(provider_cls))
         try:
+            logger.info(f"Trying provider={pname} model={model}")
+            client = Client(provider=provider_cls)
+            # create() is run in a worker thread via asyncio.to_thread().
+            # NOTE(review): wait_for() stops waiting after the timeout, but the
+            # worker thread is presumably left running -- confirm.
+            response = await asyncio.wait_for(
                 asyncio.to_thread(
+                    client.chat.completions.create,
+                    model=model,
+                    messages=messages,
+                    stream=stream
                 ),
+                timeout=REQUEST_TIMEOUT
             )
+            logger.info(f"Success | provider={pname} model={model}")
+            return response
         except asyncio.TimeoutError:
+            failures.append(f"{pname}: timeout after {REQUEST_TIMEOUT}s")
+            logger.warning(f"Timeout | provider={pname}")
+        except Exception as e:
+            last_exc = e
+            failures.append(f"{pname}: {type(e).__name__}: {e}")
+            logger.warning(f"Failed | provider={pname} | {e}")
+    # Report every attempt (not only the final one) and keep the cause chain,
+    # so the caller can see which provider failed and why.
+    raise RuntimeError(
+        f"All providers failed for '{model}':\n" + "\n".join(failures)
+    ) from last_exc
+# =====================================================
+# CHAT COMPLETIONS
+# =====================================================
+@app.post("/v1/chat/completions")
+async def chat_completions(req: Request, body: ChatRequest):
+    # Chat endpoint. Checks the API key, flattens the pydantic messages into
+    # plain dicts, then either streams server-sent events (body.stream) or
+    # returns a single JSON completion.
+    # Raises HTTPException(500) when the non-streaming completion fails.
+    # Streaming failures are reported inside the event stream instead, because
+    # they are caught within the generator that produces the response body.
+    verify_api_key(req)
+    messages = [
+        {"role": m.role, "content": m.content}
+        for m in body.messages
+    ]
+    logger.info(f"Request model={body.model} stream={body.stream}")
+    # =================================================
+    # STREAMING
+    # =================================================
+    if body.stream:
+        async def generate_stream():
             try:
+                response = await safe_completion(
+                    model=body.model,
+                    messages=messages,
+                    stream=True
                 )
+                chunk_id = f"chatcmpl-{uuid.uuid4().hex}"
+                has_content = False
+                # NOTE(review): plain synchronous iteration inside an async
+                # generator; any blocking in the provider iterator presumably
+                # stalls the event loop between chunks -- confirm.
+                for chunk in response:
+                    try:
+                        content = ""
+                        if (
+                            hasattr(chunk, "choices")
+                            and chunk.choices
+                            and chunk.choices[0].delta
+                            and chunk.choices[0].delta.content
+                        ):
+                            content = chunk.choices[0].delta.content
+                        if content:
+                            has_content = True
+                            payload = {
+                                "id": chunk_id,
+                                "object": "chat.completion.chunk",
+                                "created": int(time.time()),
+                                "model": body.model,
+                                "choices": [
+                                    {
+                                        "index": 0,
+                                        "delta": {"content": content},
+                                        "finish_reason": None
+                                    }
+                                ]
                             }
+                            yield (
+                                f"data: "
+                                f"{json.dumps(payload, ensure_ascii=False)}\n\n"
+                            )
+                            # Yield control to the event loop between chunks.
+                            await asyncio.sleep(0)
+                    except Exception as chunk_error:
+                        # A malformed chunk is logged and skipped, not fatal.
+                        logger.error(f"Chunk error: {chunk_error}")
+                if not has_content:
+                    error_payload = {
                         "error": {
                             "message": "Provider returned empty stream",
+                            "type": "empty_stream"
                         }
                     }
+                    yield f"data: {json.dumps(error_payload)}\n\n"
+                final_payload = {
+                    "id": chunk_id,
                     "object": "chat.completion.chunk",
+                    "created": int(time.time()),
+                    "model": body.model,
                     "choices": [
                         {
                             "index": 0,
                             "delta": {},
+                            "finish_reason": "stop"
                         }
+                    ]
                 }
+                yield f"data: {json.dumps(final_payload)}\n\n"
+                yield "data: [DONE]\n\n"
+            except Exception as e:
+                logger.error(f"Streaming error: {e}")
+                error_payload = {
                     "error": {
+                        "message": str(e),
+                        "type": "server_error"
                     }
                 }
+                yield f"data: {json.dumps(error_payload)}\n\n"
+                # Terminate the stream after an error as well, so clients that
+                # wait for the [DONE] sentinel do not hang.
+                yield "data: [DONE]\n\n"
         return StreamingResponse(
+            generate_stream(),
             media_type="text/event-stream",
             headers={
                 "Cache-Control": "no-cache",
                 "Connection": "keep-alive",
+                "X-Accel-Buffering": "no"
+            }
         )
+    # =================================================
     # NORMAL RESPONSE
+    # =================================================
     try:
+        response = await safe_completion(
+            model=body.model,
+            messages=messages,
+            stream=False
         )
+        assistant_message = ""
+        try:
+            assistant_message = response.choices[0].message.content
         except Exception:
+            # Response lacks the expected shape: fall back to its string form.
+            assistant_message = str(response)
+        return JSONResponse({
+            "id": f"chatcmpl-{uuid.uuid4().hex}",
+            "object": "chat.completion",
+            "created": int(time.time()),
+            "model": body.model,
+            "choices": [
+                {
+                    "index": 0,
+                    "message": {
+                        "role": "assistant",
+                        "content": assistant_message
+                    },
+                    "finish_reason": "stop"
+                }
+            ],
+            # Token counts are not computed; zeros are reported as placeholders.
+            "usage": {
+                "prompt_tokens": 0,
+                "completion_tokens": 0,
+                "total_tokens": 0
             }
+        })
+    except Exception as e:
+        logger.error(f"Chat error: {e}")
         raise HTTPException(
+            status_code=500,
+            detail=str(e)
         )
+# =====================================================
 # RUN
+# =====================================================
 if __name__ == "__main__":
     import uvicorn
+    # Serve the app on all interfaces, port 7860, when executed as a script.
+    uvicorn.run(app, host="0.0.0.0", port=7860)