Spaces:

bahi-bh
/

Duck

Running

App Files Files Community

bahi-bh commited on 11 days ago

Commit

a0d3539

verified ·

1 Parent(s): 890b2f5

Update app.py

Browse files

Files changed (1) hide show

app.py +425 -353

app.py CHANGED Viewed

@@ -1,54 +1,267 @@
-from fastapi import FastAPI, Request, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import StreamingResponse, JSONResponse
 from pydantic import BaseModel
-from typing import List, Optional
 import asyncio
 import json
 import time
 import uuid
 import logging
 import g4f
 from g4f.client import Client
-# =====================================================
 # LOGGING
-# =====================================================
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
-# =====================================================
 # CONFIG
-# =====================================================
-API_KEY = "sk-your-secret-key"
-# timeout لمنع التعليق الأبدي
-REQUEST_TIMEOUT = 45
-# retry خفيف
-MAX_RETRIES = 2
-# =====================================================
-# FASTAPI
-# =====================================================
 app = FastAPI(
     title="Universal AI Gateway",
-    version="4.2.0"
 )
-# =====================================================
-# CORS
-# =====================================================
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
@@ -58,10 +271,9 @@ app.add_middleware(
 )
-# =====================================================
 # MODELS
-# =====================================================
 class Message(BaseModel):
     role: str
     content: str
@@ -75,257 +287,188 @@ class ChatRequest(BaseModel):
     max_tokens: Optional[int] = 4096
-# =====================================================
 # AUTH
-# =====================================================
 def verify_api_key(req: Request):
-    auth = req.headers.get("Authorization")
-    # السماح للاختبار
     if not auth:
-        return True
     if not auth.startswith("Bearer "):
-        raise HTTPException(
-            status_code=401,
-            detail="Invalid Authorization Format"
-        )
-    token = auth.replace("Bearer ", "").strip()
     if token != API_KEY:
-        raise HTTPException(
-            status_code=403,
-            detail="Invalid API Key"
-        )
     return True
-# =====================================================
-# ROOT
-# =====================================================
-@app.get("/")
-async def root():
-    return {
-        "status": "online",
-        "service": "Universal AI Gateway",
-        "version": "4.2.0"
-    }
-# =====================================================
-# MODELS
-# =====================================================
-@app.get("/v1/models")
-async def get_models():
-    models_data = []
-    # =================================================
-    # MODELS THAT WORK WELL
-    # =================================================
-    fallback_models = [
-        # GPT
-        "gpt-4o-mini",
-        "gpt-4o",
-        "gpt-4",
-        "gpt-3.5-turbo",
-        # Claude
-        "claude-3-haiku",
-        # Llama
-        "llama-3.1-70b",
-        # Mixtral
-        "mixtral-8x7b",
-        # Deepseek
-        "deepseek-chat",
-        # Gemini
-        "gemini-pro",
-        # =================================================
-        # COHERE FAMILY
-        # =================================================
-        "command-r",
-        "command-r-plus",
-        "command-r7b",
-        "command",
-        "command-nightly",
-        # Additional Cohere-style names
-        "cohere-command-r",
-        "cohere-command-r-plus",
-    ]
-    added_models = set()
-    try:
-        if hasattr(g4f.models, "_all_models"):
-            all_models = list(g4f.models._all_models)
-            for model in all_models[:100]:
-                model_name = str(model)
-                if model_name not in added_models:
-                    models_data.append({
-                        "id": model_name,
-                        "object": "model",
-                        "created": int(time.time()),
-                        "owned_by": "g4f"
-                    })
-                    added_models.add(model_name)
-    except Exception as e:
-        logger.error(f"Models error: {e}")
-    # fallback models
-    for model in fallback_models:
-        if model not in added_models:
-            models_data.append({
-                "id": model,
-                "object": "model",
-                "created": int(time.time()),
-                "owned_by": "g4f"
-            })
-            added_models.add(model)
-    return {
-        "object": "list",
-        "data": models_data
-    }
-# =====================================================
-# SAFE COMPLETION
-# =====================================================
-async def safe_completion(
-    model,
-    messages,
-    stream=False
-):
-    last_error = None
-    for attempt in range(MAX_RETRIES):
         try:
             logger.info(
-                f"Attempt {attempt + 1} | model={model}"
             )
-            client = Client()
-            # timeout لمنع التعليق الأبدي
             response = await asyncio.wait_for(
-                asyncio.to_thread(
-                    client.chat.completions.create,
-                    model=model,
-                    messages=messages,
-                    stream=stream
-                ),
-                timeout=REQUEST_TIMEOUT
             )
-            logger.info(
-                f"Success | model={model}"
-            )
             return response
         except asyncio.TimeoutError:
-            last_error = "Request timeout"
-            logger.warning(
-                f"Timeout | model={model}"
-            )
-        except Exception as e:
-            last_error = e
-            logger.warning(
-                f"Attempt failed {attempt + 1} | {e}"
-            )
-        await asyncio.sleep(1)
-    raise Exception(last_error)
-# =====================================================
-# CHAT COMPLETIONS
-# =====================================================
-@app.post("/v1/chat/completions")
-async def chat_completions(
-    req: Request,
-    body: ChatRequest
-):
     verify_api_key(req)
-    messages = [
-        {
-            "role": m.role,
-            "content": m.content
-        }
-        for m in body.messages
-    ]
-    logger.info(
-        f"Request model={body.model} stream={body.stream}"
-    )
-    # =================================================
-    # STREAMING
-    # =================================================
-    if body.stream:
-        async def generate_stream():
             try:
-                response = await safe_completion(
-                    model=body.model,
-                    messages=messages,
-                    stream=True
                 )
-                chunk_id = f"chatcmpl-{uuid.uuid4().hex}"
-                has_content = False
                 for chunk in response:
                     try:
                         content = ""
                         if (
                             hasattr(chunk, "choices")
                             and chunk.choices
@@ -333,168 +476,97 @@ async def chat_completions(
                             and chunk.choices[0].delta.content
                         ):
                             content = chunk.choices[0].delta.content
                         if content:
                             has_content = True
-                            payload = {
-                                "id": chunk_id,
-                                "object": "chat.completion.chunk",
-                                "created": int(time.time()),
-                                "model": body.model,
-                                "choices": [
-                                    {
-                                        "index": 0,
-                                        "delta": {
-                                            "content": content
-                                        },
-                                        "finish_reason": None
-                                    }
-                                ]
-                            }
                             yield (
-                                f"data: "
-                                f"{json.dumps(payload, ensure_ascii=False)}\n\n"
                             )
                             await asyncio.sleep(0)
-                    except Exception as chunk_error:
-                        logger.error(
-                            f"Chunk error: {chunk_error}"
-                        )
-                # provider فتح stream بدون محتوى
                 if not has_content:
-                    error_payload = {
-                        "error": {
-                            "message": "Provider returned empty stream",
-                            "type": "empty_stream"
-                        }
-                    }
                     yield (
-                        f"data: "
-                        f"{json.dumps(error_payload)}\n\n"
                     )
-                final_payload = {
-                    "id": chunk_id,
-                    "object": "chat.completion.chunk",
-                    "created": int(time.time()),
-                    "model": body.model,
-                    "choices": [
-                        {
-                            "index": 0,
-                            "delta": {},
-                            "finish_reason": "stop"
-                        }
-                    ]
-                }
                 yield (
-                    f"data: "
-                    f"{json.dumps(final_payload)}\n\n"
                 )
                 yield "data: [DONE]\n\n"
             except Exception as e:
                 logger.error(f"Streaming error: {e}")
-                error_payload = {
-                    "error": {
-                        "message": str(e),
-                        "type": "server_error"
-                    }
-                }
                 yield (
-                    f"data: "
-                    f"{json.dumps(error_payload)}\n\n"
                 )
         return StreamingResponse(
-            generate_stream(),
             media_type="text/event-stream",
             headers={
                 "Cache-Control": "no-cache",
                 "Connection": "keep-alive",
-                "X-Accel-Buffering": "no"
-            }
         )
-    # =================================================
-    # NORMAL RESPONSE
-    # =================================================
     try:
-        response = await safe_completion(
-            model=body.model,
-            messages=messages,
-            stream=False
         )
-        assistant_message = ""
         try:
-            assistant_message = (
-                response.choices[0].message.content
-            )
         except Exception:
-            assistant_message = str(response)
         return JSONResponse({
             "id": f"chatcmpl-{uuid.uuid4().hex}",
             "object": "chat.completion",
             "created": int(time.time()),
             "model": body.model,
-            "choices": [
-                {
-                    "index": 0,
-                    "message": {
-                        "role": "assistant",
-                        "content": assistant_message
-                    },
-                    "finish_reason": "stop"
-                }
-            ],
-            "usage": {
-                "prompt_tokens": 0,
-                "completion_tokens": 0,
-                "total_tokens": 0
-            }
         })
     except Exception as e:
         logger.error(f"Chat error: {e}")
-        raise HTTPException(
-            status_code=500,
-            detail=str(e)
-        )
-# =====================================================
-# RUN
-# =====================================================
 if __name__ == "__main__":
     import uvicorn
-    uvicorn.run(
-        app,
-        host="0.0.0.0",
-        port=7860
-    )

+# ╔══════════════════════════════════════════════════════════════════╗
+# ║          Universal AI Gateway  –  Advanced Edition v5.0         ║
+# ║  • Auto-discovers WORKING providers via live health checks       ║
+# ║  • Fetches working models list from g4f-working (daily updated)  ║
+# ║  • Lightweight proxy rotation to avoid rate-limit IP bans        ║
+# ║  • Smart fallback chain: best provider → pool → any              ║
+# ║  • /status endpoint with live provider/model stats               ║
+# ╚══════════════════════════════════════════════════════════════════╝
+from fastapi import FastAPI, Request, HTTPException, BackgroundTasks
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import StreamingResponse, JSONResponse
 from pydantic import BaseModel
+from typing import List, Optional, Dict, Any
+from contextlib import asynccontextmanager
 import asyncio
 import json
 import time
 import uuid
 import logging
+import random
+import httpx
 import g4f
 from g4f.client import Client
+from g4f import Provider
+# ───────────────────────────────────────────────────────────────────
 # LOGGING
+# ───────────────────────────────────────────────────────────────────
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s │ %(levelname)-7s │ %(message)s",
+    datefmt="%H:%M:%S",
+)
+logger = logging.getLogger("gateway")
+# ───────────────────────────────────────────────────────────────────
 # CONFIG
+# ───────────────────────────────────────────────────────────────────
+API_KEY           = "sk-your-secret-key"
+REQUEST_TIMEOUT   = 50          # seconds per single attempt
+MAX_RETRIES       = 3
+PROBE_TIMEOUT     = 20          # health-check timeout
+PROBE_CONCURRENCY = 8           # parallel health checks
+CACHE_TTL         = 3600        # re-check providers every 1 h
+# Remote list of today's working models (no-auth, updated daily)
+G4F_WORKING_MODELS_URL = (
+    "https://raw.githubusercontent.com/"
+    "Free-AI-Things/g4f-working/main/output/models.txt"
+)
+# Optional: rotate through these free proxies (add your own)
+# Format: "http://host:port" or "socks5://user:pass@host:port"
+PROXY_POOL: List[str] = [
+    # "http://proxy1:8080",
+    # "socks5://user:pass@proxy2:1080",
+]
+# Providers confirmed to work without auth in recent tests
+# (used as seed; health check updates this at runtime)
+KNOWN_NO_AUTH_PROVIDERS = [
+    "PollinationsAI",
+    "DDG",
+    "Jmuz",
+    "OIVSCode",
+    "Liaobots",
+    "ChatGptEs",
+    "Free2GPT",
+    "HuggingSpace",
+    "Blackbox",
+    "You",
+    "Pizzagpt",
+    "LambdaChat",
+    "PerplexityLabs",
+    "RobocodersAPI",
+    "TeachAnything",
+    "AiChatOnline",
+    "Cloudflare",
+    "FreeNetfly",
+    "Gemini",
+    "GeminiPro",
+]
+# ───────────────────────────────────────────────────────────────────
+# GLOBAL RUNTIME STATE
+# ───────────────────────────────────────────────────────────────────
+class State:
+    working_providers: List[Any]  = []   # provider objects that passed health check
+    working_models:    List[str]  = []   # model names fetched from remote list
+    provider_stats:    Dict[str, Dict] = {}  # per-provider success / fail counters
+    proxy_index:       int        = 0
+    last_probe:        float      = 0.0
+    startup_done:      bool       = False
+state = State()
+# ───────────────────────────────────────────────────────────────────
+# PROXY ROTATION
+# ───────────────────────────────────────────────────────────────────
+def get_next_proxy() -> Optional[str]:
+    """Round-robin through the proxy pool; returns None if pool is empty."""
+    if not PROXY_POOL:
+        return None
+    proxy = PROXY_POOL[state.proxy_index % len(PROXY_POOL)]
+    state.proxy_index += 1
+    return proxy
+def get_random_proxy() -> Optional[str]:
+    """Pick a random proxy from the pool."""
+    return random.choice(PROXY_POOL) if PROXY_POOL else None
+# ───────────────────────────────────────────────────────────────────
+# FETCH WORKING MODELS FROM REMOTE LIST
+# ───────────────────────────────────────────────────────────────────
+async def fetch_remote_working_models() -> List[str]:
+    """
+    Downloads the daily-updated models.txt from g4f-working.
+    Falls back to a hard-coded seed list on failure.
+    """
+    try:
+        async with httpx.AsyncClient(timeout=10) as client:
+            resp = await client.get(G4F_WORKING_MODELS_URL)
+            if resp.status_code == 200:
+                lines = [l.strip() for l in resp.text.splitlines() if l.strip()]
+                logger.info(f"✅ Remote working models fetched: {len(lines)} models")
+                return lines
+    except Exception as e:
+        logger.warning(f"⚠️  Could not fetch remote model list: {e}")
+    # Fallback seed – known stable models as of 2025
+    return [
+        "gpt-4o-mini", "gpt-4o", "gpt-4", "gpt-3.5-turbo",
+        "claude-3-haiku", "claude-3-sonnet", "claude-3-opus",
+        "llama-3.1-70b", "llama-3.1-8b", "llama-3.3-70b",
+        "mistral-7b", "mixtral-8x7b",
+        "deepseek-chat", "deepseek-r1",
+        "gemini-pro", "gemini-1.5-flash", "gemini-1.5-pro",
+        "command-r", "command-r-plus",
+        "qwen-2-72b", "qwen-2.5-72b",
+        "phi-3-mini", "phi-4",
+        "hermes-3",
+    ]
+# ───────────────────────────────────────────────────────────────────
+# PROVIDER HEALTH CHECK
+# ───────────────────────────────────────────────────────────────────
+async def probe_provider(provider_name: str) -> bool:
+    """
+    Send a trivial prompt to a provider.
+    Returns True only if we get a non-empty, non-error text back.
+    """
+    try:
+        provider_cls = getattr(Provider, provider_name, None)
+        if provider_cls is None:
+            return False
+        proxy = get_random_proxy()
+        def _call():
+            c = Client(provider=provider_cls)
+            r = c.chat.completions.create(
+                model="gpt-4o-mini",
+                messages=[{"role": "user", "content": "Hi"}],
+                proxy=proxy,
+            )
+            return r.choices[0].message.content or ""
+        text = await asyncio.wait_for(
+            asyncio.to_thread(_call),
+            timeout=PROBE_TIMEOUT,
+        )
+        ok = bool(text.strip()) and "error" not in text.lower()[:50]
+        logger.info(f"  {'✅' if ok else '❌'} {provider_name}")
+        return ok
+    except Exception as e:
+        logger.debug(f"  ❌ {provider_name}: {e}")
+        return False
+async def run_health_checks():
+    """Probe all known no-auth providers concurrently and cache results."""
+    logger.info("🔍 Starting provider health checks …")
+    sem = asyncio.Semaphore(PROBE_CONCURRENCY)
+    async def guarded_probe(name):
+        async with sem:
+            result = await probe_provider(name)
+            state.provider_stats[name] = state.provider_stats.get(name, {
+                "success": 0, "fail": 0, "last_check": None
+            })
+            state.provider_stats[name]["last_check"] = time.time()
+            if result:
+                state.provider_stats[name]["success"] += 1
+            else:
+                state.provider_stats[name]["fail"] += 1
+            return name, result
+    tasks = [guarded_probe(name) for name in KNOWN_NO_AUTH_PROVIDERS]
+    results = await asyncio.gather(*tasks, return_exceptions=True)
+    working = []
+    for item in results:
+        if isinstance(item, tuple):
+            name, ok = item
+            if ok:
+                provider_cls = getattr(Provider, name, None)
+                if provider_cls:
+                    working.append(provider_cls)
+    state.working_providers = working
+    state.last_probe = time.time()
+    logger.info(
+        f"✅ Health check done: {len(working)}/{len(KNOWN_NO_AUTH_PROVIDERS)} providers working"
+    )
+# ───────────────────────────────────────────────────────────────────
+# STARTUP / BACKGROUND REFRESH
+# ───────────────────────────────────────────────────────────────────
+async def startup_init():
+    logger.info("🚀 Gateway starting – fetching models & probing providers …")
+    state.working_models = await fetch_remote_working_models()
+    # Run health checks in background so startup is fast
+    asyncio.create_task(run_health_checks())
+    state.startup_done = True
+    logger.info(f"🎯 {len(state.working_models)} models loaded")
+async def background_refresh(interval: int = CACHE_TTL):
+    """Periodically re-probe providers and refresh model list."""
+    await asyncio.sleep(interval)
+    while True:
+        logger.info("♻️  Refreshing provider health & model list …")
+        state.working_models = await fetch_remote_working_models()
+        await run_health_checks()
+        await asyncio.sleep(interval)
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    await startup_init()
+    asyncio.create_task(background_refresh())
+    yield
+# ───────────────────────────────────────────────────────────────────
+# FASTAPI APP
+# ───────────────────────────────────────────────────────────────────
 app = FastAPI(
     title="Universal AI Gateway",
+    version="5.0.0",
+    description="Advanced g4f gateway with live provider health checks and proxy rotation",
+    lifespan=lifespan,
 )
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
 )
+# ───────────────────────────────────────────────────────────────────
 # MODELS
+# ───────────────────────────────────────────────────────────────────
 class Message(BaseModel):
     role: str
     content: str
     max_tokens: Optional[int] = 4096
+# ───────────────────────────────────────────────────────────────────
 # AUTH
+# ───────────────────────────────────────────────────────────────────
 def verify_api_key(req: Request):
+    auth = req.headers.get("Authorization", "")
     if not auth:
+        return True   # allow unauthenticated for testing
     if not auth.startswith("Bearer "):
+        raise HTTPException(status_code=401, detail="Invalid Authorization format")
+    token = auth.removeprefix("Bearer ").strip()
     if token != API_KEY:
+        raise HTTPException(status_code=403, detail="Invalid API key")
     return True
+# ───────────────────────────────────────────────────────────────────
+# SMART COMPLETION  (provider fallback chain)
+# ───────────────────────────────────────────────────────────────────
+async def smart_completion(model: str, messages: list, stream: bool = False):
+    """
+    Try providers in this order:
+      1. g4f auto (lets g4f pick the best_provider for the model)
+      2. Each working provider from our health-checked pool
+      3. Any g4f provider via RetryProvider as last resort
+    Rotates proxy on each attempt.
+    """
+    errors: List[str] = []
+    # Build candidate provider list: health-checked first
+    candidates = list(state.working_providers)
+    # Always include auto (None) as first try
+    provider_order = [None] + candidates
+    for attempt, provider_cls in enumerate(provider_order[:MAX_RETRIES + 1]):
+        proxy = get_next_proxy()
+        pname = getattr(provider_cls, "__name__", "auto") if provider_cls else "auto"
         try:
             logger.info(
+                f"  ▶ attempt {attempt + 1} | provider={pname} | "
+                f"model={model} | proxy={'yes' if proxy else 'no'}"
             )
+            def _call():
+                kwargs = {"model": model, "messages": messages, "stream": stream}
+                if proxy:
+                    kwargs["proxy"] = proxy
+                if provider_cls:
+                    c = Client(provider=provider_cls)
+                else:
+                    c = Client()
+                return c.chat.completions.create(**kwargs)
             response = await asyncio.wait_for(
+                asyncio.to_thread(_call),
+                timeout=REQUEST_TIMEOUT,
             )
+            # Update stats
+            if pname in state.provider_stats:
+                state.provider_stats[pname]["success"] = \
+                    state.provider_stats[pname].get("success", 0) + 1
+            logger.info(f"  ✅ success via {pname}")
             return response
         except asyncio.TimeoutError:
+            msg = f"{pname}: timeout after {REQUEST_TIMEOUT}s"
+        except Exception as e:
+            msg = f"{pname}: {type(e).__name__}: {e}"
+            if pname in state.provider_stats:
+                state.provider_stats[pname]["fail"] = \
+                    state.provider_stats[pname].get("fail", 0) + 1
+        errors.append(msg)
+        logger.warning(f"  ⚠️  {msg}")
+        await asyncio.sleep(0.5)
+    raise RuntimeError("All providers failed:\n" + "\n".join(errors))
+# ───────────────────────────────────────────────────────────────────
+# ROUTES
+# ───────────────────────────────────────────────────────────────────
+@app.get("/")
+async def root():
+    return {
+        "service": "Universal AI Gateway",
+        "version": "5.0.0",
+        "status": "online",
+        "working_providers": len(state.working_providers),
+        "working_models": len(state.working_models),
+        "proxy_pool": len(PROXY_POOL),
+        "docs": "/docs",
+    }
+@app.get("/status")
+async def status():
+    """Live health dashboard for providers and models."""
+    provider_info = {}
+    for name, stats in state.provider_stats.items():
+        total = stats.get("success", 0) + stats.get("fail", 0)
+        success_rate = (
+            round(stats["success"] / total * 100, 1) if total else 0
+        )
+        last = stats.get("last_check")
+        provider_info[name] = {
+            "success": stats.get("success", 0),
+            "fail":    stats.get("fail", 0),
+            "success_rate": f"{success_rate}%",
+            "last_check": time.strftime("%H:%M:%S", time.localtime(last)) if last else "—",
+        }
+    return {
+        "working_providers": [
+            getattr(p, "__name__", str(p)) for p in state.working_providers
+        ],
+        "working_providers_count": len(state.working_providers),
+        "working_models_sample": state.working_models[:30],
+        "working_models_count": len(state.working_models),
+        "proxy_pool_size": len(PROXY_POOL),
+        "last_health_check": (
+            time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(state.last_probe))
+            if state.last_probe else "pending"
+        ),
+        "provider_stats": provider_info,
+    }
+@app.post("/v1/providers/refresh")
+async def refresh_providers(background_tasks: BackgroundTasks, req: Request):
+    """Trigger a manual provider health-check refresh."""
+    verify_api_key(req)
+    background_tasks.add_task(run_health_checks)
+    return {"message": "Provider health check started in background"}
+@app.get("/v1/models")
+async def get_models(req: Request):
+    """Return the list of currently working models."""
     verify_api_key(req)
+    # Merge remote working list with g4f's own model registry
+    model_set: set = set(state.working_models)
+    try:
+        if hasattr(g4f.models, "_all_models"):
+            for m in list(g4f.models._all_models)[:200]:
+                model_set.add(str(m))
+    except Exception:
+        pass
+    now = int(time.time())
+    data = [
+        {"id": mid, "object": "model", "created": now, "owned_by": "g4f"}
+        for mid in sorted(model_set)
+    ]
+    return {"object": "list", "data": data}
+@app.post("/v1/chat/completions")
+async def chat_completions(req: Request, body: ChatRequest):
+    verify_api_key(req)
+    messages = [{"role": m.role, "content": m.content} for m in body.messages]
+    logger.info(f"📨 Request → model={body.model} stream={body.stream}")
+    # ── STREAMING ──────────────────────────────────────────────────
+    if body.stream:
+        async def generate():
+            chunk_id = f"chatcmpl-{uuid.uuid4().hex}"
+            has_content = False
             try:
+                response = await smart_completion(
+                    model=body.model, messages=messages, stream=True
                 )
                 for chunk in response:
                     try:
                         content = ""
                         if (
                             hasattr(chunk, "choices")
                             and chunk.choices
                             and chunk.choices[0].delta.content
                         ):
                             content = chunk.choices[0].delta.content
                         if content:
                             has_content = True
                             yield (
+                                "data: "
+                                + json.dumps({
+                                    "id": chunk_id,
+                                    "object": "chat.completion.chunk",
+                                    "created": int(time.time()),
+                                    "model": body.model,
+                                    "choices": [{
+                                        "index": 0,
+                                        "delta": {"content": content},
+                                        "finish_reason": None,
+                                    }],
+                                }, ensure_ascii=False)
+                                + "\n\n"
                             )
                             await asyncio.sleep(0)
+                    except Exception as ce:
+                        logger.error(f"Chunk error: {ce}")
                 if not has_content:
                     yield (
+                        "data: "
+                        + json.dumps({"error": {"message": "Provider returned empty stream", "type": "empty_stream"}})
+                        + "\n\n"
                     )
+                # Final stop chunk
                 yield (
+                    "data: "
+                    + json.dumps({
+                        "id": chunk_id,
+                        "object": "chat.completion.chunk",
+                        "created": int(time.time()),
+                        "model": body.model,
+                        "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}],
+                    })
+                    + "\n\n"
                 )
                 yield "data: [DONE]\n\n"
             except Exception as e:
                 logger.error(f"Streaming error: {e}")
                 yield (
+                    "data: "
+                    + json.dumps({"error": {"message": str(e), "type": "server_error"}})
+                    + "\n\n"
                 )
         return StreamingResponse(
+            generate(),
             media_type="text/event-stream",
             headers={
                 "Cache-Control": "no-cache",
                 "Connection": "keep-alive",
+                "X-Accel-Buffering": "no",
+            },
         )
+    # ── NON-STREAMING ──────────────────────────────────────────────
     try:
+        response = await smart_completion(
+            model=body.model, messages=messages, stream=False
         )
         try:
+            content = response.choices[0].message.content
         except Exception:
+            content = str(response)
         return JSONResponse({
             "id": f"chatcmpl-{uuid.uuid4().hex}",
             "object": "chat.completion",
             "created": int(time.time()),
             "model": body.model,
+            "choices": [{
+                "index": 0,
+                "message": {"role": "assistant", "content": content},
+                "finish_reason": "stop",
+            }],
+            "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0},
         })
     except Exception as e:
         logger.error(f"Chat error: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
+# ───────────────────────────────────────────────────────────────────
+# ENTRY POINT
+# ───────────────────────────────────────────────────────────────────
 if __name__ == "__main__":
     import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=7860, log_level="info")