Spaces:

bahi-bh
/

Duck

Running

App Files Community

bahi-bh committed on 12 days ago

Commit

32d4bc3

verified ·

1 Parent(s): b8ac474

Update app.py

Browse files

Files changed (1) hide show

app.py +776 -360

app.py CHANGED Viewed

@@ -6,20 +6,24 @@ import asyncio
 import logging
 import uvicorn
 import random
 from fastapi import FastAPI, HTTPException, Request, Response
 from fastapi.middleware.cors import CORSMiddleware
 from concurrent.futures import ThreadPoolExecutor
 from curl_cffi import requests as curl_requests
 from typing import Dict, List, Optional, Tuple
 # =========================================================
-# 1. الإعدادات العليا (Orchestration Config)
 # =========================================================
 API_KEY = os.environ.get("API_KEY", "sk-your-secret-key")
 PORT = int(os.environ.get("PORT", 7860))
 MAX_WORKERS = 50
 VALIDATION_INTERVAL = 300
-GLOBAL_TIMEOUT = 60
 CACHE_FILE = "models_cache.json"
 MODEL_BLACKLIST = [
@@ -39,20 +43,27 @@ MODEL_BLACKLIST = [
 VALIDATION_PROMPT = [
     {
         "role": "user",
-        "content": "Reply only with: OK"
     }
 ]
-# Capability Registry
 WORKING_MODELS: Dict[str, dict] = {}
 PROVIDER_MODEL_MAP: Dict[str, List[str]] = {}
-# Atomic State & Concurrency Control
 STATE_LOCK = asyncio.Lock()
 REQUEST_LIMITER = asyncio.Semaphore(25)
 EXECUTOR = ThreadPoolExecutor(max_workers=MAX_WORKERS)
-app = FastAPI(title="Omega Orchestrator", version="2.0")
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
@@ -60,16 +71,27 @@ app.add_middleware(
     allow_headers=["*"]
 )
-logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
-logger = logging.getLogger("ORCHESTRATOR")
 def get_stealth_headers():
     return {
         "User-Agent": (
-            f"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
-            f"AppleWebKit/537.36 (KHTML, like Gecko) "
-            f"Chrome/{random.randint(120, 124)}.0.0.0 Safari/537.36"
         ),
         "Origin": "https://g4f.space",
         "Referer": "https://g4f.space/",
@@ -79,33 +101,25 @@ def get_stealth_headers():
     }
-# =========================================================
-# 2. التحقق من المصادقة (Auth Verification) — مُصلح
-# =========================================================
 def verify_api_key(request: Request) -> bool:
-    """
-    يدعم ثلاث طرق للمصادقة:
-      1. Authorization: Bearer <key>
-      2. x-api-key: <key>
-      3. api-key: <key>
-    إذا لم يُضبط API_KEY فالوصول مفتوح.
-    """
     if not API_KEY or API_KEY == "sk-your-secret-key":
-        return True  # No key configured → open access
     auth_header = request.headers.get("Authorization", "")
-    x_api_key   = request.headers.get("x-api-key", "")
     api_key_hdr = request.headers.get("api-key", "")
     candidates = []
-    # Bearer token
     if auth_header.startswith("Bearer "):
-        candidates.append(auth_header[len("Bearer "):].strip())
-    # x-api-key / api-key headers
     if x_api_key:
         candidates.append(x_api_key.strip())
     if api_key_hdr:
         candidates.append(api_key_hdr.strip())
@@ -113,538 +127,940 @@ def verify_api_key(request: Request) -> bool:
 # =========================================================
-# 3. مستخرج المحتوى المركزي (Central Content Extractor)
 # =========================================================
 def extract_content(data) -> Optional[str]:
     if not isinstance(data, dict):
         return None
-    # Direct content fields
-    for field in ("response", "content", "text", "output", "result", "generated_text"):
-        if field in data and isinstance(data[field], str) and data[field].strip():
-            return data[field]
-    # OpenAI-style choices
-    if "choices" in data and isinstance(data["choices"], list) and data["choices"]:
-        choice = data["choices"][0]
-        if isinstance(choice, dict):
-            msg = choice.get("message", {})
-            if isinstance(msg, dict) and "content" in msg:
-                return msg["content"]
-            if "text" in choice and choice["text"].strip():
-                return choice["text"]
-            if "delta" in choice and "content" in choice["delta"]:
-                return choice["delta"]["content"]
-    # Anthropic-style message
     if "message" in data:
-        m = data["message"]
-        if isinstance(m, dict) and "content" in m:
-            c = m["content"]
-            # content can be a list of blocks
-            if isinstance(c, list):
-                texts = [b.get("text", "") for b in c if isinstance(b, dict) and b.get("type") == "text"]
                 combined = "".join(texts)
                 if combined.strip():
                     return combined
-            elif isinstance(c, str) and c.strip():
-                return c
-        elif isinstance(m, str) and m.strip():
-            return m
-    # Nested data.message
-    if "data" in data and isinstance(data["data"], dict):
-        return extract_content(data["data"])
-    # Completion field (older APIs)
-    if "completion" in data and isinstance(data["completion"], str):
-        return data["completion"]
     return None
 # =========================================================
-# 4. مزودات الخدمة (Provider Classes)
 # =========================================================
 class BaseProvider:
     def __init__(self, name: str, url: str):
         self.name = name
         self.url = url
-        self.headers = get_stealth_headers()
         self.aliases: Dict[str, str] = {}
         self.fails = 0
         self.success = 0
         self.cooldown = 0.0
-        self.latency = 0.0
         self.health = 100
-    def update_health(self):
-        total = self.success + self.fails
-        if total > 0:
-            self.health = int((self.success / total) * 100)
-    # ------------------------------------------------------------------
-    # Model Discovery
-    # ------------------------------------------------------------------
     async def fetch_models(self) -> List[str]:
         loop = asyncio.get_event_loop()
         try:
             async with REQUEST_LIMITER:
-                models = await loop.run_in_executor(EXECUTOR, self._fetch_models_sync)
                 return list(set(models))
         except Exception as e:
-            logger.debug(f"[{self.name}] fetch_models error: {e}")
             return []
     def _fetch_models_sync(self) -> List[str]:
-        """
-        يجرب نقاط نهاية متعددة لاستخراج جميع النماذج المتاحة.
-        يدعم كافة الهياكل الشائعة: قوائم مسطّحة، قواميس، بيانات متداخلة.
-        """
-        discovered: List[str] = []
         endpoints = [
             f"{self.url}/v1/models",
             f"{self.url}/models",
-            self.url,
         ]
         with curl_requests.Session() as session:
             for endpoint in endpoints:
                 try:
                     resp = session.get(
                         endpoint,
-                        headers=self.headers,
                         impersonate="chrome124",
-                        timeout=10
                     )
                     if resp.status_code != 200:
                         continue
-                    data = resp.json()
-                    extracted = self._parse_models_response(data)
-                    if extracted:
-                        discovered.extend(extracted)
-                        break  # Found models, no need to try other endpoints
                 except Exception:
                     continue
-        return discovered
-    @staticmethod
-    def _parse_models_response(data) -> List[str]:
-        """
-        يستخرج معرّفات النماذج من أي هيكل بيانات محتمل.
-        """
-        ids: List[str] = []
-        if isinstance(data, list):
-            for item in data:
-                if isinstance(item, str):
-                    ids.append(item)
-                elif isinstance(item, dict):
-                    for key in ("id", "name", "model", "model_id"):
-                        if key in item and isinstance(item[key], str):
-                            ids.append(item[key])
-                            break
-        elif isinstance(data, dict):
-            # OpenAI-style: {"data": [...]}
-            if "data" in data and isinstance(data["data"], list):
-                ids.extend(BaseProvider._parse_models_response(data["data"]))
-            # {"models": [...]}
-            elif "models" in data and isinstance(data["models"], list):
-                ids.extend(BaseProvider._parse_models_response(data["models"]))
-            # {"result": [...]} or {"results": [...]}
-            elif "result" in data and isinstance(data["result"], list):
-                ids.extend(BaseProvider._parse_models_response(data["result"]))
-            elif "results" in data and isinstance(data["results"], list):
-                ids.extend(BaseProvider._parse_models_response(data["results"]))
-            # Single model dict
-            elif "id" in data:
-                ids.append(data["id"])
-        return ids
-    # ------------------------------------------------------------------
-    # Validation
-    # ------------------------------------------------------------------
-    async def validate_model(self, model: str) -> Tuple[bool, float]:
-        payload = {"model": model, "messages": VALIDATION_PROMPT}
         start = time.time()
         result = await self.attempt_request(payload)
         latency = time.time() - start
-        ok = result is not None and "ok" in result.strip().lower()
-        if ok:
             self.success += 1
-            self.latency = (self.latency + latency) / 2 if self.latency > 0 else latency
-        else:
-            self.fails += 1
-        self.update_health()
-        return ok, latency
-    # ------------------------------------------------------------------
-    # Request Handling
-    # ------------------------------------------------------------------
-    async def attempt_request(self, payload: dict) -> Optional[str]:
-        payload = payload.copy()  # prevent mutation leakage
         if time.time() < self.cooldown:
             return None
         model_id = payload.get("model", "")
-        payload["model"] = self.aliases.get(model_id, model_id)
         try:
             async with REQUEST_LIMITER:
                 loop = asyncio.get_event_loop()
-                content = await loop.run_in_executor(EXECUTOR, self._make_request, payload)
-            if content:
                 self.fails = 0
                 self.success += 1
-                self.update_health()
-                return content
             self.fails += 1
-            self.update_health()
             if self.fails >= 3:
                 self.cooldown = time.time() + 60
             return None
         except Exception:
             self.fails += 1
-            self.update_health()
             return None
-    def _make_request(self, payload: dict) -> Optional[str]:
         with curl_requests.Session() as session:
             try:
                 resp = session.post(
                     self.url,
-                    headers=self.headers,
                     json=payload,
                     impersonate="chrome124",
                     timeout=25
                 )
-                if resp.status_code == 200:
                     data = resp.json()
-                    content = extract_content(data)
-                    if content and str(content).strip():
-                        return str(content).strip()
             except Exception:
-                pass
-        return None
-# ------------------------------------------------------------------
-# Provider Definitions
-# ------------------------------------------------------------------
 class GroqProvider(BaseProvider):
     def __init__(self):
-        super().__init__("Groq", "https://g4f.space/api/groq")
-        self.aliases = {"gpt-4o": "llama-3-70b"}
 class GeminiProvider(BaseProvider):
     def __init__(self):
-        super().__init__("Gemini", "https://g4f.space/api/gemini")
-        self.aliases = {"claude-3-5-sonnet": "gemini-1.5-pro"}
 class PollinationsProvider(BaseProvider):
     def __init__(self):
-        super().__init__("Pollinations", "https://g4f.space/api/pollinations")
-        self.aliases = {"gpt-4o": "gpt-4"}
 class OllamaProvider(BaseProvider):
     def __init__(self):
-        super().__init__("Ollama", "https://g4f.space/api/ollama")
-class PerplexityProvider(BaseProvider):
-    def __init__(self):
-        super().__init__("Perplexity", "https://g4f.space/api/perplexity")
-class OpenRouterProvider(BaseProvider):
-    """
-    مزوّد إضافي: OpenRouter — يتيح الوصول إلى مئات النماذج تلقائياً.
-    """
     def __init__(self):
-        super().__init__("OpenRouter", "https://openrouter.ai/api/v1/chat/completions")
-        self.models_url = "https://openrouter.ai/api/v1/models"
-    async def fetch_models(self) -> List[str]:
-        loop = asyncio.get_event_loop()
-        try:
-            async with REQUEST_LIMITER:
-                models = await loop.run_in_executor(EXECUTOR, self._fetch_openrouter_models)
-                return list(set(models))
-        except Exception as e:
-            logger.debug(f"[OpenRouter] fetch_models error: {e}")
-            return []
-    def _fetch_openrouter_models(self) -> List[str]:
-        with curl_requests.Session() as session:
-            try:
-                resp = session.get(self.models_url, headers=self.headers, impersonate="chrome124", timeout=10)
-                if resp.status_code == 200:
-                    data = resp.json()
-                    return self._parse_models_response(data)
-            except Exception:
-                pass
-        return []
-PROVIDER_INSTANCES: List[BaseProvider] = [
     GroqProvider(),
     GeminiProvider(),
     PollinationsProvider(),
     OllamaProvider(),
-    PerplexityProvider(),
-    OpenRouterProvider(),
 ]
 # =========================================================
-# 5. إدارة الحالة والتخزين المؤقت
 # =========================================================
 async def load_cache():
-    global WORKING_MODELS, PROVIDER_MODEL_MAP
     try:
-        if os.path.exists(CACHE_FILE):
-            with open(CACHE_FILE, "r", encoding="utf-8") as f:
-                data = json.load(f)
-            async with STATE_LOCK:
-                WORKING_MODELS = data.get("WORKING_MODELS", {})
-                PROVIDER_MODEL_MAP = data.get("PROVIDER_MODEL_MAP", {})
-            logger.info(f"✅ Cache loaded — {len(WORKING_MODELS)} models.")
     except Exception as e:
-        logger.error(f"⚠️ Cache load error: {e}")
 async def save_cache():
     try:
         async with STATE_LOCK:
             snapshot = {
-                "WORKING_MODELS": dict(WORKING_MODELS),
-                "PROVIDER_MODEL_MAP": dict(PROVIDER_MODEL_MAP)
             }
-        with open(CACHE_FILE, "w", encoding="utf-8") as f:
-            json.dump(snapshot, f, indent=4, ensure_ascii=False)
-        logger.info("💾 Cache saved.")
     except Exception as e:
-        logger.error(f"⚠️ Cache save error: {e}")
 # =========================================================
-# 6. محرك الاستكشاف والتحقق
 # =========================================================
 async def discovery_engine():
     await load_cache()
     while True:
-        logger.info("📡 Starting validated discovery cycle …")
-        fresh_provider_map: Dict[str, List[str]] = {}
-        fresh_working_models: Dict[str, dict] = {}
-        # Run provider discovery concurrently
-        tasks = {provider: asyncio.create_task(provider.fetch_models()) for provider in PROVIDER_INSTANCES}
         for provider, task in tasks.items():
-            discovered = await task
-            clean_models: List[str] = []
-            # Filter & validate
             validation_tasks = []
-            filtered = [
-                m for m in discovered
-                if not any(bl in m.lower() for bl in MODEL_BLACKLIST)
-            ]
-            logger.info(f"[{provider.name}] Discovered {len(filtered)} candidate models — validating …")
-            for model in filtered:
-                validation_tasks.append((model, asyncio.create_task(provider.validate_model(model))))
             for model, vtask in validation_tasks:
                 try:
-                    is_valid, latency = await vtask
                 except Exception:
-                    is_valid, latency = False, 0.0
-                if is_valid:
-                    clean_models.append(model)
-                    if model not in fresh_working_models:
-                        fresh_working_models[model] = {
-                            "providers": [],
-                            "latency": latency,
-                            "health": provider.health,
-                            "aliases": provider.aliases
-                        }
-                    info = fresh_working_models[model]
-                    if provider.url not in info["providers"]:
-                        info["providers"].append(provider.url)
-                    info["latency"] = (info["latency"] + latency) / 2
-                    info["health"] = (info["health"] + provider.health) // 2
-            fresh_provider_map[provider.url] = clean_models
-            logger.info(f"✅ [{provider.name}] {len(clean_models)} valid models.")
-        # Atomic publish
         async with STATE_LOCK:
-            global PROVIDER_MODEL_MAP, WORKING_MODELS
             PROVIDER_MODEL_MAP = fresh_provider_map
             WORKING_MODELS = fresh_working_models
         await save_cache()
-        logger.info(f"🚀 Orchestrator ready — {len(WORKING_MODELS)} active models.")
-        await asyncio.sleep(VALIDATION_INTERVAL)
 # =========================================================
-# 7. المعالج المركزي (Omega Handler)
 # =========================================================
 @app.on_event("startup")
 async def startup():
-    asyncio.create_task(discovery_engine())
-@app.api_route("/{path:path}", methods=["GET", "HEAD", "POST", "OPTIONS"])
-async def omega_handler(request: Request, path: str):
-    # ---- CORS preflight ----
-    if request.method == "OPTIONS":
-        return Response(status_code=204, headers={
-            "Access-Control-Allow-Origin": "*",
-            "Access-Control-Allow-Methods": "GET, POST, OPTIONS",
-            "Access-Control-Allow-Headers": "*"
-        })
     path_lower = path.lower().strip("/")
-    # ---- Model listing ----
-    if request.method in ("GET", "HEAD") and ("models" in path_lower or path_lower in ("", "v1", "v1/")):
-        if "models" in path_lower:
-            async with STATE_LOCK:
-                model_ids = list(WORKING_MODELS.keys()) if WORKING_MODELS else ["gpt-4o", "claude-3-5-sonnet"]
-            return {
-                "object": "list",
-                "data": [
-                    {
-                        "id": m,
-                        "object": "model",
-                        "created": int(time.time()),
-                        "owned_by": "omega-orchestrator"
-                    }
-                    for m in sorted(model_ids)
-                ]
-            }
-        return Response(status_code=200)
-    # ---- Chat completions ----
-    if request.method == "POST" and any(x in path_lower for x in ("messages", "completions", "chat")):
-        # --- Auth check (مُصلح) ---
-        if not verify_api_key(request):
-            raise HTTPException(status_code=401, detail="Unauthorized: invalid or missing API key.")
-        # --- Parse body ---
-        try:
-            body = await request.json()
-        except Exception:
-            raise HTTPException(status_code=400, detail="Invalid JSON body.")
-        model    = body.get("model", "gpt-4o")
-        messages = body.get("messages", [])
-        if not messages:
-            raise HTTPException(status_code=400, detail="messages field is required.")
-        # --- Smart routing ---
-        async with STATE_LOCK:
-            model_info = WORKING_MODELS.get(model)
-            if model_info and model_info.get("providers"):
-                target_urls = list(model_info["providers"])
-            else:
-                target_urls = [p.url for p in PROVIDER_INSTANCES]
-        providers = [p for p in PROVIDER_INSTANCES if p.url in target_urls]
-        # Sort: fewest fails → highest health → lowest latency
-        providers.sort(key=lambda p: (p.fails, -p.health, p.latency))
-        reply: Optional[str] = None
-        for provider in providers:
-            if time.time() < provider.cooldown:
-                continue
-            reply = await provider.attempt_request(body)
-            if reply:
-                logger.info(f"✅ Served by [{provider.name}] model={model}")
-                break
-        # --- Internal fallback via g4f ---
-        if not reply:
-            try:
-                from g4f.client import Client
-                loop = asyncio.get_event_loop()
-                def fallback_req():
-                    return (
-                        Client()
-                        .chat.completions.create(model=model, messages=messages)
-                        .choices[0].message.content
                     )
-                reply = await loop.run_in_executor(EXECUTOR, fallback_req)
-                logger.info("🔄 Served via g4f fallback.")
-            except Exception as e:
-                logger.warning(f"g4f fallback failed: {e}")
-        if not reply:
-            raise HTTPException(status_code=502, detail="Orchestration Failed: all routes exhausted.")
-        # --- Format response ---
-        # Anthropic messages format
-        if "messages" in path_lower and "chat" not in path_lower:
-            return {
-                "id": f"msg_{uuid.uuid4().hex}",
-                "type": "message",
-                "role": "assistant",
-                "model": model,
-                "content": [{"type": "text", "text": reply}],
-                "stop_reason": "end_turn",
-                "usage": {"input_tokens": 0, "output_tokens": 0}
-            }
-        # OpenAI chat.completions format (default)
         return {
-            "id": f"chatcmpl-{uuid.uuid4().hex}",
-            "object": "chat.completion",
-            "created": int(time.time()),
             "model": model,
-            "choices": [
                 {
-                    "index": 0,
-                    "message": {"role": "assistant", "content": reply},
-                    "finish_reason": "stop"
                 }
             ],
-            "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
         }
-    return Response(status_code=404)
 # =========================================================
-# 8. نقطة الدخول
 # =========================================================
 if __name__ == "__main__":
-    uvicorn.run(app, host="0.0.0.0", port=PORT, log_level="info")

 import logging
 import uvicorn
 import random
 from fastapi import FastAPI, HTTPException, Request, Response
 from fastapi.middleware.cors import CORSMiddleware
 from concurrent.futures import ThreadPoolExecutor
 from curl_cffi import requests as curl_requests
 from typing import Dict, List, Optional, Tuple
 # =========================================================
+# CONFIG
 # =========================================================
 API_KEY = os.environ.get("API_KEY", "sk-your-secret-key")
 PORT = int(os.environ.get("PORT", 7860))
 MAX_WORKERS = 50
 VALIDATION_INTERVAL = 300
 CACHE_FILE = "models_cache.json"
 MODEL_BLACKLIST = [
 VALIDATION_PROMPT = [
     {
         "role": "user",
+        "content": "Reply only with OK"
     }
 ]
 WORKING_MODELS: Dict[str, dict] = {}
 PROVIDER_MODEL_MAP: Dict[str, List[str]] = {}
 STATE_LOCK = asyncio.Lock()
 REQUEST_LIMITER = asyncio.Semaphore(25)
 EXECUTOR = ThreadPoolExecutor(max_workers=MAX_WORKERS)
+# =========================================================
+# FASTAPI
+# =========================================================
+app = FastAPI(
+    title="Omega Orchestrator",
+    version="3.0"
+)
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
     allow_headers=["*"]
 )
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s - %(levelname)s - %(message)s"
+)
+logger = logging.getLogger("OMEGA")
+# =========================================================
+# HELPERS
+# =========================================================
 def get_stealth_headers():
     return {
         "User-Agent": (
+            f"Mozilla/5.0 "
+            f"(Windows NT 10.0; Win64; x64) "
+            f"AppleWebKit/537.36 "
+            f"(KHTML, like Gecko) "
+            f"Chrome/{random.randint(120,124)}.0.0.0 "
+            f"Safari/537.36"
         ),
         "Origin": "https://g4f.space",
         "Referer": "https://g4f.space/",
     }
 def verify_api_key(request: Request) -> bool:
     if not API_KEY or API_KEY == "sk-your-secret-key":
+        return True
     auth_header = request.headers.get("Authorization", "")
+    x_api_key = request.headers.get("x-api-key", "")
     api_key_hdr = request.headers.get("api-key", "")
     candidates = []
     if auth_header.startswith("Bearer "):
+        candidates.append(
+            auth_header[len("Bearer "):].strip()
+        )
     if x_api_key:
         candidates.append(x_api_key.strip())
     if api_key_hdr:
         candidates.append(api_key_hdr.strip())
 # =========================================================
+# CONTENT EXTRACTION
 # =========================================================
 def extract_content(data) -> Optional[str]:
     if not isinstance(data, dict):
         return None
+    for field in (
+        "response",
+        "content",
+        "text",
+        "output",
+        "result",
+        "generated_text",
+        "completion"
+    ):
+        value = data.get(field)
+        if isinstance(value, str) and value.strip():
+            return value
+    if "choices" in data:
+        choices = data.get("choices")
+        if isinstance(choices, list) and choices:
+            choice = choices[0]
+            if isinstance(choice, dict):
+                msg = choice.get("message")
+                if isinstance(msg, dict):
+                    content = msg.get("content")
+                    if isinstance(content, str):
+                        return content
+                txt = choice.get("text")
+                if isinstance(txt, str):
+                    return txt
     if "message" in data:
+        message = data["message"]
+        if isinstance(message, str):
+            return message
+        if isinstance(message, dict):
+            content = message.get("content")
+            if isinstance(content, str):
+                return content
+            if isinstance(content, list):
+                texts = []
+                for block in content:
+                    if (
+                        isinstance(block, dict)
+                        and block.get("type") == "text"
+                    ):
+                        texts.append(block.get("text", ""))
                 combined = "".join(texts)
                 if combined.strip():
                     return combined
     return None
 # =========================================================
+# PROVIDERS
 # =========================================================
 class BaseProvider:
     def __init__(self, name: str, url: str):
         self.name = name
         self.url = url
         self.aliases: Dict[str, str] = {}
         self.fails = 0
         self.success = 0
         self.cooldown = 0.0
         self.health = 100
+        self.latency = 0.0
+    # =====================================================
+    # AUTO MODEL IMPORT
+    # =====================================================
     async def fetch_models(self) -> List[str]:
         loop = asyncio.get_event_loop()
         try:
             async with REQUEST_LIMITER:
+                models = await loop.run_in_executor(
+                    EXECUTOR,
+                    self._fetch_models_sync
+                )
                 return list(set(models))
         except Exception as e:
+            logger.debug(
+                f"[{self.name}] fetch_models error: {e}"
+            )
             return []
     def _fetch_models_sync(self) -> List[str]:
+        discovered = set()
         endpoints = [
             f"{self.url}/v1/models",
             f"{self.url}/models",
+            self.url
         ]
         with curl_requests.Session() as session:
             for endpoint in endpoints:
                 try:
                     resp = session.get(
                         endpoint,
+                        headers=get_stealth_headers(),
                         impersonate="chrome124",
+                        timeout=15
                     )
                     if resp.status_code != 200:
                         continue
+                    try:
+                        data = resp.json()
+                    except:
+                        continue
+                    extracted = self._deep_extract_models(data)
+                    for model in extracted:
+                        if not isinstance(model, str):
+                            continue
+                        model = model.strip()
+                        if not model:
+                            continue
+                        if any(
+                            x in model.lower()
+                            for x in MODEL_BLACKLIST
+                        ):
+                            continue
+                        discovered.add(model)
                 except Exception:
                     continue
+        return list(discovered)
+    # =====================================================
+    # FULL DEEP AUTO EXTRACTION
+    # =====================================================
+    def _deep_extract_models(self, data):
+        discovered = set()
+        model_keys = {
+            "id",
+            "model",
+            "model_id",
+            "name",
+            "slug"
+        }
+        model_tokens = [
+            "gpt",
+            "claude",
+            "llama",
+            "gemini",
+            "mixtral",
+            "mistral",
+            "phi",
+            "qwen",
+            "deepseek",
+            "command",
+            "sonnet",
+            "opus",
+            "haiku"
+        ]
+        def walker(obj):
+            # dict
+            if isinstance(obj, dict):
+                for key, value in obj.items():
+                    if key.lower() in model_keys:
+                        if isinstance(value, str):
+                            lower = value.lower()
+                            if any(
+                                token in lower
+                                for token in model_tokens
+                            ):
+                                if len(value) < 80:
+                                    discovered.add(value)
+                    walker(value)
+            # list
+            elif isinstance(obj, list):
+                for item in obj:
+                    walker(item)
+            # raw strings
+            elif isinstance(obj, str):
+                lower = obj.lower()
+                if any(
+                    token in lower
+                    for token in model_tokens
+                ):
+                    if len(obj) < 80:
+                        discovered.add(obj)
+        walker(data)
+        return list(discovered)
+    # =====================================================
+    # VALIDATION
+    # =====================================================
+    async def validate_model(
+        self,
+        model: str
+    ) -> Tuple[bool, float]:
+        payload = {
+            "model": model,
+            "messages": VALIDATION_PROMPT
+        }
         start = time.time()
         result = await self.attempt_request(payload)
         latency = time.time() - start
+        if (
+            result
+            and "ok" in result.lower()
+        ):
             self.success += 1
+            self.latency = (
+                (self.latency + latency) / 2
+                if self.latency > 0
+                else latency
+            )
+            self._update_health()
+            return True, latency
+        self.fails += 1
+        self._update_health()
+        return False, latency
+    # =====================================================
+    # REQUEST
+    # =====================================================
+    async def attempt_request(
+        self,
+        payload: dict
+    ) -> Optional[str]:
         if time.time() < self.cooldown:
             return None
+        payload = payload.copy()
         model_id = payload.get("model", "")
+        payload["model"] = (
+            self.aliases.get(model_id, model_id)
+        )
         try:
             async with REQUEST_LIMITER:
                 loop = asyncio.get_event_loop()
+                result = await loop.run_in_executor(
+                    EXECUTOR,
+                    self._make_request,
+                    payload
+                )
+            if result:
                 self.fails = 0
                 self.success += 1
+                self._update_health()
+                return result
             self.fails += 1
+            self._update_health()
             if self.fails >= 3:
                 self.cooldown = time.time() + 60
             return None
         except Exception:
             self.fails += 1
+            self._update_health()
             return None
+    def _make_request(
+        self,
+        payload: dict
+    ) -> Optional[str]:
         with curl_requests.Session() as session:
             try:
                 resp = session.post(
                     self.url,
+                    headers=get_stealth_headers(),
                     json=payload,
                     impersonate="chrome124",
                     timeout=25
                 )
+                if resp.status_code != 200:
+                    return None
+                try:
                     data = resp.json()
+                except:
+                    return None
+                return extract_content(data)
             except Exception:
+                return None
+    # =====================================================
+    # HEALTH
+    # =====================================================
+    def _update_health(self):
+        total = self.success + self.fails
+        if total > 0:
+            self.health = int(
+                (self.success / total) * 100
+            )
+# =========================================================
+# PROVIDERS
+# =========================================================
 class GroqProvider(BaseProvider):
     """Provider registered as "Groq" on the g4f.space Groq endpoint.

     Its alias table maps the requested model ``gpt-4o`` to
     ``llama-3-70b``.
     """

     def __init__(self):
         name, endpoint = "Groq", "https://g4f.space/api/groq"
         super().__init__(name, endpoint)
         self.aliases = {"gpt-4o": "llama-3-70b"}
 class GeminiProvider(BaseProvider):
     """Provider registered as "Gemini" on the g4f.space Gemini endpoint.

     Its alias table maps the requested model ``claude-3-5-sonnet`` to
     ``gemini-1.5-pro``.
     """

     def __init__(self):
         name, endpoint = "Gemini", "https://g4f.space/api/gemini"
         super().__init__(name, endpoint)
         self.aliases = {"claude-3-5-sonnet": "gemini-1.5-pro"}
 class PollinationsProvider(BaseProvider):
     """Provider registered as "Pollinations" on its g4f.space endpoint.

     Its alias table maps the requested model ``gpt-4o`` to ``gpt-4``.
     """

     def __init__(self):
         name, endpoint = "Pollinations", "https://g4f.space/api/pollinations"
         super().__init__(name, endpoint)
         self.aliases = {"gpt-4o": "gpt-4"}
 class OllamaProvider(BaseProvider):
     """Provider registered as "Ollama" on the g4f.space Ollama endpoint.

     Defines no aliases of its own.
     """

     def __init__(self):
         name, endpoint = "Ollama", "https://g4f.space/api/ollama"
         super().__init__(name, endpoint)
+class PerplexityProvider(BaseProvider):
     def __init__(self):
+        super().__init__(
+            "Perplexity",
+            "https://g4f.space/api/perplexity"
+        )
+PROVIDER_INSTANCES = [
     GroqProvider(),
     GeminiProvider(),
     PollinationsProvider(),
     OllamaProvider(),
+    PerplexityProvider()
 ]
 # =========================================================
+# CACHE
 # =========================================================
 async def load_cache():
     """Restore ``WORKING_MODELS`` / ``PROVIDER_MODEL_MAP`` from ``CACHE_FILE``.

     A missing cache file is not an error (first run) and returns silently.
     Any other read or parse problem is logged and leaves the globals
     untouched.  Each section is installed only when it is a JSON object;
     a missing, ``null`` or otherwise malformed section falls back to an
     empty dict so later ``.get()`` / ``len()`` calls on the globals
     cannot fail.
     """
     global WORKING_MODELS
     global PROVIDER_MODEL_MAP

     try:
         # Open directly instead of checking for existence first: avoids
         # the race between the check and the open.
         with open(
             CACHE_FILE,
             "r",
             encoding="utf-8"
         ) as f:
             data = json.load(f)
     except FileNotFoundError:
         return
     except Exception as e:
         logger.error(
             f"Cache load error: {e}"
         )
         return

     if not isinstance(data, dict):
         logger.error(
             "Cache load error: top-level JSON value is not an object"
         )
         return

     models = data.get("WORKING_MODELS")
     provider_map = data.get("PROVIDER_MODEL_MAP")

     async with STATE_LOCK:
         WORKING_MODELS = models if isinstance(models, dict) else {}
         PROVIDER_MODEL_MAP = (
             provider_map if isinstance(provider_map, dict) else {}
         )
         loaded = len(WORKING_MODELS)

     logger.info(
         f"✅ Cache loaded ({loaded} models)"
     )
 async def save_cache():
     """Persist the current model registry to ``CACHE_FILE``.

     The snapshot is taken under ``STATE_LOCK``.  The JSON is written to
     a sibling temporary file and then moved over ``CACHE_FILE`` with
     ``os.replace``, so an interrupted write never leaves a truncated
     cache behind.  Errors are logged, never raised.
     """
     tmp_path = f"{CACHE_FILE}.tmp"
     try:
         async with STATE_LOCK:
             snapshot = {
                 "WORKING_MODELS": WORKING_MODELS,
                 "PROVIDER_MODEL_MAP": PROVIDER_MODEL_MAP
             }
         with open(
             tmp_path,
             "w",
             encoding="utf-8"
         ) as f:
             json.dump(
                 snapshot,
                 f,
                 indent=4,
                 ensure_ascii=False
             )
         # Atomic swap: readers see either the old or the new file.
         os.replace(tmp_path, CACHE_FILE)
     except Exception as e:
         logger.error(
             f"Cache save error: {e}"
         )
 # =========================================================
+# DISCOVERY ENGINE
 # =========================================================
 async def _validate_provider_models(provider, discovered, working_models):
     """Validate *discovered* on *provider* and record the survivors.

     All validations are started concurrently, then awaited in discovery
     order.  Each valid model is added to *working_models* (mutated in
     place); the first provider to validate a model supplies its
     ``latency`` and ``health`` entry.

     Returns:
         List of the model names that validated on this provider.
     """
     validation_tasks = [
         (model, asyncio.create_task(provider.validate_model(model)))
         for model in discovered
     ]
     clean_models = []
     for model, vtask in validation_tasks:
         try:
             valid, latency = await vtask
         except Exception:
             valid, latency = False, 0
         if not valid:
             continue
         clean_models.append(model)
         info = working_models.setdefault(
             model,
             {
                 "providers": [],
                 "latency": latency,
                 "health": provider.health
             }
         )
         if provider.url not in info["providers"]:
             info["providers"].append(provider.url)
     return clean_models


 async def _run_discovery_cycle():
     """Fetch and validate models for every provider once.

     Model lists are fetched from all providers concurrently; validation
     then proceeds provider by provider.

     Returns:
         ``(provider_map, working_models)`` ready to replace the globals.
     """
     fresh_provider_map = {}
     fresh_working_models = {}
     tasks = {
         provider: asyncio.create_task(
             provider.fetch_models()
         )
         for provider in PROVIDER_INSTANCES
     }
     for provider, task in tasks.items():
         try:
             discovered = await task
         except Exception:
             discovered = []
         # Tolerate a provider that returns None instead of a list.
         discovered = discovered or []
         logger.info(
             f"[{provider.name}] discovered {len(discovered)} models"
         )
         clean_models = await _validate_provider_models(
             provider,
             discovered,
             fresh_working_models
         )
         fresh_provider_map[provider.url] = clean_models
         logger.info(
             f"[{provider.name}] validated {len(clean_models)} models"
         )
     return fresh_provider_map, fresh_working_models


 async def discovery_engine():
     """Background loop that keeps the model registry up to date.

     Loads the on-disk cache once, then repeats forever: run a discovery
     cycle, swap the results into ``PROVIDER_MODEL_MAP`` /
     ``WORKING_MODELS`` under ``STATE_LOCK``, persist them, and sleep for
     ``VALIDATION_INTERVAL`` seconds.

     An unexpected error inside a cycle is logged and the loop carries on
     with the previous registry, instead of silently ending the
     background task.  Task cancellation is not swallowed.
     """
     global WORKING_MODELS
     global PROVIDER_MODEL_MAP
     await load_cache()
     while True:
         logger.info(
             "📡 Discovery cycle started..."
         )
         try:
             provider_map, working_models = await _run_discovery_cycle()
             async with STATE_LOCK:
                 PROVIDER_MODEL_MAP = provider_map
                 WORKING_MODELS = working_models
             await save_cache()
             logger.info(
                 f"🚀 Active models: {len(WORKING_MODELS)}"
             )
         except Exception:
             logger.exception("Discovery cycle failed")
         await asyncio.sleep(
             VALIDATION_INTERVAL
         )
 # =========================================================
+# STARTUP
 # =========================================================
 @app.on_event("startup")
 async def startup():
     """Launch the background discovery loop when the application starts.

     The task handle is kept on ``app.state`` because the event loop only
     holds a weak reference to tasks; a task nobody references can be
     garbage-collected before it finishes.
     """
     app.state.discovery_task = asyncio.create_task(
         discovery_engine()
     )
+# =========================================================
+# UNIVERSAL ROUTER
+# =========================================================
+@app.api_route(
+    "/{path:path}",
+    methods=["GET", "POST", "HEAD", "OPTIONS"]
+)
+async def universal_handler(
+    request: Request,
+    path: str
+):
     path_lower = path.lower().strip("/")
+    # =====================================================
+    # OPTIONS
+    # =====================================================
+    if request.method == "OPTIONS":
+        return Response(
+            status_code=204
+        )
+    # =====================================================
+    # HEALTH
+    # =====================================================
+    if request.method in ("GET", "HEAD"):
+        if (
+            path_lower in ("", "v1", "v1/")
+            or "models" in path_lower
+        ):
+            if "models" in path_lower:
+                async with STATE_LOCK:
+                    models = (
+                        list(WORKING_MODELS.keys())
+                        if WORKING_MODELS
+                        else ["gpt-4o"]
+                    )
+                return {
+                    "object": "list",
+                    "data": [
+                        {
+                            "id": model,
+                            "object": "model",
+                            "created": int(time.time()),
+                            "owned_by": "omega"
+                        }
+                        for model in sorted(models)
+                    ]
+                }
+            return Response(status_code=200)
+    # =====================================================
+    # AUTH
+    # =====================================================
+    if not verify_api_key(request):
+        raise HTTPException(
+            status_code=401,
+            detail="Unauthorized"
+        )
+    # =====================================================
+    # REQUEST BODY
+    # =====================================================
+    try:
+        body = await request.json()
+    except Exception:
+        raise HTTPException(
+            status_code=400,
+            detail="Invalid JSON"
+        )
+    model = body.get(
+        "model",
+        "gpt-4o"
+    )
+    messages = body.get(
+        "messages",
+        []
+    )
+    if not messages:
+        raise HTTPException(
+            status_code=400,
+            detail="messages required"
+        )
+    # =====================================================
+    # ROUTING
+    # =====================================================
+    async with STATE_LOCK:
+        model_info = WORKING_MODELS.get(model)
+        if (
+            model_info
+            and model_info.get("providers")
+        ):
+            target_urls = model_info["providers"]
+        else:
+            target_urls = [
+                p.url
+                for p in PROVIDER_INSTANCES
+            ]
+    providers = [
+        p
+        for p in PROVIDER_INSTANCES
+        if p.url in target_urls
+    ]
+    providers.sort(
+        key=lambda p: (
+            p.fails,
+            -p.health,
+            p.latency
+        )
+    )
+    reply = None
+    for provider in providers:
+        if time.time() < provider.cooldown:
+            continue
+        reply = await provider.attempt_request(body)
+        if reply:
+            logger.info(
+                f"✅ Served by {provider.name}"
+            )
+            break
+    # =====================================================
+    # FALLBACK
+    # =====================================================
+    if not reply:
+        try:
+            from g4f.client import Client
+            loop = asyncio.get_event_loop()
+            def fallback_req():
+                return (
+                    Client()
+                    .chat.completions
+                    .create(
+                        model=model,
+                        messages=messages
                     )
+                    .choices[0]
+                    .message.content
+                )
+            reply = await loop.run_in_executor(
+                EXECUTOR,
+                fallback_req
+            )
+            logger.info(
+                "🔄 Served via fallback"
+            )
+        except Exception as e:
+            logger.warning(
+                f"Fallback failed: {e}"
+            )
+    if not reply:
+        raise HTTPException(
+            status_code=502,
+            detail="All providers failed"
+        )
+    # =====================================================
+    # CLAUDE FORMAT
+    # =====================================================
+    if (
+        "messages" in path_lower
+        and "chat" not in path_lower
+    ):
         return {
+            "id": f"msg_{uuid.uuid4().hex}",
+            "type": "message",
+            "role": "assistant",
             "model": model,
+            "content": [
                 {
+                    "type": "text",
+                    "text": reply
                 }
             ],
+            "stop_reason": "end_turn"
         }
+    # =====================================================
+    # OPENAI FORMAT
+    # =====================================================
+    return {
+        "id": f"chatcmpl-{uuid.uuid4().hex}",
+        "object": "chat.completion",
+        "created": int(time.time()),
+        "model": model,
+        "choices": [
+            {
+                "index": 0,
+                "message": {
+                    "role": "assistant",
+                    "content": reply
+                },
+                "finish_reason": "stop"
+            }
+        ]
+    }
 # =========================================================
+# RUN
 # =========================================================
 if __name__ == "__main__":
     # Serve the app directly when this file is executed as a script.
     server_options = {
         "host": "0.0.0.0",
         "port": PORT,
         "log_level": "info",
     }
     uvicorn.run(app, **server_options)