bahi-bh commited on
Commit
32d4bc3
·
verified ·
1 Parent(s): b8ac474

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +776 -360
app.py CHANGED
@@ -6,20 +6,24 @@ import asyncio
6
  import logging
7
  import uvicorn
8
  import random
 
9
  from fastapi import FastAPI, HTTPException, Request, Response
10
  from fastapi.middleware.cors import CORSMiddleware
 
11
  from concurrent.futures import ThreadPoolExecutor
12
  from curl_cffi import requests as curl_requests
 
13
  from typing import Dict, List, Optional, Tuple
14
 
15
  # =========================================================
16
- # 1. الإعدادات العليا (Orchestration Config)
17
  # =========================================================
 
18
  API_KEY = os.environ.get("API_KEY", "sk-your-secret-key")
19
  PORT = int(os.environ.get("PORT", 7860))
 
20
  MAX_WORKERS = 50
21
  VALIDATION_INTERVAL = 300
22
- GLOBAL_TIMEOUT = 60
23
  CACHE_FILE = "models_cache.json"
24
 
25
  MODEL_BLACKLIST = [
@@ -39,20 +43,27 @@ MODEL_BLACKLIST = [
39
  VALIDATION_PROMPT = [
40
  {
41
  "role": "user",
42
- "content": "Reply only with: OK"
43
  }
44
  ]
45
 
46
- # Capability Registry
47
  WORKING_MODELS: Dict[str, dict] = {}
48
  PROVIDER_MODEL_MAP: Dict[str, List[str]] = {}
49
 
50
- # Atomic State & Concurrency Control
51
  STATE_LOCK = asyncio.Lock()
52
  REQUEST_LIMITER = asyncio.Semaphore(25)
 
53
  EXECUTOR = ThreadPoolExecutor(max_workers=MAX_WORKERS)
54
 
55
- app = FastAPI(title="Omega Orchestrator", version="2.0")
 
 
 
 
 
 
 
 
56
  app.add_middleware(
57
  CORSMiddleware,
58
  allow_origins=["*"],
@@ -60,16 +71,27 @@ app.add_middleware(
60
  allow_headers=["*"]
61
  )
62
 
63
- logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
64
- logger = logging.getLogger("ORCHESTRATOR")
 
 
 
 
65
 
 
 
 
66
 
67
  def get_stealth_headers():
 
68
  return {
69
  "User-Agent": (
70
- f"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
71
- f"AppleWebKit/537.36 (KHTML, like Gecko) "
72
- f"Chrome/{random.randint(120, 124)}.0.0.0 Safari/537.36"
 
 
 
73
  ),
74
  "Origin": "https://g4f.space",
75
  "Referer": "https://g4f.space/",
@@ -79,33 +101,25 @@ def get_stealth_headers():
79
  }
80
 
81
 
82
- # =========================================================
83
- # 2. التحقق من المصادقة (Auth Verification) — مُصلح
84
- # =========================================================
85
  def verify_api_key(request: Request) -> bool:
86
- """
87
- يدعم ثلاث طرق للمصادقة:
88
- 1. Authorization: Bearer <key>
89
- 2. x-api-key: <key>
90
- 3. api-key: <key>
91
- إذا لم يُضبط API_KEY فالوصول مفتوح.
92
- """
93
  if not API_KEY or API_KEY == "sk-your-secret-key":
94
- return True # No key configured → open access
95
 
96
  auth_header = request.headers.get("Authorization", "")
97
- x_api_key = request.headers.get("x-api-key", "")
98
  api_key_hdr = request.headers.get("api-key", "")
99
 
100
  candidates = []
101
 
102
- # Bearer token
103
  if auth_header.startswith("Bearer "):
104
- candidates.append(auth_header[len("Bearer "):].strip())
 
 
105
 
106
- # x-api-key / api-key headers
107
  if x_api_key:
108
  candidates.append(x_api_key.strip())
 
109
  if api_key_hdr:
110
  candidates.append(api_key_hdr.strip())
111
 
@@ -113,538 +127,940 @@ def verify_api_key(request: Request) -> bool:
113
 
114
 
115
  # =========================================================
116
- # 3. مستخرج المحتوى المركزي (Central Content Extractor)
117
  # =========================================================
 
118
  def extract_content(data) -> Optional[str]:
 
119
  if not isinstance(data, dict):
120
  return None
121
 
122
- # Direct content fields
123
- for field in ("response", "content", "text", "output", "result", "generated_text"):
124
- if field in data and isinstance(data[field], str) and data[field].strip():
125
- return data[field]
126
-
127
- # OpenAI-style choices
128
- if "choices" in data and isinstance(data["choices"], list) and data["choices"]:
129
- choice = data["choices"][0]
130
- if isinstance(choice, dict):
131
- msg = choice.get("message", {})
132
- if isinstance(msg, dict) and "content" in msg:
133
- return msg["content"]
134
- if "text" in choice and choice["text"].strip():
135
- return choice["text"]
136
- if "delta" in choice and "content" in choice["delta"]:
137
- return choice["delta"]["content"]
138
-
139
- # Anthropic-style message
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
140
  if "message" in data:
141
- m = data["message"]
142
- if isinstance(m, dict) and "content" in m:
143
- c = m["content"]
144
- # content can be a list of blocks
145
- if isinstance(c, list):
146
- texts = [b.get("text", "") for b in c if isinstance(b, dict) and b.get("type") == "text"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
  combined = "".join(texts)
 
148
  if combined.strip():
149
  return combined
150
- elif isinstance(c, str) and c.strip():
151
- return c
152
- elif isinstance(m, str) and m.strip():
153
- return m
154
-
155
- # Nested data.message
156
- if "data" in data and isinstance(data["data"], dict):
157
- return extract_content(data["data"])
158
-
159
- # Completion field (older APIs)
160
- if "completion" in data and isinstance(data["completion"], str):
161
- return data["completion"]
162
 
163
  return None
164
 
165
 
166
  # =========================================================
167
- # 4. مزودات الخدمة (Provider Classes)
168
  # =========================================================
 
169
  class BaseProvider:
 
170
  def __init__(self, name: str, url: str):
 
171
  self.name = name
172
  self.url = url
173
- self.headers = get_stealth_headers()
174
  self.aliases: Dict[str, str] = {}
 
175
  self.fails = 0
176
  self.success = 0
 
177
  self.cooldown = 0.0
178
- self.latency = 0.0
179
  self.health = 100
 
180
 
181
- def update_health(self):
182
- total = self.success + self.fails
183
- if total > 0:
184
- self.health = int((self.success / total) * 100)
185
 
186
- # ------------------------------------------------------------------
187
- # Model Discovery
188
- # ------------------------------------------------------------------
189
  async def fetch_models(self) -> List[str]:
 
190
  loop = asyncio.get_event_loop()
 
191
  try:
 
192
  async with REQUEST_LIMITER:
193
- models = await loop.run_in_executor(EXECUTOR, self._fetch_models_sync)
 
 
 
 
 
194
  return list(set(models))
 
195
  except Exception as e:
196
- logger.debug(f"[{self.name}] fetch_models error: {e}")
 
 
 
 
197
  return []
198
 
199
  def _fetch_models_sync(self) -> List[str]:
200
- """
201
- يجرب نقاط نهاية متعددة لاستخراج جميع النماذج المتاحة.
202
- يدعم كافة الهياكل الشائعة: قوائم مسطّحة، قواميس، بيانات متداخلة.
203
- """
204
- discovered: List[str] = []
205
  endpoints = [
206
  f"{self.url}/v1/models",
207
  f"{self.url}/models",
208
- self.url,
209
  ]
210
 
211
  with curl_requests.Session() as session:
 
212
  for endpoint in endpoints:
 
213
  try:
 
214
  resp = session.get(
215
  endpoint,
216
- headers=self.headers,
217
  impersonate="chrome124",
218
- timeout=10
219
  )
 
220
  if resp.status_code != 200:
221
  continue
222
- data = resp.json()
223
- extracted = self._parse_models_response(data)
224
- if extracted:
225
- discovered.extend(extracted)
226
- break # Found models, no need to try other endpoints
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
227
  except Exception:
228
  continue
229
 
230
- return discovered
231
-
232
- @staticmethod
233
- def _parse_models_response(data) -> List[str]:
234
- """
235
- يستخرج معرّفات النماذج من أي هيكل بيانات محتمل.
236
- """
237
- ids: List[str] = []
238
-
239
- if isinstance(data, list):
240
- for item in data:
241
- if isinstance(item, str):
242
- ids.append(item)
243
- elif isinstance(item, dict):
244
- for key in ("id", "name", "model", "model_id"):
245
- if key in item and isinstance(item[key], str):
246
- ids.append(item[key])
247
- break
248
-
249
- elif isinstance(data, dict):
250
- # OpenAI-style: {"data": [...]}
251
- if "data" in data and isinstance(data["data"], list):
252
- ids.extend(BaseProvider._parse_models_response(data["data"]))
253
-
254
- # {"models": [...]}
255
- elif "models" in data and isinstance(data["models"], list):
256
- ids.extend(BaseProvider._parse_models_response(data["models"]))
257
-
258
- # {"result": [...]} or {"results": [...]}
259
- elif "result" in data and isinstance(data["result"], list):
260
- ids.extend(BaseProvider._parse_models_response(data["result"]))
261
- elif "results" in data and isinstance(data["results"], list):
262
- ids.extend(BaseProvider._parse_models_response(data["results"]))
263
-
264
- # Single model dict
265
- elif "id" in data:
266
- ids.append(data["id"])
267
-
268
- return ids
269
-
270
- # ------------------------------------------------------------------
271
- # Validation
272
- # ------------------------------------------------------------------
273
- async def validate_model(self, model: str) -> Tuple[bool, float]:
274
- payload = {"model": model, "messages": VALIDATION_PROMPT}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
275
  start = time.time()
 
276
  result = await self.attempt_request(payload)
 
277
  latency = time.time() - start
278
 
279
- ok = result is not None and "ok" in result.strip().lower()
280
- if ok:
 
 
 
281
  self.success += 1
282
- self.latency = (self.latency + latency) / 2 if self.latency > 0 else latency
283
- else:
284
- self.fails += 1
285
- self.update_health()
286
- return ok, latency
287
 
288
- # ------------------------------------------------------------------
289
- # Request Handling
290
- # ------------------------------------------------------------------
291
- async def attempt_request(self, payload: dict) -> Optional[str]:
292
- payload = payload.copy() # prevent mutation leakage
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
293
 
294
  if time.time() < self.cooldown:
295
  return None
296
 
 
 
297
  model_id = payload.get("model", "")
298
- payload["model"] = self.aliases.get(model_id, model_id)
 
 
 
299
 
300
  try:
 
301
  async with REQUEST_LIMITER:
 
302
  loop = asyncio.get_event_loop()
303
- content = await loop.run_in_executor(EXECUTOR, self._make_request, payload)
304
 
305
- if content:
 
 
 
 
 
 
 
306
  self.fails = 0
307
  self.success += 1
308
- self.update_health()
309
- return content
 
 
310
 
311
  self.fails += 1
312
- self.update_health()
 
 
313
  if self.fails >= 3:
314
  self.cooldown = time.time() + 60
 
315
  return None
316
 
317
  except Exception:
 
318
  self.fails += 1
319
- self.update_health()
 
 
320
  return None
321
 
322
- def _make_request(self, payload: dict) -> Optional[str]:
 
 
 
 
323
  with curl_requests.Session() as session:
 
324
  try:
 
325
  resp = session.post(
326
  self.url,
327
- headers=self.headers,
328
  json=payload,
329
  impersonate="chrome124",
330
  timeout=25
331
  )
332
- if resp.status_code == 200:
 
 
 
 
333
  data = resp.json()
334
- content = extract_content(data)
335
- if content and str(content).strip():
336
- return str(content).strip()
 
 
337
  except Exception:
338
- pass
339
- return None
340
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
341
 
342
- # ------------------------------------------------------------------
343
- # Provider Definitions
344
- # ------------------------------------------------------------------
345
  class GroqProvider(BaseProvider):
 
346
  def __init__(self):
347
- super().__init__("Groq", "https://g4f.space/api/groq")
348
- self.aliases = {"gpt-4o": "llama-3-70b"}
 
 
 
 
 
 
 
349
 
350
 
351
  class GeminiProvider(BaseProvider):
 
352
  def __init__(self):
353
- super().__init__("Gemini", "https://g4f.space/api/gemini")
354
- self.aliases = {"claude-3-5-sonnet": "gemini-1.5-pro"}
 
 
 
 
 
 
 
355
 
356
 
357
  class PollinationsProvider(BaseProvider):
 
358
  def __init__(self):
359
- super().__init__("Pollinations", "https://g4f.space/api/pollinations")
360
- self.aliases = {"gpt-4o": "gpt-4"}
 
 
 
 
 
 
 
361
 
362
 
363
  class OllamaProvider(BaseProvider):
 
364
  def __init__(self):
365
- super().__init__("Ollama", "https://g4f.space/api/ollama")
366
 
 
 
 
 
367
 
368
- class PerplexityProvider(BaseProvider):
369
- def __init__(self):
370
- super().__init__("Perplexity", "https://g4f.space/api/perplexity")
371
 
 
372
 
373
- class OpenRouterProvider(BaseProvider):
374
- """
375
- مزوّد إضافي: OpenRouter — يتيح الوصول إلى مئات النماذج تلقائياً.
376
- """
377
  def __init__(self):
378
- super().__init__("OpenRouter", "https://openrouter.ai/api/v1/chat/completions")
379
- self.models_url = "https://openrouter.ai/api/v1/models"
380
 
381
- async def fetch_models(self) -> List[str]:
382
- loop = asyncio.get_event_loop()
383
- try:
384
- async with REQUEST_LIMITER:
385
- models = await loop.run_in_executor(EXECUTOR, self._fetch_openrouter_models)
386
- return list(set(models))
387
- except Exception as e:
388
- logger.debug(f"[OpenRouter] fetch_models error: {e}")
389
- return []
390
 
391
- def _fetch_openrouter_models(self) -> List[str]:
392
- with curl_requests.Session() as session:
393
- try:
394
- resp = session.get(self.models_url, headers=self.headers, impersonate="chrome124", timeout=10)
395
- if resp.status_code == 200:
396
- data = resp.json()
397
- return self._parse_models_response(data)
398
- except Exception:
399
- pass
400
- return []
401
 
402
-
403
- PROVIDER_INSTANCES: List[BaseProvider] = [
404
  GroqProvider(),
405
  GeminiProvider(),
406
  PollinationsProvider(),
407
  OllamaProvider(),
408
- PerplexityProvider(),
409
- OpenRouterProvider(),
410
  ]
411
 
412
-
413
  # =========================================================
414
- # 5. إدارة الحالة والتخزين المؤقت
415
  # =========================================================
 
416
  async def load_cache():
417
- global WORKING_MODELS, PROVIDER_MODEL_MAP
 
 
 
 
 
 
418
  try:
419
- if os.path.exists(CACHE_FILE):
420
- with open(CACHE_FILE, "r", encoding="utf-8") as f:
421
- data = json.load(f)
422
- async with STATE_LOCK:
423
- WORKING_MODELS = data.get("WORKING_MODELS", {})
424
- PROVIDER_MODEL_MAP = data.get("PROVIDER_MODEL_MAP", {})
425
- logger.info(f"✅ Cache loaded — {len(WORKING_MODELS)} models.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
426
  except Exception as e:
427
- logger.error(f"⚠️ Cache load error: {e}")
 
 
 
428
 
429
 
430
  async def save_cache():
 
431
  try:
 
432
  async with STATE_LOCK:
 
433
  snapshot = {
434
- "WORKING_MODELS": dict(WORKING_MODELS),
435
- "PROVIDER_MODEL_MAP": dict(PROVIDER_MODEL_MAP)
436
  }
437
- with open(CACHE_FILE, "w", encoding="utf-8") as f:
438
- json.dump(snapshot, f, indent=4, ensure_ascii=False)
439
- logger.info("💾 Cache saved.")
 
 
 
 
 
 
 
 
 
 
 
440
  except Exception as e:
441
- logger.error(f"⚠️ Cache save error: {e}")
442
 
 
 
 
443
 
444
  # =========================================================
445
- # 6. محرك الاستكشاف والتحقق
446
  # =========================================================
 
447
  async def discovery_engine():
 
 
 
 
448
  await load_cache()
449
 
450
  while True:
451
- logger.info("📡 Starting validated discovery cycle …")
452
 
453
- fresh_provider_map: Dict[str, List[str]] = {}
454
- fresh_working_models: Dict[str, dict] = {}
 
455
 
456
- # Run provider discovery concurrently
457
- tasks = {provider: asyncio.create_task(provider.fetch_models()) for provider in PROVIDER_INSTANCES}
 
 
 
 
 
 
 
458
 
459
  for provider, task in tasks.items():
460
- discovered = await task
461
- clean_models: List[str] = []
462
 
463
- # Filter & validate
 
 
 
 
 
 
 
 
 
 
 
 
 
464
  validation_tasks = []
465
- filtered = [
466
- m for m in discovered
467
- if not any(bl in m.lower() for bl in MODEL_BLACKLIST)
468
- ]
469
 
470
- logger.info(f"[{provider.name}] Discovered {len(filtered)} candidate models — validating …")
471
 
472
- for model in filtered:
473
- validation_tasks.append((model, asyncio.create_task(provider.validate_model(model))))
 
 
 
 
 
 
474
 
475
  for model, vtask in validation_tasks:
 
476
  try:
477
- is_valid, latency = await vtask
 
 
478
  except Exception:
479
- is_valid, latency = False, 0.0
480
 
481
- if is_valid:
482
- clean_models.append(model)
483
 
484
- if model not in fresh_working_models:
485
- fresh_working_models[model] = {
486
- "providers": [],
487
- "latency": latency,
488
- "health": provider.health,
489
- "aliases": provider.aliases
490
- }
491
 
492
- info = fresh_working_models[model]
493
- if provider.url not in info["providers"]:
494
- info["providers"].append(provider.url)
495
- info["latency"] = (info["latency"] + latency) / 2
496
- info["health"] = (info["health"] + provider.health) // 2
497
 
498
- fresh_provider_map[provider.url] = clean_models
499
- logger.info(f"✅ [{provider.name}] {len(clean_models)} valid models.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
500
 
501
- # Atomic publish
502
  async with STATE_LOCK:
503
- global PROVIDER_MODEL_MAP, WORKING_MODELS
504
  PROVIDER_MODEL_MAP = fresh_provider_map
505
  WORKING_MODELS = fresh_working_models
506
 
507
  await save_cache()
508
- logger.info(f"🚀 Orchestrator ready — {len(WORKING_MODELS)} active models.")
509
- await asyncio.sleep(VALIDATION_INTERVAL)
510
 
 
 
 
 
 
 
 
511
 
512
  # =========================================================
513
- # 7. المعالج المركزي (Omega Handler)
514
  # =========================================================
 
515
  @app.on_event("startup")
516
  async def startup():
517
- asyncio.create_task(discovery_engine())
518
 
 
 
 
519
 
520
- @app.api_route("/{path:path}", methods=["GET", "HEAD", "POST", "OPTIONS"])
521
- async def omega_handler(request: Request, path: str):
522
- # ---- CORS preflight ----
523
- if request.method == "OPTIONS":
524
- return Response(status_code=204, headers={
525
- "Access-Control-Allow-Origin": "*",
526
- "Access-Control-Allow-Methods": "GET, POST, OPTIONS",
527
- "Access-Control-Allow-Headers": "*"
528
- })
 
 
 
529
 
530
  path_lower = path.lower().strip("/")
531
 
532
- # ---- Model listing ----
533
- if request.method in ("GET", "HEAD") and ("models" in path_lower or path_lower in ("", "v1", "v1/")):
534
- if "models" in path_lower:
535
- async with STATE_LOCK:
536
- model_ids = list(WORKING_MODELS.keys()) if WORKING_MODELS else ["gpt-4o", "claude-3-5-sonnet"]
537
- return {
538
- "object": "list",
539
- "data": [
540
- {
541
- "id": m,
542
- "object": "model",
543
- "created": int(time.time()),
544
- "owned_by": "omega-orchestrator"
545
- }
546
- for m in sorted(model_ids)
547
- ]
548
- }
549
- return Response(status_code=200)
550
 
551
- # ---- Chat completions ----
552
- if request.method == "POST" and any(x in path_lower for x in ("messages", "completions", "chat")):
553
 
554
- # --- Auth check (مُصلح) ---
555
- if not verify_api_key(request):
556
- raise HTTPException(status_code=401, detail="Unauthorized: invalid or missing API key.")
557
 
558
- # --- Parse body ---
559
- try:
560
- body = await request.json()
561
- except Exception:
562
- raise HTTPException(status_code=400, detail="Invalid JSON body.")
563
 
564
- model = body.get("model", "gpt-4o")
565
- messages = body.get("messages", [])
566
 
567
- if not messages:
568
- raise HTTPException(status_code=400, detail="messages field is required.")
 
 
569
 
570
- # --- Smart routing ---
571
- async with STATE_LOCK:
572
- model_info = WORKING_MODELS.get(model)
573
- if model_info and model_info.get("providers"):
574
- target_urls = list(model_info["providers"])
575
- else:
576
- target_urls = [p.url for p in PROVIDER_INSTANCES]
577
 
578
- providers = [p for p in PROVIDER_INSTANCES if p.url in target_urls]
579
 
580
- # Sort: fewest fails → highest health → lowest latency
581
- providers.sort(key=lambda p: (p.fails, -p.health, p.latency))
 
 
 
582
 
583
- reply: Optional[str] = None
 
 
 
 
 
 
 
 
 
 
 
584
 
585
- for provider in providers:
586
- if time.time() < provider.cooldown:
587
- continue
588
- reply = await provider.attempt_request(body)
589
- if reply:
590
- logger.info(f"✅ Served by [{provider.name}] model={model}")
591
- break
592
 
593
- # --- Internal fallback via g4f ---
594
- if not reply:
595
- try:
596
- from g4f.client import Client
597
- loop = asyncio.get_event_loop()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
598
 
599
- def fallback_req():
600
- return (
601
- Client()
602
- .chat.completions.create(model=model, messages=messages)
603
- .choices[0].message.content
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
604
  )
 
 
 
605
 
606
- reply = await loop.run_in_executor(EXECUTOR, fallback_req)
607
- logger.info("🔄 Served via g4f fallback.")
608
- except Exception as e:
609
- logger.warning(f"g4f fallback failed: {e}")
610
-
611
- if not reply:
612
- raise HTTPException(status_code=502, detail="Orchestration Failed: all routes exhausted.")
613
-
614
- # --- Format response ---
615
- # Anthropic messages format
616
- if "messages" in path_lower and "chat" not in path_lower:
617
- return {
618
- "id": f"msg_{uuid.uuid4().hex}",
619
- "type": "message",
620
- "role": "assistant",
621
- "model": model,
622
- "content": [{"type": "text", "text": reply}],
623
- "stop_reason": "end_turn",
624
- "usage": {"input_tokens": 0, "output_tokens": 0}
625
- }
 
 
 
 
 
 
 
 
 
 
626
 
627
- # OpenAI chat.completions format (default)
628
  return {
629
- "id": f"chatcmpl-{uuid.uuid4().hex}",
630
- "object": "chat.completion",
631
- "created": int(time.time()),
632
  "model": model,
633
- "choices": [
634
  {
635
- "index": 0,
636
- "message": {"role": "assistant", "content": reply},
637
- "finish_reason": "stop"
638
  }
639
  ],
640
- "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
641
  }
642
 
643
- return Response(status_code=404)
 
 
644
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
645
 
646
  # =========================================================
647
- # 8. نقطة الدخول
648
  # =========================================================
 
649
  if __name__ == "__main__":
650
- uvicorn.run(app, host="0.0.0.0", port=PORT, log_level="info")
 
 
 
 
 
 
 
6
  import logging
7
  import uvicorn
8
  import random
9
+
10
  from fastapi import FastAPI, HTTPException, Request, Response
11
  from fastapi.middleware.cors import CORSMiddleware
12
+
13
  from concurrent.futures import ThreadPoolExecutor
14
  from curl_cffi import requests as curl_requests
15
+
16
  from typing import Dict, List, Optional, Tuple
17
 
18
  # =========================================================
19
+ # CONFIG
20
  # =========================================================
21
+
22
  API_KEY = os.environ.get("API_KEY", "sk-your-secret-key")
23
  PORT = int(os.environ.get("PORT", 7860))
24
+
25
  MAX_WORKERS = 50
26
  VALIDATION_INTERVAL = 300
 
27
  CACHE_FILE = "models_cache.json"
28
 
29
  MODEL_BLACKLIST = [
 
43
  VALIDATION_PROMPT = [
44
  {
45
  "role": "user",
46
+ "content": "Reply only with OK"
47
  }
48
  ]
49
 
 
50
  WORKING_MODELS: Dict[str, dict] = {}
51
  PROVIDER_MODEL_MAP: Dict[str, List[str]] = {}
52
 
 
53
  STATE_LOCK = asyncio.Lock()
54
  REQUEST_LIMITER = asyncio.Semaphore(25)
55
+
56
  EXECUTOR = ThreadPoolExecutor(max_workers=MAX_WORKERS)
57
 
58
+ # =========================================================
59
+ # FASTAPI
60
+ # =========================================================
61
+
62
+ app = FastAPI(
63
+ title="Omega Orchestrator",
64
+ version="3.0"
65
+ )
66
+
67
  app.add_middleware(
68
  CORSMiddleware,
69
  allow_origins=["*"],
 
71
  allow_headers=["*"]
72
  )
73
 
74
+ logging.basicConfig(
75
+ level=logging.INFO,
76
+ format="%(asctime)s - %(levelname)s - %(message)s"
77
+ )
78
+
79
+ logger = logging.getLogger("OMEGA")
80
 
81
+ # =========================================================
82
+ # HELPERS
83
+ # =========================================================
84
 
85
  def get_stealth_headers():
86
+
87
  return {
88
  "User-Agent": (
89
+ f"Mozilla/5.0 "
90
+ f"(Windows NT 10.0; Win64; x64) "
91
+ f"AppleWebKit/537.36 "
92
+ f"(KHTML, like Gecko) "
93
+ f"Chrome/{random.randint(120,124)}.0.0.0 "
94
+ f"Safari/537.36"
95
  ),
96
  "Origin": "https://g4f.space",
97
  "Referer": "https://g4f.space/",
 
101
  }
102
 
103
 
 
 
 
104
  def verify_api_key(request: Request) -> bool:
105
+
 
 
 
 
 
 
106
  if not API_KEY or API_KEY == "sk-your-secret-key":
107
+ return True
108
 
109
  auth_header = request.headers.get("Authorization", "")
110
+ x_api_key = request.headers.get("x-api-key", "")
111
  api_key_hdr = request.headers.get("api-key", "")
112
 
113
  candidates = []
114
 
 
115
  if auth_header.startswith("Bearer "):
116
+ candidates.append(
117
+ auth_header[len("Bearer "):].strip()
118
+ )
119
 
 
120
  if x_api_key:
121
  candidates.append(x_api_key.strip())
122
+
123
  if api_key_hdr:
124
  candidates.append(api_key_hdr.strip())
125
 
 
127
 
128
 
129
  # =========================================================
130
+ # CONTENT EXTRACTION
131
  # =========================================================
132
+
133
  def extract_content(data) -> Optional[str]:
134
+
135
  if not isinstance(data, dict):
136
  return None
137
 
138
+ for field in (
139
+ "response",
140
+ "content",
141
+ "text",
142
+ "output",
143
+ "result",
144
+ "generated_text",
145
+ "completion"
146
+ ):
147
+
148
+ value = data.get(field)
149
+
150
+ if isinstance(value, str) and value.strip():
151
+ return value
152
+
153
+ if "choices" in data:
154
+
155
+ choices = data.get("choices")
156
+
157
+ if isinstance(choices, list) and choices:
158
+
159
+ choice = choices[0]
160
+
161
+ if isinstance(choice, dict):
162
+
163
+ msg = choice.get("message")
164
+
165
+ if isinstance(msg, dict):
166
+
167
+ content = msg.get("content")
168
+
169
+ if isinstance(content, str):
170
+ return content
171
+
172
+ txt = choice.get("text")
173
+
174
+ if isinstance(txt, str):
175
+ return txt
176
+
177
  if "message" in data:
178
+
179
+ message = data["message"]
180
+
181
+ if isinstance(message, str):
182
+ return message
183
+
184
+ if isinstance(message, dict):
185
+
186
+ content = message.get("content")
187
+
188
+ if isinstance(content, str):
189
+ return content
190
+
191
+ if isinstance(content, list):
192
+
193
+ texts = []
194
+
195
+ for block in content:
196
+
197
+ if (
198
+ isinstance(block, dict)
199
+ and block.get("type") == "text"
200
+ ):
201
+ texts.append(block.get("text", ""))
202
+
203
  combined = "".join(texts)
204
+
205
  if combined.strip():
206
  return combined
 
 
 
 
 
 
 
 
 
 
 
 
207
 
208
  return None
209
 
210
 
211
  # =========================================================
212
+ # PROVIDERS
213
  # =========================================================
214
+
215
  class BaseProvider:
216
+
217
  def __init__(self, name: str, url: str):
218
+
219
  self.name = name
220
  self.url = url
221
+
222
  self.aliases: Dict[str, str] = {}
223
+
224
  self.fails = 0
225
  self.success = 0
226
+
227
  self.cooldown = 0.0
228
+
229
  self.health = 100
230
+ self.latency = 0.0
231
 
232
+ # =====================================================
233
+ # AUTO MODEL IMPORT
234
+ # =====================================================
 
235
 
 
 
 
236
  async def fetch_models(self) -> List[str]:
237
+
238
  loop = asyncio.get_event_loop()
239
+
240
  try:
241
+
242
  async with REQUEST_LIMITER:
243
+
244
+ models = await loop.run_in_executor(
245
+ EXECUTOR,
246
+ self._fetch_models_sync
247
+ )
248
+
249
  return list(set(models))
250
+
251
  except Exception as e:
252
+
253
+ logger.debug(
254
+ f"[{self.name}] fetch_models error: {e}"
255
+ )
256
+
257
  return []
258
 
259
  def _fetch_models_sync(self) -> List[str]:
260
+
261
+ discovered = set()
262
+
 
 
263
  endpoints = [
264
  f"{self.url}/v1/models",
265
  f"{self.url}/models",
266
+ self.url
267
  ]
268
 
269
  with curl_requests.Session() as session:
270
+
271
  for endpoint in endpoints:
272
+
273
  try:
274
+
275
  resp = session.get(
276
  endpoint,
277
+ headers=get_stealth_headers(),
278
  impersonate="chrome124",
279
+ timeout=15
280
  )
281
+
282
  if resp.status_code != 200:
283
  continue
284
+
285
+ try:
286
+ data = resp.json()
287
+ except:
288
+ continue
289
+
290
+ extracted = self._deep_extract_models(data)
291
+
292
+ for model in extracted:
293
+
294
+ if not isinstance(model, str):
295
+ continue
296
+
297
+ model = model.strip()
298
+
299
+ if not model:
300
+ continue
301
+
302
+ if any(
303
+ x in model.lower()
304
+ for x in MODEL_BLACKLIST
305
+ ):
306
+ continue
307
+
308
+ discovered.add(model)
309
+
310
  except Exception:
311
  continue
312
 
313
+ return list(discovered)
314
+
315
+ # =====================================================
316
+ # FULL DEEP AUTO EXTRACTION
317
+ # =====================================================
318
+
319
+ def _deep_extract_models(self, data):
320
+
321
+ discovered = set()
322
+
323
+ model_keys = {
324
+ "id",
325
+ "model",
326
+ "model_id",
327
+ "name",
328
+ "slug"
329
+ }
330
+
331
+ model_tokens = [
332
+ "gpt",
333
+ "claude",
334
+ "llama",
335
+ "gemini",
336
+ "mixtral",
337
+ "mistral",
338
+ "phi",
339
+ "qwen",
340
+ "deepseek",
341
+ "command",
342
+ "sonnet",
343
+ "opus",
344
+ "haiku"
345
+ ]
346
+
347
+ def walker(obj):
348
+
349
+ # dict
350
+ if isinstance(obj, dict):
351
+
352
+ for key, value in obj.items():
353
+
354
+ if key.lower() in model_keys:
355
+
356
+ if isinstance(value, str):
357
+
358
+ lower = value.lower()
359
+
360
+ if any(
361
+ token in lower
362
+ for token in model_tokens
363
+ ):
364
+
365
+ if len(value) < 80:
366
+ discovered.add(value)
367
+
368
+ walker(value)
369
+
370
+ # list
371
+ elif isinstance(obj, list):
372
+
373
+ for item in obj:
374
+ walker(item)
375
+
376
+ # raw strings
377
+ elif isinstance(obj, str):
378
+
379
+ lower = obj.lower()
380
+
381
+ if any(
382
+ token in lower
383
+ for token in model_tokens
384
+ ):
385
+
386
+ if len(obj) < 80:
387
+ discovered.add(obj)
388
+
389
+ walker(data)
390
+
391
+ return list(discovered)
392
+
393
+ # =====================================================
394
+ # VALIDATION
395
+ # =====================================================
396
+
397
+ async def validate_model(
398
+ self,
399
+ model: str
400
+ ) -> Tuple[bool, float]:
401
+
402
+ payload = {
403
+ "model": model,
404
+ "messages": VALIDATION_PROMPT
405
+ }
406
+
407
  start = time.time()
408
+
409
  result = await self.attempt_request(payload)
410
+
411
  latency = time.time() - start
412
 
413
+ if (
414
+ result
415
+ and "ok" in result.lower()
416
+ ):
417
+
418
  self.success += 1
 
 
 
 
 
419
 
420
+ self.latency = (
421
+ (self.latency + latency) / 2
422
+ if self.latency > 0
423
+ else latency
424
+ )
425
+
426
+ self._update_health()
427
+
428
+ return True, latency
429
+
430
+ self.fails += 1
431
+
432
+ self._update_health()
433
+
434
+ return False, latency
435
+
436
+ # =====================================================
437
+ # REQUEST
438
+ # =====================================================
439
+
440
+ async def attempt_request(
441
+ self,
442
+ payload: dict
443
+ ) -> Optional[str]:
444
 
445
  if time.time() < self.cooldown:
446
  return None
447
 
448
+ payload = payload.copy()
449
+
450
  model_id = payload.get("model", "")
451
+
452
+ payload["model"] = (
453
+ self.aliases.get(model_id, model_id)
454
+ )
455
 
456
  try:
457
+
458
  async with REQUEST_LIMITER:
459
+
460
  loop = asyncio.get_event_loop()
 
461
 
462
+ result = await loop.run_in_executor(
463
+ EXECUTOR,
464
+ self._make_request,
465
+ payload
466
+ )
467
+
468
+ if result:
469
+
470
  self.fails = 0
471
  self.success += 1
472
+
473
+ self._update_health()
474
+
475
+ return result
476
 
477
  self.fails += 1
478
+
479
+ self._update_health()
480
+
481
  if self.fails >= 3:
482
  self.cooldown = time.time() + 60
483
+
484
  return None
485
 
486
  except Exception:
487
+
488
  self.fails += 1
489
+
490
+ self._update_health()
491
+
492
  return None
493
 
494
+ def _make_request(
495
+ self,
496
+ payload: dict
497
+ ) -> Optional[str]:
498
+
499
  with curl_requests.Session() as session:
500
+
501
  try:
502
+
503
  resp = session.post(
504
  self.url,
505
+ headers=get_stealth_headers(),
506
  json=payload,
507
  impersonate="chrome124",
508
  timeout=25
509
  )
510
+
511
+ if resp.status_code != 200:
512
+ return None
513
+
514
+ try:
515
  data = resp.json()
516
+ except:
517
+ return None
518
+
519
+ return extract_content(data)
520
+
521
  except Exception:
522
+ return None
 
523
 
524
+ # =====================================================
525
+ # HEALTH
526
+ # =====================================================
527
+
528
+ def _update_health(self):
529
+
530
+ total = self.success + self.fails
531
+
532
+ if total > 0:
533
+
534
+ self.health = int(
535
+ (self.success / total) * 100
536
+ )
537
+
538
+
539
+ # =========================================================
540
+ # PROVIDERS
541
+ # =========================================================
542
 
 
 
 
543
  class GroqProvider(BaseProvider):
544
+
545
  def __init__(self):
546
+
547
+ super().__init__(
548
+ "Groq",
549
+ "https://g4f.space/api/groq"
550
+ )
551
+
552
+ self.aliases = {
553
+ "gpt-4o": "llama-3-70b"
554
+ }
555
 
556
 
557
  class GeminiProvider(BaseProvider):
558
+
559
  def __init__(self):
560
+
561
+ super().__init__(
562
+ "Gemini",
563
+ "https://g4f.space/api/gemini"
564
+ )
565
+
566
+ self.aliases = {
567
+ "claude-3-5-sonnet": "gemini-1.5-pro"
568
+ }
569
 
570
 
571
  class PollinationsProvider(BaseProvider):
572
+
573
  def __init__(self):
574
+
575
+ super().__init__(
576
+ "Pollinations",
577
+ "https://g4f.space/api/pollinations"
578
+ )
579
+
580
+ self.aliases = {
581
+ "gpt-4o": "gpt-4"
582
+ }
583
 
584
 
585
  class OllamaProvider(BaseProvider):
586
+
587
  def __init__(self):
 
588
 
589
+ super().__init__(
590
+ "Ollama",
591
+ "https://g4f.space/api/ollama"
592
+ )
593
 
 
 
 
594
 
595
+ class PerplexityProvider(BaseProvider):
596
 
 
 
 
 
597
  def __init__(self):
 
 
598
 
599
+ super().__init__(
600
+ "Perplexity",
601
+ "https://g4f.space/api/perplexity"
602
+ )
 
 
 
 
 
603
 
 
 
 
 
 
 
 
 
 
 
604
 
605
+ PROVIDER_INSTANCES = [
 
606
  GroqProvider(),
607
  GeminiProvider(),
608
  PollinationsProvider(),
609
  OllamaProvider(),
610
+ PerplexityProvider()
 
611
  ]
612
 
 
613
  # =========================================================
614
+ # CACHE
615
  # =========================================================
616
+
617
  async def load_cache():
618
+
619
+ global WORKING_MODELS
620
+ global PROVIDER_MODEL_MAP
621
+
622
+ if not os.path.exists(CACHE_FILE):
623
+ return
624
+
625
  try:
626
+
627
+ with open(
628
+ CACHE_FILE,
629
+ "r",
630
+ encoding="utf-8"
631
+ ) as f:
632
+
633
+ data = json.load(f)
634
+
635
+ async with STATE_LOCK:
636
+
637
+ WORKING_MODELS = data.get(
638
+ "WORKING_MODELS",
639
+ {}
640
+ )
641
+
642
+ PROVIDER_MODEL_MAP = data.get(
643
+ "PROVIDER_MODEL_MAP",
644
+ {}
645
+ )
646
+
647
+ logger.info(
648
+ f"✅ Cache loaded ({len(WORKING_MODELS)} models)"
649
+ )
650
+
651
  except Exception as e:
652
+
653
+ logger.error(
654
+ f"Cache load error: {e}"
655
+ )
656
 
657
 
658
  async def save_cache():
659
+
660
  try:
661
+
662
  async with STATE_LOCK:
663
+
664
  snapshot = {
665
+ "WORKING_MODELS": WORKING_MODELS,
666
+ "PROVIDER_MODEL_MAP": PROVIDER_MODEL_MAP
667
  }
668
+
669
+ with open(
670
+ CACHE_FILE,
671
+ "w",
672
+ encoding="utf-8"
673
+ ) as f:
674
+
675
+ json.dump(
676
+ snapshot,
677
+ f,
678
+ indent=4,
679
+ ensure_ascii=False
680
+ )
681
+
682
  except Exception as e:
 
683
 
684
+ logger.error(
685
+ f"Cache save error: {e}"
686
+ )
687
 
688
  # =========================================================
689
+ # DISCOVERY ENGINE
690
  # =========================================================
691
+
692
  async def discovery_engine():
693
+
694
+ global WORKING_MODELS
695
+ global PROVIDER_MODEL_MAP
696
+
697
  await load_cache()
698
 
699
  while True:
 
700
 
701
+ logger.info(
702
+ "📡 Discovery cycle started..."
703
+ )
704
 
705
+ fresh_provider_map = {}
706
+ fresh_working_models = {}
707
+
708
+ tasks = {
709
+ provider: asyncio.create_task(
710
+ provider.fetch_models()
711
+ )
712
+ for provider in PROVIDER_INSTANCES
713
+ }
714
 
715
  for provider, task in tasks.items():
 
 
716
 
717
+ try:
718
+
719
+ discovered = await task
720
+
721
+ except Exception:
722
+
723
+ discovered = []
724
+
725
+ logger.info(
726
+ f"[{provider.name}] discovered {len(discovered)} models"
727
+ )
728
+
729
+ clean_models = []
730
+
731
  validation_tasks = []
 
 
 
 
732
 
733
+ for model in discovered:
734
 
735
+ validation_tasks.append(
736
+ (
737
+ model,
738
+ asyncio.create_task(
739
+ provider.validate_model(model)
740
+ )
741
+ )
742
+ )
743
 
744
  for model, vtask in validation_tasks:
745
+
746
  try:
747
+
748
+ valid, latency = await vtask
749
+
750
  except Exception:
 
751
 
752
+ valid = False
753
+ latency = 0
754
 
755
+ if not valid:
756
+ continue
 
 
 
 
 
757
 
758
+ clean_models.append(model)
 
 
 
 
759
 
760
+ if model not in fresh_working_models:
761
+
762
+ fresh_working_models[model] = {
763
+ "providers": [],
764
+ "latency": latency,
765
+ "health": provider.health
766
+ }
767
+
768
+ info = fresh_working_models[model]
769
+
770
+ if provider.url not in info["providers"]:
771
+ info["providers"].append(provider.url)
772
+
773
+ fresh_provider_map[
774
+ provider.url
775
+ ] = clean_models
776
+
777
+ logger.info(
778
+ f"[{provider.name}] validated {len(clean_models)} models"
779
+ )
780
 
 
781
  async with STATE_LOCK:
782
+
783
  PROVIDER_MODEL_MAP = fresh_provider_map
784
  WORKING_MODELS = fresh_working_models
785
 
786
  await save_cache()
 
 
787
 
788
+ logger.info(
789
+ f"🚀 Active models: {len(WORKING_MODELS)}"
790
+ )
791
+
792
+ await asyncio.sleep(
793
+ VALIDATION_INTERVAL
794
+ )
795
 
796
  # =========================================================
797
+ # STARTUP
798
  # =========================================================
799
+
800
  @app.on_event("startup")
801
  async def startup():
 
802
 
803
+ asyncio.create_task(
804
+ discovery_engine()
805
+ )
806
 
807
+ # =========================================================
808
+ # UNIVERSAL ROUTER
809
+ # =========================================================
810
+
811
+ @app.api_route(
812
+ "/{path:path}",
813
+ methods=["GET", "POST", "HEAD", "OPTIONS"]
814
+ )
815
+ async def universal_handler(
816
+ request: Request,
817
+ path: str
818
+ ):
819
 
820
  path_lower = path.lower().strip("/")
821
 
822
+ # =====================================================
823
+ # OPTIONS
824
+ # =====================================================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
825
 
826
+ if request.method == "OPTIONS":
 
827
 
828
+ return Response(
829
+ status_code=204
830
+ )
831
 
832
+ # =====================================================
833
+ # HEALTH
834
+ # =====================================================
 
 
835
 
836
+ if request.method in ("GET", "HEAD"):
 
837
 
838
+ if (
839
+ path_lower in ("", "v1", "v1/")
840
+ or "models" in path_lower
841
+ ):
842
 
843
+ if "models" in path_lower:
 
 
 
 
 
 
844
 
845
+ async with STATE_LOCK:
846
 
847
+ models = (
848
+ list(WORKING_MODELS.keys())
849
+ if WORKING_MODELS
850
+ else ["gpt-4o"]
851
+ )
852
 
853
+ return {
854
+ "object": "list",
855
+ "data": [
856
+ {
857
+ "id": model,
858
+ "object": "model",
859
+ "created": int(time.time()),
860
+ "owned_by": "omega"
861
+ }
862
+ for model in sorted(models)
863
+ ]
864
+ }
865
 
866
+ return Response(status_code=200)
 
 
 
 
 
 
867
 
868
+ # =====================================================
869
+ # AUTH
870
+ # =====================================================
871
+
872
+ if not verify_api_key(request):
873
+
874
+ raise HTTPException(
875
+ status_code=401,
876
+ detail="Unauthorized"
877
+ )
878
+
879
+ # =====================================================
880
+ # REQUEST BODY
881
+ # =====================================================
882
+
883
+ try:
884
+
885
+ body = await request.json()
886
+
887
+ except Exception:
888
+
889
+ raise HTTPException(
890
+ status_code=400,
891
+ detail="Invalid JSON"
892
+ )
893
+
894
+ model = body.get(
895
+ "model",
896
+ "gpt-4o"
897
+ )
898
+
899
+ messages = body.get(
900
+ "messages",
901
+ []
902
+ )
903
+
904
+ if not messages:
905
+
906
+ raise HTTPException(
907
+ status_code=400,
908
+ detail="messages required"
909
+ )
910
+
911
+ # =====================================================
912
+ # ROUTING
913
+ # =====================================================
914
+
915
+ async with STATE_LOCK:
916
+
917
+ model_info = WORKING_MODELS.get(model)
918
 
919
+ if (
920
+ model_info
921
+ and model_info.get("providers")
922
+ ):
923
+
924
+ target_urls = model_info["providers"]
925
+
926
+ else:
927
+
928
+ target_urls = [
929
+ p.url
930
+ for p in PROVIDER_INSTANCES
931
+ ]
932
+
933
+ providers = [
934
+ p
935
+ for p in PROVIDER_INSTANCES
936
+ if p.url in target_urls
937
+ ]
938
+
939
+ providers.sort(
940
+ key=lambda p: (
941
+ p.fails,
942
+ -p.health,
943
+ p.latency
944
+ )
945
+ )
946
+
947
+ reply = None
948
+
949
+ for provider in providers:
950
+
951
+ if time.time() < provider.cooldown:
952
+ continue
953
+
954
+ reply = await provider.attempt_request(body)
955
+
956
+ if reply:
957
+
958
+ logger.info(
959
+ f"✅ Served by {provider.name}"
960
+ )
961
+
962
+ break
963
+
964
+ # =====================================================
965
+ # FALLBACK
966
+ # =====================================================
967
+
968
+ if not reply:
969
+
970
+ try:
971
+
972
+ from g4f.client import Client
973
+
974
+ loop = asyncio.get_event_loop()
975
+
976
+ def fallback_req():
977
+
978
+ return (
979
+ Client()
980
+ .chat.completions
981
+ .create(
982
+ model=model,
983
+ messages=messages
984
  )
985
+ .choices[0]
986
+ .message.content
987
+ )
988
 
989
+ reply = await loop.run_in_executor(
990
+ EXECUTOR,
991
+ fallback_req
992
+ )
993
+
994
+ logger.info(
995
+ "🔄 Served via fallback"
996
+ )
997
+
998
+ except Exception as e:
999
+
1000
+ logger.warning(
1001
+ f"Fallback failed: {e}"
1002
+ )
1003
+
1004
+ if not reply:
1005
+
1006
+ raise HTTPException(
1007
+ status_code=502,
1008
+ detail="All providers failed"
1009
+ )
1010
+
1011
+ # =====================================================
1012
+ # CLAUDE FORMAT
1013
+ # =====================================================
1014
+
1015
+ if (
1016
+ "messages" in path_lower
1017
+ and "chat" not in path_lower
1018
+ ):
1019
 
 
1020
  return {
1021
+ "id": f"msg_{uuid.uuid4().hex}",
1022
+ "type": "message",
1023
+ "role": "assistant",
1024
  "model": model,
1025
+ "content": [
1026
  {
1027
+ "type": "text",
1028
+ "text": reply
 
1029
  }
1030
  ],
1031
+ "stop_reason": "end_turn"
1032
  }
1033
 
1034
+ # =====================================================
1035
+ # OPENAI FORMAT
1036
+ # =====================================================
1037
 
1038
+ return {
1039
+ "id": f"chatcmpl-{uuid.uuid4().hex}",
1040
+ "object": "chat.completion",
1041
+ "created": int(time.time()),
1042
+ "model": model,
1043
+ "choices": [
1044
+ {
1045
+ "index": 0,
1046
+ "message": {
1047
+ "role": "assistant",
1048
+ "content": reply
1049
+ },
1050
+ "finish_reason": "stop"
1051
+ }
1052
+ ]
1053
+ }
1054
 
1055
  # =========================================================
1056
+ # RUN
1057
  # =========================================================
1058
+
1059
  if __name__ == "__main__":
1060
+
1061
+ uvicorn.run(
1062
+ app,
1063
+ host="0.0.0.0",
1064
+ port=PORT,
1065
+ log_level="info"
1066
+ )