bahi-bh commited on
Commit
f0b5452
ยท
verified ยท
1 Parent(s): 32d4bc3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +360 -776
app.py CHANGED
@@ -6,24 +6,20 @@ import asyncio
6
  import logging
7
  import uvicorn
8
  import random
9
-
10
  from fastapi import FastAPI, HTTPException, Request, Response
11
  from fastapi.middleware.cors import CORSMiddleware
12
-
13
  from concurrent.futures import ThreadPoolExecutor
14
  from curl_cffi import requests as curl_requests
15
-
16
  from typing import Dict, List, Optional, Tuple
17
 
18
  # =========================================================
19
- # CONFIG
20
  # =========================================================
21
-
22
  API_KEY = os.environ.get("API_KEY", "sk-your-secret-key")
23
  PORT = int(os.environ.get("PORT", 7860))
24
-
25
  MAX_WORKERS = 50
26
  VALIDATION_INTERVAL = 300
 
27
  CACHE_FILE = "models_cache.json"
28
 
29
  MODEL_BLACKLIST = [
@@ -43,27 +39,20 @@ MODEL_BLACKLIST = [
43
  VALIDATION_PROMPT = [
44
  {
45
  "role": "user",
46
- "content": "Reply only with OK"
47
  }
48
  ]
49
 
 
50
  WORKING_MODELS: Dict[str, dict] = {}
51
  PROVIDER_MODEL_MAP: Dict[str, List[str]] = {}
52
 
 
53
  STATE_LOCK = asyncio.Lock()
54
  REQUEST_LIMITER = asyncio.Semaphore(25)
55
-
56
  EXECUTOR = ThreadPoolExecutor(max_workers=MAX_WORKERS)
57
 
58
- # =========================================================
59
- # FASTAPI
60
- # =========================================================
61
-
62
- app = FastAPI(
63
- title="Omega Orchestrator",
64
- version="3.0"
65
- )
66
-
67
  app.add_middleware(
68
  CORSMiddleware,
69
  allow_origins=["*"],
@@ -71,27 +60,16 @@ app.add_middleware(
71
  allow_headers=["*"]
72
  )
73
 
74
- logging.basicConfig(
75
- level=logging.INFO,
76
- format="%(asctime)s - %(levelname)s - %(message)s"
77
- )
78
-
79
- logger = logging.getLogger("OMEGA")
80
 
81
- # =========================================================
82
- # HELPERS
83
- # =========================================================
84
 
85
  def get_stealth_headers():
86
-
87
  return {
88
  "User-Agent": (
89
- f"Mozilla/5.0 "
90
- f"(Windows NT 10.0; Win64; x64) "
91
- f"AppleWebKit/537.36 "
92
- f"(KHTML, like Gecko) "
93
- f"Chrome/{random.randint(120,124)}.0.0.0 "
94
- f"Safari/537.36"
95
  ),
96
  "Origin": "https://g4f.space",
97
  "Referer": "https://g4f.space/",
@@ -101,25 +79,33 @@ def get_stealth_headers():
101
  }
102
 
103
 
 
 
 
104
  def verify_api_key(request: Request) -> bool:
105
-
 
 
 
 
 
 
106
  if not API_KEY or API_KEY == "sk-your-secret-key":
107
- return True
108
 
109
  auth_header = request.headers.get("Authorization", "")
110
- x_api_key = request.headers.get("x-api-key", "")
111
  api_key_hdr = request.headers.get("api-key", "")
112
 
113
  candidates = []
114
 
 
115
  if auth_header.startswith("Bearer "):
116
- candidates.append(
117
- auth_header[len("Bearer "):].strip()
118
- )
119
 
 
120
  if x_api_key:
121
  candidates.append(x_api_key.strip())
122
-
123
  if api_key_hdr:
124
  candidates.append(api_key_hdr.strip())
125
 
@@ -127,940 +113,538 @@ def verify_api_key(request: Request) -> bool:
127
 
128
 
129
  # =========================================================
130
- # CONTENT EXTRACTION
131
  # =========================================================
132
-
133
  def extract_content(data) -> Optional[str]:
134
-
135
  if not isinstance(data, dict):
136
  return None
137
 
138
- for field in (
139
- "response",
140
- "content",
141
- "text",
142
- "output",
143
- "result",
144
- "generated_text",
145
- "completion"
146
- ):
147
-
148
- value = data.get(field)
149
-
150
- if isinstance(value, str) and value.strip():
151
- return value
152
-
153
- if "choices" in data:
154
-
155
- choices = data.get("choices")
156
-
157
- if isinstance(choices, list) and choices:
158
-
159
- choice = choices[0]
160
-
161
- if isinstance(choice, dict):
162
-
163
- msg = choice.get("message")
164
-
165
- if isinstance(msg, dict):
166
-
167
- content = msg.get("content")
168
-
169
- if isinstance(content, str):
170
- return content
171
-
172
- txt = choice.get("text")
173
-
174
- if isinstance(txt, str):
175
- return txt
176
-
177
  if "message" in data:
178
-
179
- message = data["message"]
180
-
181
- if isinstance(message, str):
182
- return message
183
-
184
- if isinstance(message, dict):
185
-
186
- content = message.get("content")
187
-
188
- if isinstance(content, str):
189
- return content
190
-
191
- if isinstance(content, list):
192
-
193
- texts = []
194
-
195
- for block in content:
196
-
197
- if (
198
- isinstance(block, dict)
199
- and block.get("type") == "text"
200
- ):
201
- texts.append(block.get("text", ""))
202
-
203
  combined = "".join(texts)
204
-
205
  if combined.strip():
206
  return combined
 
 
 
 
 
 
 
 
 
 
 
 
207
 
208
  return None
209
 
210
 
211
  # =========================================================
212
- # PROVIDERS
213
  # =========================================================
214
-
215
  class BaseProvider:
216
-
217
  def __init__(self, name: str, url: str):
218
-
219
  self.name = name
220
  self.url = url
221
-
222
  self.aliases: Dict[str, str] = {}
223
-
224
  self.fails = 0
225
  self.success = 0
226
-
227
  self.cooldown = 0.0
228
-
229
- self.health = 100
230
  self.latency = 0.0
 
231
 
232
- # =====================================================
233
- # AUTO MODEL IMPORT
234
- # =====================================================
 
235
 
 
 
 
236
  async def fetch_models(self) -> List[str]:
237
-
238
  loop = asyncio.get_event_loop()
239
-
240
  try:
241
-
242
  async with REQUEST_LIMITER:
243
-
244
- models = await loop.run_in_executor(
245
- EXECUTOR,
246
- self._fetch_models_sync
247
- )
248
-
249
  return list(set(models))
250
-
251
  except Exception as e:
252
-
253
- logger.debug(
254
- f"[{self.name}] fetch_models error: {e}"
255
- )
256
-
257
  return []
258
 
259
  def _fetch_models_sync(self) -> List[str]:
260
-
261
- discovered = set()
262
-
 
 
263
  endpoints = [
264
  f"{self.url}/v1/models",
265
  f"{self.url}/models",
266
- self.url
267
  ]
268
 
269
  with curl_requests.Session() as session:
270
-
271
  for endpoint in endpoints:
272
-
273
  try:
274
-
275
  resp = session.get(
276
  endpoint,
277
- headers=get_stealth_headers(),
278
  impersonate="chrome124",
279
- timeout=15
280
  )
281
-
282
  if resp.status_code != 200:
283
  continue
284
-
285
- try:
286
- data = resp.json()
287
- except:
288
- continue
289
-
290
- extracted = self._deep_extract_models(data)
291
-
292
- for model in extracted:
293
-
294
- if not isinstance(model, str):
295
- continue
296
-
297
- model = model.strip()
298
-
299
- if not model:
300
- continue
301
-
302
- if any(
303
- x in model.lower()
304
- for x in MODEL_BLACKLIST
305
- ):
306
- continue
307
-
308
- discovered.add(model)
309
-
310
  except Exception:
311
  continue
312
 
313
- return list(discovered)
314
-
315
- # =====================================================
316
- # FULL DEEP AUTO EXTRACTION
317
- # =====================================================
318
-
319
- def _deep_extract_models(self, data):
320
-
321
- discovered = set()
322
-
323
- model_keys = {
324
- "id",
325
- "model",
326
- "model_id",
327
- "name",
328
- "slug"
329
- }
330
-
331
- model_tokens = [
332
- "gpt",
333
- "claude",
334
- "llama",
335
- "gemini",
336
- "mixtral",
337
- "mistral",
338
- "phi",
339
- "qwen",
340
- "deepseek",
341
- "command",
342
- "sonnet",
343
- "opus",
344
- "haiku"
345
- ]
346
-
347
- def walker(obj):
348
-
349
- # dict
350
- if isinstance(obj, dict):
351
-
352
- for key, value in obj.items():
353
-
354
- if key.lower() in model_keys:
355
-
356
- if isinstance(value, str):
357
-
358
- lower = value.lower()
359
-
360
- if any(
361
- token in lower
362
- for token in model_tokens
363
- ):
364
-
365
- if len(value) < 80:
366
- discovered.add(value)
367
-
368
- walker(value)
369
-
370
- # list
371
- elif isinstance(obj, list):
372
-
373
- for item in obj:
374
- walker(item)
375
-
376
- # raw strings
377
- elif isinstance(obj, str):
378
-
379
- lower = obj.lower()
380
-
381
- if any(
382
- token in lower
383
- for token in model_tokens
384
- ):
385
-
386
- if len(obj) < 80:
387
- discovered.add(obj)
388
-
389
- walker(data)
390
-
391
- return list(discovered)
392
-
393
- # =====================================================
394
- # VALIDATION
395
- # =====================================================
396
-
397
- async def validate_model(
398
- self,
399
- model: str
400
- ) -> Tuple[bool, float]:
401
-
402
- payload = {
403
- "model": model,
404
- "messages": VALIDATION_PROMPT
405
- }
406
-
407
  start = time.time()
408
-
409
  result = await self.attempt_request(payload)
410
-
411
  latency = time.time() - start
412
 
413
- if (
414
- result
415
- and "ok" in result.lower()
416
- ):
417
-
418
  self.success += 1
 
 
 
 
 
419
 
420
- self.latency = (
421
- (self.latency + latency) / 2
422
- if self.latency > 0
423
- else latency
424
- )
425
-
426
- self._update_health()
427
-
428
- return True, latency
429
-
430
- self.fails += 1
431
-
432
- self._update_health()
433
-
434
- return False, latency
435
-
436
- # =====================================================
437
- # REQUEST
438
- # =====================================================
439
-
440
- async def attempt_request(
441
- self,
442
- payload: dict
443
- ) -> Optional[str]:
444
 
445
  if time.time() < self.cooldown:
446
  return None
447
 
448
- payload = payload.copy()
449
-
450
  model_id = payload.get("model", "")
451
-
452
- payload["model"] = (
453
- self.aliases.get(model_id, model_id)
454
- )
455
 
456
  try:
457
-
458
  async with REQUEST_LIMITER:
459
-
460
  loop = asyncio.get_event_loop()
 
461
 
462
- result = await loop.run_in_executor(
463
- EXECUTOR,
464
- self._make_request,
465
- payload
466
- )
467
-
468
- if result:
469
-
470
  self.fails = 0
471
  self.success += 1
472
-
473
- self._update_health()
474
-
475
- return result
476
 
477
  self.fails += 1
478
-
479
- self._update_health()
480
-
481
  if self.fails >= 3:
482
  self.cooldown = time.time() + 60
483
-
484
  return None
485
 
486
  except Exception:
487
-
488
  self.fails += 1
489
-
490
- self._update_health()
491
-
492
  return None
493
 
494
- def _make_request(
495
- self,
496
- payload: dict
497
- ) -> Optional[str]:
498
-
499
  with curl_requests.Session() as session:
500
-
501
  try:
502
-
503
  resp = session.post(
504
  self.url,
505
- headers=get_stealth_headers(),
506
  json=payload,
507
  impersonate="chrome124",
508
  timeout=25
509
  )
510
-
511
- if resp.status_code != 200:
512
- return None
513
-
514
- try:
515
  data = resp.json()
516
- except:
517
- return None
518
-
519
- return extract_content(data)
520
-
521
  except Exception:
522
- return None
523
-
524
- # =====================================================
525
- # HEALTH
526
- # =====================================================
527
-
528
- def _update_health(self):
529
-
530
- total = self.success + self.fails
531
-
532
- if total > 0:
533
-
534
- self.health = int(
535
- (self.success / total) * 100
536
- )
537
-
538
 
539
- # =========================================================
540
- # PROVIDERS
541
- # =========================================================
542
 
 
 
 
543
  class GroqProvider(BaseProvider):
544
-
545
  def __init__(self):
546
-
547
- super().__init__(
548
- "Groq",
549
- "https://g4f.space/api/groq"
550
- )
551
-
552
- self.aliases = {
553
- "gpt-4o": "llama-3-70b"
554
- }
555
 
556
 
557
  class GeminiProvider(BaseProvider):
558
-
559
  def __init__(self):
560
-
561
- super().__init__(
562
- "Gemini",
563
- "https://g4f.space/api/gemini"
564
- )
565
-
566
- self.aliases = {
567
- "claude-3-5-sonnet": "gemini-1.5-pro"
568
- }
569
 
570
 
571
  class PollinationsProvider(BaseProvider):
572
-
573
  def __init__(self):
574
-
575
- super().__init__(
576
- "Pollinations",
577
- "https://g4f.space/api/pollinations"
578
- )
579
-
580
- self.aliases = {
581
- "gpt-4o": "gpt-4"
582
- }
583
 
584
 
585
  class OllamaProvider(BaseProvider):
586
-
587
  def __init__(self):
588
-
589
- super().__init__(
590
- "Ollama",
591
- "https://g4f.space/api/ollama"
592
- )
593
 
594
 
595
  class PerplexityProvider(BaseProvider):
 
 
596
 
 
 
 
 
 
597
  def __init__(self):
 
 
598
 
599
- super().__init__(
600
- "Perplexity",
601
- "https://g4f.space/api/perplexity"
602
- )
 
 
 
 
 
603
 
 
 
 
 
 
 
 
 
 
 
604
 
605
- PROVIDER_INSTANCES = [
 
606
  GroqProvider(),
607
  GeminiProvider(),
608
  PollinationsProvider(),
609
  OllamaProvider(),
610
- PerplexityProvider()
 
611
  ]
612
 
 
613
  # =========================================================
614
- # CACHE
615
  # =========================================================
616
-
617
  async def load_cache():
618
-
619
- global WORKING_MODELS
620
- global PROVIDER_MODEL_MAP
621
-
622
- if not os.path.exists(CACHE_FILE):
623
- return
624
-
625
  try:
626
-
627
- with open(
628
- CACHE_FILE,
629
- "r",
630
- encoding="utf-8"
631
- ) as f:
632
-
633
- data = json.load(f)
634
-
635
- async with STATE_LOCK:
636
-
637
- WORKING_MODELS = data.get(
638
- "WORKING_MODELS",
639
- {}
640
- )
641
-
642
- PROVIDER_MODEL_MAP = data.get(
643
- "PROVIDER_MODEL_MAP",
644
- {}
645
- )
646
-
647
- logger.info(
648
- f"โœ… Cache loaded ({len(WORKING_MODELS)} models)"
649
- )
650
-
651
  except Exception as e:
652
-
653
- logger.error(
654
- f"Cache load error: {e}"
655
- )
656
 
657
 
658
  async def save_cache():
659
-
660
  try:
661
-
662
  async with STATE_LOCK:
663
-
664
  snapshot = {
665
- "WORKING_MODELS": WORKING_MODELS,
666
- "PROVIDER_MODEL_MAP": PROVIDER_MODEL_MAP
667
  }
668
-
669
- with open(
670
- CACHE_FILE,
671
- "w",
672
- encoding="utf-8"
673
- ) as f:
674
-
675
- json.dump(
676
- snapshot,
677
- f,
678
- indent=4,
679
- ensure_ascii=False
680
- )
681
-
682
  except Exception as e:
 
683
 
684
- logger.error(
685
- f"Cache save error: {e}"
686
- )
687
 
688
  # =========================================================
689
- # DISCOVERY ENGINE
690
  # =========================================================
691
-
692
  async def discovery_engine():
693
-
694
- global WORKING_MODELS
695
- global PROVIDER_MODEL_MAP
696
-
697
  await load_cache()
698
 
699
  while True:
 
700
 
701
- logger.info(
702
- "๐Ÿ“ก Discovery cycle started..."
703
- )
704
 
705
- fresh_provider_map = {}
706
- fresh_working_models = {}
707
-
708
- tasks = {
709
- provider: asyncio.create_task(
710
- provider.fetch_models()
711
- )
712
- for provider in PROVIDER_INSTANCES
713
- }
714
 
715
  for provider, task in tasks.items():
 
 
716
 
717
- try:
718
-
719
- discovered = await task
720
-
721
- except Exception:
722
-
723
- discovered = []
724
-
725
- logger.info(
726
- f"[{provider.name}] discovered {len(discovered)} models"
727
- )
728
-
729
- clean_models = []
730
-
731
  validation_tasks = []
 
 
 
 
732
 
733
- for model in discovered:
734
 
735
- validation_tasks.append(
736
- (
737
- model,
738
- asyncio.create_task(
739
- provider.validate_model(model)
740
- )
741
- )
742
- )
743
 
744
  for model, vtask in validation_tasks:
745
-
746
  try:
747
-
748
- valid, latency = await vtask
749
-
750
  except Exception:
 
751
 
752
- valid = False
753
- latency = 0
754
-
755
- if not valid:
756
- continue
757
-
758
- clean_models.append(model)
759
 
760
- if model not in fresh_working_models:
761
-
762
- fresh_working_models[model] = {
763
- "providers": [],
764
- "latency": latency,
765
- "health": provider.health
766
- }
767
-
768
- info = fresh_working_models[model]
769
-
770
- if provider.url not in info["providers"]:
771
- info["providers"].append(provider.url)
772
 
773
- fresh_provider_map[
774
- provider.url
775
- ] = clean_models
 
 
776
 
777
- logger.info(
778
- f"[{provider.name}] validated {len(clean_models)} models"
779
- )
780
 
 
781
  async with STATE_LOCK:
782
-
783
  PROVIDER_MODEL_MAP = fresh_provider_map
784
  WORKING_MODELS = fresh_working_models
785
 
786
  await save_cache()
 
 
787
 
788
- logger.info(
789
- f"๐Ÿš€ Active models: {len(WORKING_MODELS)}"
790
- )
791
-
792
- await asyncio.sleep(
793
- VALIDATION_INTERVAL
794
- )
795
 
796
  # =========================================================
797
- # STARTUP
798
  # =========================================================
799
-
800
  @app.on_event("startup")
801
  async def startup():
 
802
 
803
- asyncio.create_task(
804
- discovery_engine()
805
- )
806
-
807
- # =========================================================
808
- # UNIVERSAL ROUTER
809
- # =========================================================
810
-
811
- @app.api_route(
812
- "/{path:path}",
813
- methods=["GET", "POST", "HEAD", "OPTIONS"]
814
- )
815
- async def universal_handler(
816
- request: Request,
817
- path: str
818
- ):
819
-
820
- path_lower = path.lower().strip("/")
821
-
822
- # =====================================================
823
- # OPTIONS
824
- # =====================================================
825
 
 
 
 
826
  if request.method == "OPTIONS":
 
 
 
 
 
827
 
828
- return Response(
829
- status_code=204
830
- )
831
-
832
- # =====================================================
833
- # HEALTH
834
- # =====================================================
835
-
836
- if request.method in ("GET", "HEAD"):
837
-
838
- if (
839
- path_lower in ("", "v1", "v1/")
840
- or "models" in path_lower
841
- ):
842
-
843
- if "models" in path_lower:
844
-
845
- async with STATE_LOCK:
846
-
847
- models = (
848
- list(WORKING_MODELS.keys())
849
- if WORKING_MODELS
850
- else ["gpt-4o"]
851
- )
852
-
853
- return {
854
- "object": "list",
855
- "data": [
856
- {
857
- "id": model,
858
- "object": "model",
859
- "created": int(time.time()),
860
- "owned_by": "omega"
861
- }
862
- for model in sorted(models)
863
- ]
864
- }
865
-
866
- return Response(status_code=200)
867
-
868
- # =====================================================
869
- # AUTH
870
- # =====================================================
871
-
872
- if not verify_api_key(request):
873
-
874
- raise HTTPException(
875
- status_code=401,
876
- detail="Unauthorized"
877
- )
878
-
879
- # =====================================================
880
- # REQUEST BODY
881
- # =====================================================
882
-
883
- try:
884
-
885
- body = await request.json()
886
-
887
- except Exception:
888
-
889
- raise HTTPException(
890
- status_code=400,
891
- detail="Invalid JSON"
892
- )
893
-
894
- model = body.get(
895
- "model",
896
- "gpt-4o"
897
- )
898
-
899
- messages = body.get(
900
- "messages",
901
- []
902
- )
903
-
904
- if not messages:
905
-
906
- raise HTTPException(
907
- status_code=400,
908
- detail="messages required"
909
- )
910
-
911
- # =====================================================
912
- # ROUTING
913
- # =====================================================
914
-
915
- async with STATE_LOCK:
916
-
917
- model_info = WORKING_MODELS.get(model)
918
-
919
- if (
920
- model_info
921
- and model_info.get("providers")
922
- ):
923
-
924
- target_urls = model_info["providers"]
925
-
926
- else:
927
-
928
- target_urls = [
929
- p.url
930
- for p in PROVIDER_INSTANCES
931
- ]
932
-
933
- providers = [
934
- p
935
- for p in PROVIDER_INSTANCES
936
- if p.url in target_urls
937
- ]
938
-
939
- providers.sort(
940
- key=lambda p: (
941
- p.fails,
942
- -p.health,
943
- p.latency
944
- )
945
- )
946
-
947
- reply = None
948
-
949
- for provider in providers:
950
-
951
- if time.time() < provider.cooldown:
952
- continue
953
-
954
- reply = await provider.attempt_request(body)
955
-
956
- if reply:
957
-
958
- logger.info(
959
- f"โœ… Served by {provider.name}"
960
- )
961
 
962
- break
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
963
 
964
- # =====================================================
965
- # FALLBACK
966
- # =====================================================
967
 
968
- if not reply:
 
 
969
 
 
970
  try:
 
 
 
971
 
972
- from g4f.client import Client
973
-
974
- loop = asyncio.get_event_loop()
975
-
976
- def fallback_req():
977
 
978
- return (
979
- Client()
980
- .chat.completions
981
- .create(
982
- model=model,
983
- messages=messages
984
- )
985
- .choices[0]
986
- .message.content
987
- )
988
 
989
- reply = await loop.run_in_executor(
990
- EXECUTOR,
991
- fallback_req
992
- )
 
 
 
993
 
994
- logger.info(
995
- "๐Ÿ”„ Served via fallback"
996
- )
997
 
998
- except Exception as e:
 
999
 
1000
- logger.warning(
1001
- f"Fallback failed: {e}"
1002
- )
1003
 
1004
- if not reply:
 
 
 
 
 
 
1005
 
1006
- raise HTTPException(
1007
- status_code=502,
1008
- detail="All providers failed"
1009
- )
 
1010
 
1011
- # =====================================================
1012
- # CLAUDE FORMAT
1013
- # =====================================================
 
 
 
1014
 
1015
- if (
1016
- "messages" in path_lower
1017
- and "chat" not in path_lower
1018
- ):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1019
 
 
1020
  return {
1021
- "id": f"msg_{uuid.uuid4().hex}",
1022
- "type": "message",
1023
- "role": "assistant",
1024
  "model": model,
1025
- "content": [
1026
  {
1027
- "type": "text",
1028
- "text": reply
 
1029
  }
1030
  ],
1031
- "stop_reason": "end_turn"
1032
  }
1033
 
1034
- # =====================================================
1035
- # OPENAI FORMAT
1036
- # =====================================================
1037
 
1038
- return {
1039
- "id": f"chatcmpl-{uuid.uuid4().hex}",
1040
- "object": "chat.completion",
1041
- "created": int(time.time()),
1042
- "model": model,
1043
- "choices": [
1044
- {
1045
- "index": 0,
1046
- "message": {
1047
- "role": "assistant",
1048
- "content": reply
1049
- },
1050
- "finish_reason": "stop"
1051
- }
1052
- ]
1053
- }
1054
 
1055
  # =========================================================
1056
- # RUN
1057
  # =========================================================
1058
-
1059
  if __name__ == "__main__":
1060
-
1061
- uvicorn.run(
1062
- app,
1063
- host="0.0.0.0",
1064
- port=PORT,
1065
- log_level="info"
1066
- )
 
6
  import logging
7
  import uvicorn
8
  import random
 
9
  from fastapi import FastAPI, HTTPException, Request, Response
10
  from fastapi.middleware.cors import CORSMiddleware
 
11
  from concurrent.futures import ThreadPoolExecutor
12
  from curl_cffi import requests as curl_requests
 
13
  from typing import Dict, List, Optional, Tuple
14
 
15
  # =========================================================
16
+ # 1. ุงู„ุฅุนุฏุงุฏุงุช ุงู„ุนู„ูŠุง (Orchestration Config)
17
  # =========================================================
 
18
  API_KEY = os.environ.get("API_KEY", "sk-your-secret-key")
19
  PORT = int(os.environ.get("PORT", 7860))
 
20
  MAX_WORKERS = 50
21
  VALIDATION_INTERVAL = 300
22
+ GLOBAL_TIMEOUT = 60
23
  CACHE_FILE = "models_cache.json"
24
 
25
  MODEL_BLACKLIST = [
 
39
  VALIDATION_PROMPT = [
40
  {
41
  "role": "user",
42
+ "content": "Reply only with: OK"
43
  }
44
  ]
45
 
46
+ # Capability Registry
47
  WORKING_MODELS: Dict[str, dict] = {}
48
  PROVIDER_MODEL_MAP: Dict[str, List[str]] = {}
49
 
50
+ # Atomic State & Concurrency Control
51
  STATE_LOCK = asyncio.Lock()
52
  REQUEST_LIMITER = asyncio.Semaphore(25)
 
53
  EXECUTOR = ThreadPoolExecutor(max_workers=MAX_WORKERS)
54
 
55
+ app = FastAPI(title="Omega Orchestrator", version="2.0")
 
 
 
 
 
 
 
 
56
  app.add_middleware(
57
  CORSMiddleware,
58
  allow_origins=["*"],
 
60
  allow_headers=["*"]
61
  )
62
 
63
+ logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
64
+ logger = logging.getLogger("ORCHESTRATOR")
 
 
 
 
65
 
 
 
 
66
 
67
  def get_stealth_headers():
 
68
  return {
69
  "User-Agent": (
70
+ f"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
71
+ f"AppleWebKit/537.36 (KHTML, like Gecko) "
72
+ f"Chrome/{random.randint(120, 124)}.0.0.0 Safari/537.36"
 
 
 
73
  ),
74
  "Origin": "https://g4f.space",
75
  "Referer": "https://g4f.space/",
 
79
  }
80
 
81
 
82
+ # =========================================================
83
+ # 2. ุงู„ุชุญู‚ู‚ ู…ู† ุงู„ู…ุตุงุฏู‚ุฉ (Auth Verification) โ€” ู…ูุตู„ุญ
84
+ # =========================================================
85
  def verify_api_key(request: Request) -> bool:
86
+ """
87
+ ูŠุฏุนู… ุซู„ุงุซ ุทุฑู‚ ู„ู„ู…ุตุงุฏู‚ุฉ:
88
+ 1. Authorization: Bearer <key>
89
+ 2. x-api-key: <key>
90
+ 3. api-key: <key>
91
+ ุฅุฐุง ู„ู… ูŠูุถุจุท API_KEY ูุงู„ูˆุตูˆู„ ู…ูุชูˆุญ.
92
+ """
93
  if not API_KEY or API_KEY == "sk-your-secret-key":
94
+ return True # No key configured โ†’ open access
95
 
96
  auth_header = request.headers.get("Authorization", "")
97
+ x_api_key = request.headers.get("x-api-key", "")
98
  api_key_hdr = request.headers.get("api-key", "")
99
 
100
  candidates = []
101
 
102
+ # Bearer token
103
  if auth_header.startswith("Bearer "):
104
+ candidates.append(auth_header[len("Bearer "):].strip())
 
 
105
 
106
+ # x-api-key / api-key headers
107
  if x_api_key:
108
  candidates.append(x_api_key.strip())
 
109
  if api_key_hdr:
110
  candidates.append(api_key_hdr.strip())
111
 
 
113
 
114
 
115
  # =========================================================
116
+ # 3. ู…ุณุชุฎุฑุฌ ุงู„ู…ุญุชูˆู‰ ุงู„ู…ุฑูƒุฒูŠ (Central Content Extractor)
117
  # =========================================================
 
118
  def extract_content(data) -> Optional[str]:
 
119
  if not isinstance(data, dict):
120
  return None
121
 
122
+ # Direct content fields
123
+ for field in ("response", "content", "text", "output", "result", "generated_text"):
124
+ if field in data and isinstance(data[field], str) and data[field].strip():
125
+ return data[field]
126
+
127
+ # OpenAI-style choices
128
+ if "choices" in data and isinstance(data["choices"], list) and data["choices"]:
129
+ choice = data["choices"][0]
130
+ if isinstance(choice, dict):
131
+ msg = choice.get("message", {})
132
+ if isinstance(msg, dict) and "content" in msg:
133
+ return msg["content"]
134
+ if "text" in choice and choice["text"].strip():
135
+ return choice["text"]
136
+ if "delta" in choice and "content" in choice["delta"]:
137
+ return choice["delta"]["content"]
138
+
139
+ # Anthropic-style message
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
140
  if "message" in data:
141
+ m = data["message"]
142
+ if isinstance(m, dict) and "content" in m:
143
+ c = m["content"]
144
+ # content can be a list of blocks
145
+ if isinstance(c, list):
146
+ texts = [b.get("text", "") for b in c if isinstance(b, dict) and b.get("type") == "text"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
  combined = "".join(texts)
 
148
  if combined.strip():
149
  return combined
150
+ elif isinstance(c, str) and c.strip():
151
+ return c
152
+ elif isinstance(m, str) and m.strip():
153
+ return m
154
+
155
+ # Nested data.message
156
+ if "data" in data and isinstance(data["data"], dict):
157
+ return extract_content(data["data"])
158
+
159
+ # Completion field (older APIs)
160
+ if "completion" in data and isinstance(data["completion"], str):
161
+ return data["completion"]
162
 
163
  return None
164
 
165
 
166
  # =========================================================
167
+ # 4. ู…ุฒูˆุฏุงุช ุงู„ุฎุฏู…ุฉ (Provider Classes)
168
  # =========================================================
 
169
  class BaseProvider:
 
170
  def __init__(self, name: str, url: str):
 
171
  self.name = name
172
  self.url = url
173
+ self.headers = get_stealth_headers()
174
  self.aliases: Dict[str, str] = {}
 
175
  self.fails = 0
176
  self.success = 0
 
177
  self.cooldown = 0.0
 
 
178
  self.latency = 0.0
179
+ self.health = 100
180
 
181
+ def update_health(self):
182
+ total = self.success + self.fails
183
+ if total > 0:
184
+ self.health = int((self.success / total) * 100)
185
 
186
+ # ------------------------------------------------------------------
187
+ # Model Discovery
188
+ # ------------------------------------------------------------------
189
  async def fetch_models(self) -> List[str]:
 
190
  loop = asyncio.get_event_loop()
 
191
  try:
 
192
  async with REQUEST_LIMITER:
193
+ models = await loop.run_in_executor(EXECUTOR, self._fetch_models_sync)
 
 
 
 
 
194
  return list(set(models))
 
195
  except Exception as e:
196
+ logger.debug(f"[{self.name}] fetch_models error: {e}")
 
 
 
 
197
  return []
198
 
199
  def _fetch_models_sync(self) -> List[str]:
200
+ """
201
+ ูŠุฌุฑุจ ู†ู‚ุงุท ู†ู‡ุงูŠุฉ ู…ุชุนุฏุฏุฉ ู„ุงุณุชุฎุฑุงุฌ ุฌู…ูŠุน ุงู„ู†ู…ุงุฐุฌ ุงู„ู…ุชุงุญุฉ.
202
+ ูŠุฏุนู… ูƒุงูุฉ ุงู„ู‡ูŠุงูƒู„ ุงู„ุดุงุฆุนุฉ: ู‚ูˆุงุฆู… ู…ุณุทู‘ุญุฉุŒ ู‚ูˆุงู…ูŠุณุŒ ุจูŠุงู†ุงุช ู…ุชุฏุงุฎู„ุฉ.
203
+ """
204
+ discovered: List[str] = []
205
  endpoints = [
206
  f"{self.url}/v1/models",
207
  f"{self.url}/models",
208
+ self.url,
209
  ]
210
 
211
  with curl_requests.Session() as session:
 
212
  for endpoint in endpoints:
 
213
  try:
 
214
  resp = session.get(
215
  endpoint,
216
+ headers=self.headers,
217
  impersonate="chrome124",
218
+ timeout=10
219
  )
 
220
  if resp.status_code != 200:
221
  continue
222
+ data = resp.json()
223
+ extracted = self._parse_models_response(data)
224
+ if extracted:
225
+ discovered.extend(extracted)
226
+ break # Found models, no need to try other endpoints
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
227
  except Exception:
228
  continue
229
 
230
+ return discovered
231
+
232
+ @staticmethod
233
+ def _parse_models_response(data) -> List[str]:
234
+ """
235
+ ูŠุณุชุฎุฑุฌ ู…ุนุฑู‘ูุงุช ุงู„ู†ู…ุงุฐุฌ ู…ู† ุฃูŠ ู‡ูŠูƒู„ ุจูŠุงู†ุงุช ู…ุญุชู…ู„.
236
+ """
237
+ ids: List[str] = []
238
+
239
+ if isinstance(data, list):
240
+ for item in data:
241
+ if isinstance(item, str):
242
+ ids.append(item)
243
+ elif isinstance(item, dict):
244
+ for key in ("id", "name", "model", "model_id"):
245
+ if key in item and isinstance(item[key], str):
246
+ ids.append(item[key])
247
+ break
248
+
249
+ elif isinstance(data, dict):
250
+ # OpenAI-style: {"data": [...]}
251
+ if "data" in data and isinstance(data["data"], list):
252
+ ids.extend(BaseProvider._parse_models_response(data["data"]))
253
+
254
+ # {"models": [...]}
255
+ elif "models" in data and isinstance(data["models"], list):
256
+ ids.extend(BaseProvider._parse_models_response(data["models"]))
257
+
258
+ # {"result": [...]} or {"results": [...]}
259
+ elif "result" in data and isinstance(data["result"], list):
260
+ ids.extend(BaseProvider._parse_models_response(data["result"]))
261
+ elif "results" in data and isinstance(data["results"], list):
262
+ ids.extend(BaseProvider._parse_models_response(data["results"]))
263
+
264
+ # Single model dict
265
+ elif "id" in data:
266
+ ids.append(data["id"])
267
+
268
+ return ids
269
+
270
+ # ------------------------------------------------------------------
271
+ # Validation
272
+ # ------------------------------------------------------------------
273
+ async def validate_model(self, model: str) -> Tuple[bool, float]:
274
+ payload = {"model": model, "messages": VALIDATION_PROMPT}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
275
  start = time.time()
 
276
  result = await self.attempt_request(payload)
 
277
  latency = time.time() - start
278
 
279
+ ok = result is not None and "ok" in result.strip().lower()
280
+ if ok:
 
 
 
281
  self.success += 1
282
+ self.latency = (self.latency + latency) / 2 if self.latency > 0 else latency
283
+ else:
284
+ self.fails += 1
285
+ self.update_health()
286
+ return ok, latency
287
 
288
+ # ------------------------------------------------------------------
289
+ # Request Handling
290
+ # ------------------------------------------------------------------
291
+ async def attempt_request(self, payload: dict) -> Optional[str]:
292
+ payload = payload.copy() # prevent mutation leakage
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
293
 
294
  if time.time() < self.cooldown:
295
  return None
296
 
 
 
297
  model_id = payload.get("model", "")
298
+ payload["model"] = self.aliases.get(model_id, model_id)
 
 
 
299
 
300
  try:
 
301
  async with REQUEST_LIMITER:
 
302
  loop = asyncio.get_event_loop()
303
+ content = await loop.run_in_executor(EXECUTOR, self._make_request, payload)
304
 
305
+ if content:
 
 
 
 
 
 
 
306
  self.fails = 0
307
  self.success += 1
308
+ self.update_health()
309
+ return content
 
 
310
 
311
  self.fails += 1
312
+ self.update_health()
 
 
313
  if self.fails >= 3:
314
  self.cooldown = time.time() + 60
 
315
  return None
316
 
317
  except Exception:
 
318
  self.fails += 1
319
+ self.update_health()
 
 
320
  return None
321
 
322
+ def _make_request(self, payload: dict) -> Optional[str]:
 
 
 
 
323
  with curl_requests.Session() as session:
 
324
  try:
 
325
  resp = session.post(
326
  self.url,
327
+ headers=self.headers,
328
  json=payload,
329
  impersonate="chrome124",
330
  timeout=25
331
  )
332
+ if resp.status_code == 200:
 
 
 
 
333
  data = resp.json()
334
+ content = extract_content(data)
335
+ if content and str(content).strip():
336
+ return str(content).strip()
 
 
337
  except Exception:
338
+ pass
339
+ return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
340
 
 
 
 
341
 
342
+ # ------------------------------------------------------------------
343
+ # Provider Definitions
344
+ # ------------------------------------------------------------------
345
  class GroqProvider(BaseProvider):
 
346
  def __init__(self):
347
+ super().__init__("Groq", "https://g4f.space/api/groq")
348
+ self.aliases = {"gpt-4o": "llama-3-70b"}
 
 
 
 
 
 
 
349
 
350
 
351
  class GeminiProvider(BaseProvider):
 
352
  def __init__(self):
353
+ super().__init__("Gemini", "https://g4f.space/api/gemini")
354
+ self.aliases = {"claude-3-5-sonnet": "gemini-1.5-pro"}
 
 
 
 
 
 
 
355
 
356
 
357
  class PollinationsProvider(BaseProvider):
 
358
  def __init__(self):
359
+ super().__init__("Pollinations", "https://g4f.space/api/pollinations")
360
+ self.aliases = {"gpt-4o": "gpt-4"}
 
 
 
 
 
 
 
361
 
362
 
363
  class OllamaProvider(BaseProvider):
 
364
  def __init__(self):
365
+ super().__init__("Ollama", "https://g4f.space/api/ollama")
 
 
 
 
366
 
367
 
368
  class PerplexityProvider(BaseProvider):
369
+ def __init__(self):
370
+ super().__init__("Perplexity", "https://g4f.space/api/perplexity")
371
 
372
+
373
+ class OpenRouterProvider(BaseProvider):
374
+ """
375
+ ู…ุฒูˆู‘ุฏ ุฅุถุงููŠ: OpenRouter โ€” ูŠุชูŠุญ ุงู„ูˆุตูˆู„ ุฅู„ู‰ ู…ุฆุงุช ุงู„ู†ู…ุงุฐุฌ ุชู„ู‚ุงุฆูŠุงู‹.
376
+ """
377
  def __init__(self):
378
+ super().__init__("OpenRouter", "https://openrouter.ai/api/v1/chat/completions")
379
+ self.models_url = "https://openrouter.ai/api/v1/models"
380
 
381
+ async def fetch_models(self) -> List[str]:
382
+ loop = asyncio.get_event_loop()
383
+ try:
384
+ async with REQUEST_LIMITER:
385
+ models = await loop.run_in_executor(EXECUTOR, self._fetch_openrouter_models)
386
+ return list(set(models))
387
+ except Exception as e:
388
+ logger.debug(f"[OpenRouter] fetch_models error: {e}")
389
+ return []
390
 
391
+ def _fetch_openrouter_models(self) -> List[str]:
392
+ with curl_requests.Session() as session:
393
+ try:
394
+ resp = session.get(self.models_url, headers=self.headers, impersonate="chrome124", timeout=10)
395
+ if resp.status_code == 200:
396
+ data = resp.json()
397
+ return self._parse_models_response(data)
398
+ except Exception:
399
+ pass
400
+ return []
401
 
402
+
403
+ PROVIDER_INSTANCES: List[BaseProvider] = [
404
  GroqProvider(),
405
  GeminiProvider(),
406
  PollinationsProvider(),
407
  OllamaProvider(),
408
+ PerplexityProvider(),
409
+ OpenRouterProvider(),
410
  ]
411
 
412
+
413
  # =========================================================
414
+ # 5. ุฅุฏุงุฑุฉ ุงู„ุญุงู„ุฉ ูˆุงู„ุชุฎุฒูŠู† ุงู„ู…ุคู‚ุช
415
  # =========================================================
 
416
  async def load_cache():
417
+ global WORKING_MODELS, PROVIDER_MODEL_MAP
 
 
 
 
 
 
418
  try:
419
+ if os.path.exists(CACHE_FILE):
420
+ with open(CACHE_FILE, "r", encoding="utf-8") as f:
421
+ data = json.load(f)
422
+ async with STATE_LOCK:
423
+ WORKING_MODELS = data.get("WORKING_MODELS", {})
424
+ PROVIDER_MODEL_MAP = data.get("PROVIDER_MODEL_MAP", {})
425
+ logger.info(f"โœ… Cache loaded โ€” {len(WORKING_MODELS)} models.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
426
  except Exception as e:
427
+ logger.error(f"โš ๏ธ Cache load error: {e}")
 
 
 
428
 
429
 
430
  async def save_cache():
 
431
  try:
 
432
  async with STATE_LOCK:
 
433
  snapshot = {
434
+ "WORKING_MODELS": dict(WORKING_MODELS),
435
+ "PROVIDER_MODEL_MAP": dict(PROVIDER_MODEL_MAP)
436
  }
437
+ with open(CACHE_FILE, "w", encoding="utf-8") as f:
438
+ json.dump(snapshot, f, indent=4, ensure_ascii=False)
439
+ logger.info("๐Ÿ’พ Cache saved.")
 
 
 
 
 
 
 
 
 
 
 
440
  except Exception as e:
441
+ logger.error(f"โš ๏ธ Cache save error: {e}")
442
 
 
 
 
443
 
444
  # =========================================================
445
+ # 6. ู…ุญุฑูƒ ุงู„ุงุณุชูƒุดุงู ูˆุงู„ุชุญู‚ู‚
446
  # =========================================================
 
447
  async def discovery_engine():
 
 
 
 
448
  await load_cache()
449
 
450
  while True:
451
+ logger.info("๐Ÿ“ก Starting validated discovery cycle โ€ฆ")
452
 
453
+ fresh_provider_map: Dict[str, List[str]] = {}
454
+ fresh_working_models: Dict[str, dict] = {}
 
455
 
456
+ # Run provider discovery concurrently
457
+ tasks = {provider: asyncio.create_task(provider.fetch_models()) for provider in PROVIDER_INSTANCES}
 
 
 
 
 
 
 
458
 
459
  for provider, task in tasks.items():
460
+ discovered = await task
461
+ clean_models: List[str] = []
462
 
463
+ # Filter & validate
 
 
 
 
 
 
 
 
 
 
 
 
 
464
  validation_tasks = []
465
+ filtered = [
466
+ m for m in discovered
467
+ if not any(bl in m.lower() for bl in MODEL_BLACKLIST)
468
+ ]
469
 
470
+ logger.info(f"[{provider.name}] Discovered {len(filtered)} candidate models โ€” validating โ€ฆ")
471
 
472
+ for model in filtered:
473
+ validation_tasks.append((model, asyncio.create_task(provider.validate_model(model))))
 
 
 
 
 
 
474
 
475
  for model, vtask in validation_tasks:
 
476
  try:
477
+ is_valid, latency = await vtask
 
 
478
  except Exception:
479
+ is_valid, latency = False, 0.0
480
 
481
+ if is_valid:
482
+ clean_models.append(model)
 
 
 
 
 
483
 
484
+ if model not in fresh_working_models:
485
+ fresh_working_models[model] = {
486
+ "providers": [],
487
+ "latency": latency,
488
+ "health": provider.health,
489
+ "aliases": provider.aliases
490
+ }
 
 
 
 
 
491
 
492
+ info = fresh_working_models[model]
493
+ if provider.url not in info["providers"]:
494
+ info["providers"].append(provider.url)
495
+ info["latency"] = (info["latency"] + latency) / 2
496
+ info["health"] = (info["health"] + provider.health) // 2
497
 
498
+ fresh_provider_map[provider.url] = clean_models
499
+ logger.info(f"โœ… [{provider.name}] {len(clean_models)} valid models.")
 
500
 
501
+ # Atomic publish
502
  async with STATE_LOCK:
503
+ global PROVIDER_MODEL_MAP, WORKING_MODELS
504
  PROVIDER_MODEL_MAP = fresh_provider_map
505
  WORKING_MODELS = fresh_working_models
506
 
507
  await save_cache()
508
+ logger.info(f"๐Ÿš€ Orchestrator ready โ€” {len(WORKING_MODELS)} active models.")
509
+ await asyncio.sleep(VALIDATION_INTERVAL)
510
 
 
 
 
 
 
 
 
511
 
512
  # =========================================================
513
+ # 7. ุงู„ู…ุนุงู„ุฌ ุงู„ู…ุฑูƒุฒูŠ (Omega Handler)
514
  # =========================================================
 
515
  @app.on_event("startup")
516
  async def startup():
517
+ asyncio.create_task(discovery_engine())
518
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
519
 
520
+ @app.api_route("/{path:path}", methods=["GET", "HEAD", "POST", "OPTIONS"])
521
+ async def omega_handler(request: Request, path: str):
522
+ # ---- CORS preflight ----
523
  if request.method == "OPTIONS":
524
+ return Response(status_code=204, headers={
525
+ "Access-Control-Allow-Origin": "*",
526
+ "Access-Control-Allow-Methods": "GET, POST, OPTIONS",
527
+ "Access-Control-Allow-Headers": "*"
528
+ })
529
 
530
+ path_lower = path.lower().strip("/")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
531
 
532
+ # ---- Model listing ----
533
+ if request.method in ("GET", "HEAD") and ("models" in path_lower or path_lower in ("", "v1", "v1/")):
534
+ if "models" in path_lower:
535
+ async with STATE_LOCK:
536
+ model_ids = list(WORKING_MODELS.keys()) if WORKING_MODELS else ["gpt-4o", "claude-3-5-sonnet"]
537
+ return {
538
+ "object": "list",
539
+ "data": [
540
+ {
541
+ "id": m,
542
+ "object": "model",
543
+ "created": int(time.time()),
544
+ "owned_by": "omega-orchestrator"
545
+ }
546
+ for m in sorted(model_ids)
547
+ ]
548
+ }
549
+ return Response(status_code=200)
550
 
551
+ # ---- Chat completions ----
552
+ if request.method == "POST" and any(x in path_lower for x in ("messages", "completions", "chat")):
 
553
 
554
+ # --- Auth check (ู…ูุตู„ุญ) ---
555
+ if not verify_api_key(request):
556
+ raise HTTPException(status_code=401, detail="Unauthorized: invalid or missing API key.")
557
 
558
+ # --- Parse body ---
559
  try:
560
+ body = await request.json()
561
+ except Exception:
562
+ raise HTTPException(status_code=400, detail="Invalid JSON body.")
563
 
564
+ model = body.get("model", "gpt-4o")
565
+ messages = body.get("messages", [])
 
 
 
566
 
567
+ if not messages:
568
+ raise HTTPException(status_code=400, detail="messages field is required.")
 
 
 
 
 
 
 
 
569
 
570
+ # --- Smart routing ---
571
+ async with STATE_LOCK:
572
+ model_info = WORKING_MODELS.get(model)
573
+ if model_info and model_info.get("providers"):
574
+ target_urls = list(model_info["providers"])
575
+ else:
576
+ target_urls = [p.url for p in PROVIDER_INSTANCES]
577
 
578
+ providers = [p for p in PROVIDER_INSTANCES if p.url in target_urls]
 
 
579
 
580
+ # Sort: fewest fails โ†’ highest health โ†’ lowest latency
581
+ providers.sort(key=lambda p: (p.fails, -p.health, p.latency))
582
 
583
+ reply: Optional[str] = None
 
 
584
 
585
+ for provider in providers:
586
+ if time.time() < provider.cooldown:
587
+ continue
588
+ reply = await provider.attempt_request(body)
589
+ if reply:
590
+ logger.info(f"โœ… Served by [{provider.name}] model={model}")
591
+ break
592
 
593
+ # --- Internal fallback via g4f ---
594
+ if not reply:
595
+ try:
596
+ from g4f.client import Client
597
+ loop = asyncio.get_event_loop()
598
 
599
+ def fallback_req():
600
+ return (
601
+ Client()
602
+ .chat.completions.create(model=model, messages=messages)
603
+ .choices[0].message.content
604
+ )
605
 
606
+ reply = await loop.run_in_executor(EXECUTOR, fallback_req)
607
+ logger.info("๐Ÿ”„ Served via g4f fallback.")
608
+ except Exception as e:
609
+ logger.warning(f"g4f fallback failed: {e}")
610
+
611
+ if not reply:
612
+ raise HTTPException(status_code=502, detail="Orchestration Failed: all routes exhausted.")
613
+
614
+ # --- Format response ---
615
+ # Anthropic messages format
616
+ if "messages" in path_lower and "chat" not in path_lower:
617
+ return {
618
+ "id": f"msg_{uuid.uuid4().hex}",
619
+ "type": "message",
620
+ "role": "assistant",
621
+ "model": model,
622
+ "content": [{"type": "text", "text": reply}],
623
+ "stop_reason": "end_turn",
624
+ "usage": {"input_tokens": 0, "output_tokens": 0}
625
+ }
626
 
627
+ # OpenAI chat.completions format (default)
628
  return {
629
+ "id": f"chatcmpl-{uuid.uuid4().hex}",
630
+ "object": "chat.completion",
631
+ "created": int(time.time()),
632
  "model": model,
633
+ "choices": [
634
  {
635
+ "index": 0,
636
+ "message": {"role": "assistant", "content": reply},
637
+ "finish_reason": "stop"
638
  }
639
  ],
640
+ "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
641
  }
642
 
643
+ return Response(status_code=404)
 
 
644
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
645
 
646
  # =========================================================
647
+ # 8. ู†ู‚ุทุฉ ุงู„ุฏุฎูˆู„
648
  # =========================================================
 
649
  if __name__ == "__main__":
650
+ uvicorn.run(app, host="0.0.0.0", port=PORT, log_level="info")