bahi-bh committed on
Commit
a0d3539
·
verified ·
1 Parent(s): 890b2f5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +425 -353
app.py CHANGED
@@ -1,54 +1,267 @@
1
- from fastapi import FastAPI, Request, HTTPException
 
 
 
 
 
 
 
 
 
2
  from fastapi.middleware.cors import CORSMiddleware
3
  from fastapi.responses import StreamingResponse, JSONResponse
4
  from pydantic import BaseModel
5
- from typing import List, Optional
 
6
 
7
  import asyncio
8
  import json
9
  import time
10
  import uuid
11
  import logging
12
-
 
13
  import g4f
14
  from g4f.client import Client
 
15
 
16
-
17
- # =====================================================
18
  # LOGGING
19
- # =====================================================
20
-
21
- logging.basicConfig(level=logging.INFO)
22
- logger = logging.getLogger(__name__)
 
 
 
23
 
24
 
25
- # =====================================================
26
  # CONFIG
27
- # =====================================================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
- API_KEY = "sk-your-secret-key"
30
 
31
- # timeout لمنع التعليق الأبدي
32
- REQUEST_TIMEOUT = 45
 
 
 
 
 
 
33
 
34
- # retry خفيف
35
- MAX_RETRIES = 2
36
 
 
 
 
 
 
37
 
38
- # =====================================================
39
- # FASTAPI
40
- # =====================================================
41
 
 
 
 
42
  app = FastAPI(
43
  title="Universal AI Gateway",
44
- version="4.2.0"
 
 
45
  )
46
 
47
-
48
- # =====================================================
49
- # CORS
50
- # =====================================================
51
-
52
  app.add_middleware(
53
  CORSMiddleware,
54
  allow_origins=["*"],
@@ -58,10 +271,9 @@ app.add_middleware(
58
  )
59
 
60
 
61
- # =====================================================
62
  # MODELS
63
- # =====================================================
64
-
65
  class Message(BaseModel):
66
  role: str
67
  content: str
@@ -75,257 +287,188 @@ class ChatRequest(BaseModel):
75
  max_tokens: Optional[int] = 4096
76
 
77
 
78
- # =====================================================
79
  # AUTH
80
- # =====================================================
81
-
82
  def verify_api_key(req: Request):
83
-
84
- auth = req.headers.get("Authorization")
85
-
86
- # السماح للاختبار
87
  if not auth:
88
- return True
89
-
90
  if not auth.startswith("Bearer "):
91
- raise HTTPException(
92
- status_code=401,
93
- detail="Invalid Authorization Format"
94
- )
95
-
96
- token = auth.replace("Bearer ", "").strip()
97
-
98
  if token != API_KEY:
99
- raise HTTPException(
100
- status_code=403,
101
- detail="Invalid API Key"
102
- )
103
-
104
  return True
105
 
106
 
107
- # =====================================================
108
- # ROOT
109
- # =====================================================
110
-
111
- @app.get("/")
112
- async def root():
113
-
114
- return {
115
- "status": "online",
116
- "service": "Universal AI Gateway",
117
- "version": "4.2.0"
118
- }
119
-
120
-
121
- # =====================================================
122
- # MODELS
123
- # =====================================================
124
-
125
- @app.get("/v1/models")
126
- async def get_models():
127
-
128
- models_data = []
129
-
130
- # =================================================
131
- # MODELS THAT WORK WELL
132
- # =================================================
133
-
134
- fallback_models = [
135
-
136
- # GPT
137
- "gpt-4o-mini",
138
- "gpt-4o",
139
- "gpt-4",
140
- "gpt-3.5-turbo",
141
-
142
- # Claude
143
- "claude-3-haiku",
144
-
145
- # Llama
146
- "llama-3.1-70b",
147
-
148
- # Mixtral
149
- "mixtral-8x7b",
150
-
151
- # Deepseek
152
- "deepseek-chat",
153
-
154
- # Gemini
155
- "gemini-pro",
156
-
157
- # =================================================
158
- # COHERE FAMILY
159
- # =================================================
160
-
161
- "command-r",
162
- "command-r-plus",
163
- "command-r7b",
164
- "command",
165
- "command-nightly",
166
-
167
- # Additional Cohere-style names
168
- "cohere-command-r",
169
- "cohere-command-r-plus",
170
- ]
171
-
172
- added_models = set()
173
-
174
- try:
175
-
176
- if hasattr(g4f.models, "_all_models"):
177
-
178
- all_models = list(g4f.models._all_models)
179
-
180
- for model in all_models[:100]:
181
-
182
- model_name = str(model)
183
-
184
- if model_name not in added_models:
185
-
186
- models_data.append({
187
- "id": model_name,
188
- "object": "model",
189
- "created": int(time.time()),
190
- "owned_by": "g4f"
191
- })
192
-
193
- added_models.add(model_name)
194
-
195
- except Exception as e:
196
-
197
- logger.error(f"Models error: {e}")
198
-
199
- # fallback models
200
- for model in fallback_models:
201
 
202
- if model not in added_models:
 
203
 
204
- models_data.append({
205
- "id": model,
206
- "object": "model",
207
- "created": int(time.time()),
208
- "owned_by": "g4f"
209
- })
210
-
211
- added_models.add(model)
212
-
213
- return {
214
- "object": "list",
215
- "data": models_data
216
- }
217
 
218
-
219
- # =====================================================
220
- # SAFE COMPLETION
221
- # =====================================================
222
-
223
- async def safe_completion(
224
- model,
225
- messages,
226
- stream=False
227
- ):
228
-
229
- last_error = None
230
-
231
- for attempt in range(MAX_RETRIES):
232
 
233
  try:
234
-
235
  logger.info(
236
- f"Attempt {attempt + 1} | model={model}"
 
237
  )
238
 
239
- client = Client()
 
 
 
 
 
 
 
 
240
 
241
- # timeout لمنع التعليق الأبدي
242
  response = await asyncio.wait_for(
243
- asyncio.to_thread(
244
- client.chat.completions.create,
245
- model=model,
246
- messages=messages,
247
- stream=stream
248
- ),
249
- timeout=REQUEST_TIMEOUT
250
  )
251
 
252
- logger.info(
253
- f"Success | model={model}"
254
- )
 
255
 
 
256
  return response
257
 
258
  except asyncio.TimeoutError:
 
 
 
 
 
 
259
 
260
- last_error = "Request timeout"
 
 
261
 
262
- logger.warning(
263
- f"Timeout | model={model}"
264
- )
265
 
266
- except Exception as e:
267
 
268
- last_error = e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
269
 
270
- logger.warning(
271
- f"Attempt failed {attempt + 1} | {e}"
272
- )
273
 
274
- await asyncio.sleep(1)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
275
 
276
- raise Exception(last_error)
 
 
 
 
 
 
 
 
 
 
 
 
 
277
 
278
 
279
- # =====================================================
280
- # CHAT COMPLETIONS
281
- # =====================================================
 
 
 
282
 
283
- @app.post("/v1/chat/completions")
284
- async def chat_completions(
285
- req: Request,
286
- body: ChatRequest
287
- ):
288
 
 
 
 
289
  verify_api_key(req)
290
 
291
- messages = [
292
- {
293
- "role": m.role,
294
- "content": m.content
295
- }
296
- for m in body.messages
297
- ]
298
 
299
- logger.info(
300
- f"Request model={body.model} stream={body.stream}"
301
- )
 
 
 
 
 
 
 
 
 
 
302
 
303
- # =================================================
304
- # STREAMING
305
- # =================================================
306
 
307
- if body.stream:
 
 
308
 
309
- async def generate_stream():
 
310
 
 
 
 
 
 
311
  try:
312
-
313
- response = await safe_completion(
314
- model=body.model,
315
- messages=messages,
316
- stream=True
317
  )
318
-
319
- chunk_id = f"chatcmpl-{uuid.uuid4().hex}"
320
-
321
- has_content = False
322
-
323
  for chunk in response:
324
-
325
  try:
326
-
327
  content = ""
328
-
329
  if (
330
  hasattr(chunk, "choices")
331
  and chunk.choices
@@ -333,168 +476,97 @@ async def chat_completions(
333
  and chunk.choices[0].delta.content
334
  ):
335
  content = chunk.choices[0].delta.content
336
-
337
  if content:
338
-
339
  has_content = True
340
-
341
- payload = {
342
- "id": chunk_id,
343
- "object": "chat.completion.chunk",
344
- "created": int(time.time()),
345
- "model": body.model,
346
- "choices": [
347
- {
348
- "index": 0,
349
- "delta": {
350
- "content": content
351
- },
352
- "finish_reason": None
353
- }
354
- ]
355
- }
356
-
357
  yield (
358
- f"data: "
359
- f"{json.dumps(payload, ensure_ascii=False)}\n\n"
 
 
 
 
 
 
 
 
 
 
 
360
  )
361
-
362
  await asyncio.sleep(0)
 
 
363
 
364
- except Exception as chunk_error:
365
-
366
- logger.error(
367
- f"Chunk error: {chunk_error}"
368
- )
369
-
370
- # provider فتح stream بدون محتوى
371
  if not has_content:
372
-
373
- error_payload = {
374
- "error": {
375
- "message": "Provider returned empty stream",
376
- "type": "empty_stream"
377
- }
378
- }
379
-
380
  yield (
381
- f"data: "
382
- f"{json.dumps(error_payload)}\n\n"
 
383
  )
384
 
385
- final_payload = {
386
- "id": chunk_id,
387
- "object": "chat.completion.chunk",
388
- "created": int(time.time()),
389
- "model": body.model,
390
- "choices": [
391
- {
392
- "index": 0,
393
- "delta": {},
394
- "finish_reason": "stop"
395
- }
396
- ]
397
- }
398
-
399
  yield (
400
- f"data: "
401
- f"{json.dumps(final_payload)}\n\n"
 
 
 
 
 
 
 
402
  )
403
-
404
  yield "data: [DONE]\n\n"
405
 
406
  except Exception as e:
407
-
408
  logger.error(f"Streaming error: {e}")
409
-
410
- error_payload = {
411
- "error": {
412
- "message": str(e),
413
- "type": "server_error"
414
- }
415
- }
416
-
417
  yield (
418
- f"data: "
419
- f"{json.dumps(error_payload)}\n\n"
 
420
  )
421
 
422
  return StreamingResponse(
423
- generate_stream(),
424
  media_type="text/event-stream",
425
  headers={
426
  "Cache-Control": "no-cache",
427
  "Connection": "keep-alive",
428
- "X-Accel-Buffering": "no"
429
- }
430
  )
431
 
432
- # =================================================
433
- # NORMAL RESPONSE
434
- # =================================================
435
-
436
  try:
437
-
438
- response = await safe_completion(
439
- model=body.model,
440
- messages=messages,
441
- stream=False
442
  )
443
-
444
- assistant_message = ""
445
-
446
  try:
447
-
448
- assistant_message = (
449
- response.choices[0].message.content
450
- )
451
-
452
  except Exception:
453
-
454
- assistant_message = str(response)
455
 
456
  return JSONResponse({
457
  "id": f"chatcmpl-{uuid.uuid4().hex}",
458
  "object": "chat.completion",
459
  "created": int(time.time()),
460
  "model": body.model,
461
- "choices": [
462
- {
463
- "index": 0,
464
- "message": {
465
- "role": "assistant",
466
- "content": assistant_message
467
- },
468
- "finish_reason": "stop"
469
- }
470
- ],
471
- "usage": {
472
- "prompt_tokens": 0,
473
- "completion_tokens": 0,
474
- "total_tokens": 0
475
- }
476
  })
477
 
478
  except Exception as e:
479
-
480
  logger.error(f"Chat error: {e}")
 
481
 
482
- raise HTTPException(
483
- status_code=500,
484
- detail=str(e)
485
- )
486
-
487
-
488
- # =====================================================
489
- # RUN
490
- # =====================================================
491
 
 
 
 
492
  if __name__ == "__main__":
493
-
494
  import uvicorn
495
-
496
- uvicorn.run(
497
- app,
498
- host="0.0.0.0",
499
- port=7860
500
- )
 
1
+ # ╔══════════════════════════════════════════════════════════════════╗
2
+ # ║ Universal AI Gateway – Advanced Edition v5.0 ║
3
+ # ║ • Auto-discovers WORKING providers via live health checks ║
4
+ # ║ • Fetches working models list from g4f-working (daily updated) ║
5
+ # ║ • Lightweight proxy rotation to avoid rate-limit IP bans ║
6
+ # ║ • Smart fallback chain: best provider → pool → any ║
7
+ # ║ • /status endpoint with live provider/model stats ║
8
+ # ╚══════════════════════════════════════════════════════════════════╝
9
+
10
+ from fastapi import FastAPI, Request, HTTPException, BackgroundTasks
11
  from fastapi.middleware.cors import CORSMiddleware
12
  from fastapi.responses import StreamingResponse, JSONResponse
13
  from pydantic import BaseModel
14
+ from typing import List, Optional, Dict, Any
15
+ from contextlib import asynccontextmanager
16
 
17
  import asyncio
18
  import json
19
  import time
20
  import uuid
21
  import logging
22
+ import random
23
+ import httpx
24
  import g4f
25
  from g4f.client import Client
26
+ from g4f import Provider
27
 
28
+ # ───────────────────────────────────────────────────────────────────
 
29
  # LOGGING
30
+ # ───────────────────────────────────────────────────────────────────
31
# Root logging configuration: INFO level, short timestamp, padded level name.
logging.basicConfig(
    datefmt="%H:%M:%S",
    format="%(asctime)s │ %(levelname)-7s │ %(message)s",
    level=logging.INFO,
)

# Single named logger used by every part of the gateway.
logger = logging.getLogger("gateway")
37
 
38
 
39
+ # ───────────────────────────────────────────────────────────────────
40
  # CONFIG
41
+ # ───────────────────────────────────────────────────────────────────
42
# Bearer token expected in the Authorization header.
API_KEY = "sk-your-secret-key"

# Completion attempts
REQUEST_TIMEOUT = 50      # seconds allowed for one completion attempt
MAX_RETRIES = 3           # extra attempts after the first one

# Provider health checks
PROBE_TIMEOUT = 20        # seconds allowed for one health-check probe
PROBE_CONCURRENCY = 8     # number of probes allowed to run at once
CACHE_TTL = 3600          # seconds between background refreshes (1 h)

# Remote list of today's working models (no auth required, updated daily).
G4F_WORKING_MODELS_URL = (
    "https://raw.githubusercontent.com/"
    "Free-AI-Things/g4f-working/main/output/models.txt"
)

# Optional proxy pool to rotate through (add your own entries).
# Accepted formats: "http://host:port" or "socks5://user:pass@host:port"
#   e.g. "http://proxy1:8080", "socks5://user:pass@proxy2:1080"
PROXY_POOL: List[str] = []

# Seed list of provider names to health-check; the probe results decide
# which of them are actually used at runtime.
KNOWN_NO_AUTH_PROVIDERS = [
    "PollinationsAI", "DDG", "Jmuz", "OIVSCode",
    "Liaobots", "ChatGptEs", "Free2GPT", "HuggingSpace",
    "Blackbox", "You", "Pizzagpt", "LambdaChat",
    "PerplexityLabs", "RobocodersAPI", "TeachAnything", "AiChatOnline",
    "Cloudflare", "FreeNetfly", "Gemini", "GeminiPro",
]
86
+
87
+
88
+ # ───────────────────────────────────────────────────────────────────
89
+ # GLOBAL RUNTIME STATE
90
+ # ───────────────────────────────────────────────────────────────────
91
class State:
    """Mutable runtime state shared by request handlers and background tasks.

    All containers are created per instance in ``__init__``. Declaring them
    as class attributes would make every ``State`` object share the same
    list/dict objects.
    """

    def __init__(self) -> None:
        # Provider objects that passed the most recent health check.
        self.working_providers: List[Any] = []
        # Model names loaded from the remote list (or the fallback list).
        self.working_models: List[str] = []
        # Per-provider counters: {"success": int, "fail": int, "last_check": float | None}.
        self.provider_stats: Dict[str, Dict] = {}
        # Monotonic counter used for round-robin proxy selection.
        self.proxy_index: int = 0
        # Unix timestamp of the last completed health check (0.0 = never).
        self.last_probe: float = 0.0
        # Set once startup initialisation has finished.
        self.startup_done: bool = False


# Process-wide singleton used throughout the module.
state = State()
100
+
101
+
102
+ # ───────────────────────────────────────────────────────────────────
103
+ # PROXY ROTATION
104
+ # ───────────────────────────────────────────────────────────────────
105
def get_next_proxy() -> Optional[str]:
    """Return the next proxy in round-robin order, or None for an empty pool."""
    if PROXY_POOL:
        slot = state.proxy_index % len(PROXY_POOL)
        state.proxy_index += 1  # advance the cursor for the next caller
        return PROXY_POOL[slot]
    return None
112
+
113
+
114
def get_random_proxy() -> Optional[str]:
    """Return a randomly chosen proxy, or None when the pool is empty."""
    if not PROXY_POOL:
        return None
    return random.choice(PROXY_POOL)
117
+
118
+
119
+ # ───────────────────────────────────────────────────────────────────
120
+ # FETCH WORKING MODELS FROM REMOTE LIST
121
+ # ───────────────────────────────────────────────────────────────────
122
async def fetch_remote_working_models() -> List[str]:
    """
    Download the working-models list from G4F_WORKING_MODELS_URL.

    The response body is read as one model name per line; blank lines are
    dropped and surrounding whitespace is stripped.

    Returns:
        The downloaded model names, or a hard-coded fallback list when the
        request fails, the server answers with a non-200 status, or the
        downloaded list is empty.
    """
    try:
        async with httpx.AsyncClient(timeout=10) as client:
            resp = await client.get(G4F_WORKING_MODELS_URL)
            if resp.status_code == 200:
                models = [
                    line.strip()
                    for line in resp.text.splitlines()
                    if line.strip()
                ]
                if models:
                    logger.info(f"✅ Remote working models fetched: {len(models)} models")
                    return models
                # A 200 with no usable lines would otherwise leave the
                # gateway advertising zero models.
                logger.warning("⚠️ Remote model list was empty; using fallback list")
            else:
                logger.warning(
                    f"⚠️ Remote model list returned HTTP {resp.status_code}; "
                    "using fallback list"
                )
    except Exception as e:
        # Best effort: any network/parsing problem falls through to the seed list.
        logger.warning(f"⚠️ Could not fetch remote model list: {e}")

    # Fallback seed – known stable models as of 2025
    return [
        "gpt-4o-mini", "gpt-4o", "gpt-4", "gpt-3.5-turbo",
        "claude-3-haiku", "claude-3-sonnet", "claude-3-opus",
        "llama-3.1-70b", "llama-3.1-8b", "llama-3.3-70b",
        "mistral-7b", "mixtral-8x7b",
        "deepseek-chat", "deepseek-r1",
        "gemini-pro", "gemini-1.5-flash", "gemini-1.5-pro",
        "command-r", "command-r-plus",
        "qwen-2-72b", "qwen-2.5-72b",
        "phi-3-mini", "phi-4",
        "hermes-3",
    ]
150
+
151
+
152
+ # ───────────────────────────────────────────────────────────────────
153
+ # PROVIDER HEALTH CHECK
154
+ # ───────────────────────────────────────────────────────────────────
155
async def probe_provider(provider_name: str) -> bool:
    """
    Health-check one provider by sending it a trivial "Hi" prompt.

    Returns True only when the provider exists on ``Provider`` and answers
    within PROBE_TIMEOUT with non-blank text whose first 50 characters do not
    contain "error" (case-insensitive). Any exception counts as a failure.
    """
    try:
        provider_cls = getattr(Provider, provider_name, None)
        if provider_cls is None:
            return False

        chosen_proxy = get_random_proxy()

        def _ask() -> str:
            # Blocking g4f call; executed in a worker thread below.
            probe_client = Client(provider=provider_cls)
            reply = probe_client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[{"role": "user", "content": "Hi"}],
                proxy=chosen_proxy,
            )
            return reply.choices[0].message.content or ""

        answer = await asyncio.wait_for(
            asyncio.to_thread(_ask),
            timeout=PROBE_TIMEOUT,
        )
        ok = bool(answer.strip()) and "error" not in answer.lower()[:50]
        logger.info(f" {'✅' if ok else '❌'} {provider_name}")
        return ok

    except Exception as e:
        logger.debug(f" ❌ {provider_name}: {e}")
        return False
187
+
188
+
189
async def run_health_checks():
    """
    Probe every provider in KNOWN_NO_AUTH_PROVIDERS and cache the results.

    At most PROBE_CONCURRENCY probes run at once. Each probe updates the
    provider's counters in ``state.provider_stats``. When all probes finish,
    ``state.working_providers`` is replaced with the provider objects that
    passed and ``state.last_probe`` is set to the current time.
    """
    logger.info("🔍 Starting provider health checks …")
    limiter = asyncio.Semaphore(PROBE_CONCURRENCY)

    async def guarded_probe(name):
        async with limiter:
            passed = await probe_provider(name)
            counters = state.provider_stats.setdefault(
                name, {"success": 0, "fail": 0, "last_check": None}
            )
            counters["last_check"] = time.time()
            counters["success" if passed else "fail"] += 1
            return name, passed

    outcomes = await asyncio.gather(
        *[guarded_probe(name) for name in KNOWN_NO_AUTH_PROVIDERS],
        return_exceptions=True,
    )

    working = []
    for outcome in outcomes:
        # Exceptions returned by gather() are not tuples and are ignored here.
        if not isinstance(outcome, tuple):
            continue
        name, passed = outcome
        if not passed:
            continue
        provider_cls = getattr(Provider, name, None)
        if provider_cls:
            working.append(provider_cls)

    state.working_providers = working
    state.last_probe = time.time()
    logger.info(
        f"✅ Health check done: {len(working)}/{len(KNOWN_NO_AUTH_PROVIDERS)} providers working"
    )
224
+
225
+
226
+ # ───────────────────────────────────────────────────────────────────
227
+ # STARTUP / BACKGROUND REFRESH
228
+ # ───────────────────────────────────────────────────────────────────
229
async def startup_init():
    """
    Load the model list and start the first provider health check.

    The model list is awaited. The health check is started as a background
    task so that startup does not wait for every provider probe to finish.
    """
    logger.info("🚀 Gateway starting – fetching models & probing providers …")
    state.working_models = await fetch_remote_working_models()
    # Keep a strong reference to the task: the event loop only holds weak
    # references, so an unreferenced task may be garbage-collected before
    # it finishes.
    state.health_check_task = asyncio.create_task(run_health_checks())
    state.startup_done = True
    logger.info(f"🎯 {len(state.working_models)} models loaded")
236
 
 
237
 
238
async def background_refresh(interval: int = CACHE_TTL):
    """
    Periodically re-probe providers and refresh the model list.

    Sleeps ``interval`` seconds, refreshes, and repeats forever. An
    unexpected error during one refresh is logged and the loop continues,
    so a single failure does not stop all future refreshes. Task
    cancellation is not swallowed and ends the loop.

    Args:
        interval: Seconds to wait before each refresh.
    """
    while True:
        await asyncio.sleep(interval)
        try:
            logger.info("♻️ Refreshing provider health & model list …")
            state.working_models = await fetch_remote_working_models()
            await run_health_checks()
        except Exception:
            logger.exception("Background refresh failed; retrying after the next interval")
246
 
 
 
247
 
248
@asynccontextmanager
async def lifespan(app: FastAPI):
    """
    Application lifespan: initialise on startup, clean up on shutdown.

    Startup runs ``startup_init()`` and launches the periodic refresh task.
    Shutdown cancels that task and waits for it to finish, so it does not
    outlive the application.
    """
    await startup_init()
    # Holding the task in a local keeps it referenced for the app's lifetime.
    refresh_task = asyncio.create_task(background_refresh())
    try:
        yield
    finally:
        refresh_task.cancel()
        try:
            await refresh_task
        except asyncio.CancelledError:
            pass  # expected result of the cancel() above
        except Exception:
            logger.exception("Background refresh task ended with an error")
253
 
 
 
 
254
 
255
# ───────────────────────────────────────────────────────────────────
# FASTAPI APP
# ───────────────────────────────────────────────────────────────────
# The ASGI application; `lifespan` handles startup initialisation and the
# background refresh task.
app = FastAPI(
    lifespan=lifespan,
    title="Universal AI Gateway",
    description="Advanced g4f gateway with live provider health checks and proxy rotation",
    version="5.0.0",
)
264
 
 
 
 
 
 
265
  app.add_middleware(
266
  CORSMiddleware,
267
  allow_origins=["*"],
 
271
  )
272
 
273
 
274
# ───────────────────────────────────────────────────────────────────
# MODELS
# ───────────────────────────────────────────────────────────────────
class Message(BaseModel):
    """One chat message: the speaker's role and the message text."""

    role: str
    content: str
 
287
  max_tokens: Optional[int] = 4096
288
 
289
 
290
# ───────────────────────────────────────────────────────────────────
# AUTH
# ───────────────────────────────────────────────────────────────────
def verify_api_key(req: Request):
    """
    Validate the request's ``Authorization: Bearer <token>`` header.

    Returns:
        True when the header is absent or carries the configured API_KEY.

    Raises:
        HTTPException: 401 when the header is present but is not a Bearer
            token; 403 when the token does not match API_KEY.
    """
    import hmac  # local import: constant-time comparison helper

    auth = req.headers.get("Authorization", "")
    if not auth:
        # NOTE(review): requests without any Authorization header are let
        # through on purpose ("for testing"). This makes the API key
        # optional; tighten before exposing the gateway publicly.
        return True
    if not auth.startswith("Bearer "):
        raise HTTPException(status_code=401, detail="Invalid Authorization format")
    token = auth.removeprefix("Bearer ").strip()
    # Compare in constant time so response timing does not reveal how much
    # of the key matched. Encoding to bytes lets non-ASCII tokens be
    # compared instead of raising TypeError.
    if not hmac.compare_digest(token.encode("utf-8"), API_KEY.encode("utf-8")):
        raise HTTPException(status_code=403, detail="Invalid API key")
    return True
303
 
304
 
305
+ # ───────────────────────────────────────────────────────────────────
306
+ # SMART COMPLETION (provider fallback chain)
307
+ # ───────────────────────────────────────────────────────────────────
308
async def smart_completion(model: str, messages: list, stream: bool = False):
    """
    Request a chat completion, falling back across providers on failure.

    Providers are tried in this order, for at most MAX_RETRIES + 1 attempts:
      1. g4f automatic selection (no explicit provider).
      2. Each provider in ``state.working_providers`` (health-checked pool).
    Every attempt takes the next proxy from the pool (if any) and is limited
    to REQUEST_TIMEOUT seconds. Timeouts and errors are both counted as
    failures in ``state.provider_stats`` for providers tracked there.

    NOTE(review): with ``stream=True`` the g4f call presumably returns a lazy
    iterator, so provider errors may only surface while the caller iterates,
    after this function has returned — confirm against g4f.

    Args:
        model: Model name passed through to g4f.
        messages: Chat messages as a list of ``{"role", "content"}`` dicts.
        stream: Whether to request a streaming response.

    Returns:
        Whatever ``Client.chat.completions.create`` returns for the first
        successful attempt.

    Raises:
        RuntimeError: When every attempt failed; the message lists each error.
    """
    errors: List[str] = []

    # Automatic selection (None) first, then the health-checked pool.
    provider_order = [None, *state.working_providers][:MAX_RETRIES + 1]
    last_attempt = len(provider_order) - 1

    for attempt, provider_cls in enumerate(provider_order):
        proxy = get_next_proxy()
        pname = getattr(provider_cls, "__name__", "auto") if provider_cls else "auto"

        # Bind loop variables as defaults: after a timeout the worker thread
        # may still be running while the loop moves on to the next provider.
        def _call(provider_cls=provider_cls, proxy=proxy):
            kwargs = {"model": model, "messages": messages, "stream": stream}
            if proxy:
                kwargs["proxy"] = proxy
            client = Client(provider=provider_cls) if provider_cls else Client()
            return client.chat.completions.create(**kwargs)

        try:
            logger.info(
                f" attempt {attempt + 1} | provider={pname} | "
                f"model={model} | proxy={'yes' if proxy else 'no'}"
            )
            response = await asyncio.wait_for(
                asyncio.to_thread(_call),
                timeout=REQUEST_TIMEOUT,
            )
        except asyncio.TimeoutError:
            msg = f"{pname}: timeout after {REQUEST_TIMEOUT}s"
        except Exception as e:
            msg = f"{pname}: {type(e).__name__}: {e}"
        else:
            if pname in state.provider_stats:
                state.provider_stats[pname]["success"] = \
                    state.provider_stats[pname].get("success", 0) + 1
            logger.info(f" ✅ success via {pname}")
            return response

        # Failure path, shared by timeouts and errors.
        if pname in state.provider_stats:
            state.provider_stats[pname]["fail"] = \
                state.provider_stats[pname].get("fail", 0) + 1
        errors.append(msg)
        logger.warning(f" ⚠️ {msg}")
        if attempt < last_attempt:
            # Brief pause before the next provider; pointless after the last one.
            await asyncio.sleep(0.5)

    raise RuntimeError("All providers failed:\n" + "\n".join(errors))
 
 
370
 
 
371
 
372
+ # ───────────────────────────────────────────────────────────────────
373
+ # ROUTES
374
+ # ───────────────────────────────────────────────────────────────────
375
+
376
@app.get("/")
async def root():
    """Return a short service summary with current provider/model counts."""
    summary = {
        "service": "Universal AI Gateway",
        "version": "5.0.0",
        "status": "online",
    }
    summary["working_providers"] = len(state.working_providers)
    summary["working_models"] = len(state.working_models)
    summary["proxy_pool"] = len(PROXY_POOL)
    summary["docs"] = "/docs"
    return summary
387
 
 
 
 
388
 
389
@app.get("/status")
async def status():
    """Report provider health, model counts and per-provider statistics."""

    def _clock(timestamp):
        # Local wall-clock time of a check, or a dash when never checked.
        if not timestamp:
            return "—"
        return time.strftime("%H:%M:%S", time.localtime(timestamp))

    provider_info = {}
    for name, stats in state.provider_stats.items():
        attempts = stats.get("success", 0) + stats.get("fail", 0)
        if attempts:
            success_rate = round(stats["success"] / attempts * 100, 1)
        else:
            success_rate = 0
        provider_info[name] = {
            "success": stats.get("success", 0),
            "fail": stats.get("fail", 0),
            "success_rate": f"{success_rate}%",
            "last_check": _clock(stats.get("last_check")),
        }

    if state.last_probe:
        last_health_check = time.strftime(
            "%Y-%m-%d %H:%M:%S", time.localtime(state.last_probe)
        )
    else:
        last_health_check = "pending"

    provider_names = [
        getattr(provider, "__name__", str(provider))
        for provider in state.working_providers
    ]

    return {
        "working_providers": provider_names,
        "working_providers_count": len(state.working_providers),
        "working_models_sample": state.working_models[:30],
        "working_models_count": len(state.working_models),
        "proxy_pool_size": len(PROXY_POOL),
        "last_health_check": last_health_check,
        "provider_stats": provider_info,
    }
420
 
421
 
422
@app.post("/v1/providers/refresh")
async def refresh_providers(background_tasks: BackgroundTasks, req: Request):
    """Queue a provider health check to run after the response is sent."""
    verify_api_key(req)
    # The check runs as a background task, so this handler does not wait for it.
    background_tasks.add_task(run_health_checks)
    acknowledgement = {"message": "Provider health check started in background"}
    return acknowledgement
428
 
 
 
 
 
 
429
 
430
@app.get("/v1/models")
async def get_models(req: Request):
    """
    Return the known models as an OpenAI-style model list.

    The result merges ``state.working_models`` with up to 200 entries from
    g4f's own model registry (when it exposes ``_all_models``), deduplicated
    and sorted by id.
    """
    verify_api_key(req)

    model_set: set = set(state.working_models)

    try:
        # `_all_models` is a private g4f attribute and may be absent.
        registry = getattr(g4f.models, "_all_models", None)
        if registry is not None:
            model_set.update(str(m) for m in list(registry)[:200])
    except Exception as e:
        # Best effort: the remote list is still served, but the failure is
        # logged instead of being silently discarded.
        logger.warning(f"Could not read g4f model registry: {e}")

    now = int(time.time())
    data = [
        {"id": mid, "object": "model", "created": now, "owned_by": "g4f"}
        for mid in sorted(model_set)
    ]
    return {"object": "list", "data": data}
451
 
 
 
 
452
 
453
+ @app.post("/v1/chat/completions")
454
+ async def chat_completions(req: Request, body: ChatRequest):
455
+ verify_api_key(req)
456
 
457
+ messages = [{"role": m.role, "content": m.content} for m in body.messages]
458
+ logger.info(f"📨 Request → model={body.model} stream={body.stream}")
459
 
460
+ # ── STREAMING ──────────────────────────────────────────────────
461
+ if body.stream:
462
+ async def generate():
463
+ chunk_id = f"chatcmpl-{uuid.uuid4().hex}"
464
+ has_content = False
465
  try:
466
+ response = await smart_completion(
467
+ model=body.model, messages=messages, stream=True
 
 
 
468
  )
 
 
 
 
 
469
  for chunk in response:
 
470
  try:
 
471
  content = ""
 
472
  if (
473
  hasattr(chunk, "choices")
474
  and chunk.choices
 
476
  and chunk.choices[0].delta.content
477
  ):
478
  content = chunk.choices[0].delta.content
 
479
  if content:
 
480
  has_content = True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
481
  yield (
482
+ "data: "
483
+ + json.dumps({
484
+ "id": chunk_id,
485
+ "object": "chat.completion.chunk",
486
+ "created": int(time.time()),
487
+ "model": body.model,
488
+ "choices": [{
489
+ "index": 0,
490
+ "delta": {"content": content},
491
+ "finish_reason": None,
492
+ }],
493
+ }, ensure_ascii=False)
494
+ + "\n\n"
495
  )
 
496
  await asyncio.sleep(0)
497
+ except Exception as ce:
498
+ logger.error(f"Chunk error: {ce}")
499
 
 
 
 
 
 
 
 
500
  if not has_content:
 
 
 
 
 
 
 
 
501
  yield (
502
+ "data: "
503
+ + json.dumps({"error": {"message": "Provider returned empty stream", "type": "empty_stream"}})
504
+ + "\n\n"
505
  )
506
 
507
+ # Final stop chunk
 
 
 
 
 
 
 
 
 
 
 
 
 
508
  yield (
509
+ "data: "
510
+ + json.dumps({
511
+ "id": chunk_id,
512
+ "object": "chat.completion.chunk",
513
+ "created": int(time.time()),
514
+ "model": body.model,
515
+ "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}],
516
+ })
517
+ + "\n\n"
518
  )
 
519
  yield "data: [DONE]\n\n"
520
 
521
  except Exception as e:
 
522
  logger.error(f"Streaming error: {e}")
 
 
 
 
 
 
 
 
523
  yield (
524
+ "data: "
525
+ + json.dumps({"error": {"message": str(e), "type": "server_error"}})
526
+ + "\n\n"
527
  )
528
 
529
  return StreamingResponse(
530
+ generate(),
531
  media_type="text/event-stream",
532
  headers={
533
  "Cache-Control": "no-cache",
534
  "Connection": "keep-alive",
535
+ "X-Accel-Buffering": "no",
536
+ },
537
  )
538
 
539
+ # ── NON-STREAMING ──────────────────────────────────────────────
 
 
 
540
  try:
541
+ response = await smart_completion(
542
+ model=body.model, messages=messages, stream=False
 
 
 
543
  )
 
 
 
544
  try:
545
+ content = response.choices[0].message.content
 
 
 
 
546
  except Exception:
547
+ content = str(response)
 
548
 
549
  return JSONResponse({
550
  "id": f"chatcmpl-{uuid.uuid4().hex}",
551
  "object": "chat.completion",
552
  "created": int(time.time()),
553
  "model": body.model,
554
+ "choices": [{
555
+ "index": 0,
556
+ "message": {"role": "assistant", "content": content},
557
+ "finish_reason": "stop",
558
+ }],
559
+ "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0},
 
 
 
 
 
 
 
 
 
560
  })
561
 
562
  except Exception as e:
 
563
  logger.error(f"Chat error: {e}")
564
+ raise HTTPException(status_code=500, detail=str(e))
565
 
 
 
 
 
 
 
 
 
 
566
 
567
# ───────────────────────────────────────────────────────────────────
# ENTRY POINT
# ───────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # Imported here so uvicorn is only needed when run as a script.
    import uvicorn

    uvicorn.run(
        app,
        host="0.0.0.0",
        port=7860,
        log_level="info",
    )