| """ |
g4fpro — server.py v3.0
────────────────────────────────────────────────────
LiteLLM  (primary, 100+ providers via API keys)
    ↓ fallback on any exception
g4f      (fallback, no-auth providers)

HuggingFace Spaces compatible — port 7860
Real streaming in both paths
Conversation memory via SQLite
────────────────────────────────────────────────────
| """ |
| import os, re, sys, json, time, uuid, sqlite3, logging, inspect, threading |
| from datetime import datetime |
| from flask import Flask, request, jsonify, Response, send_from_directory |
| from flask_cors import CORS |
|
|
| |
| |
| |
# Optional backends. Each import is attempted once at start-up; the *_OK flags
# record the outcome and the module handles stay None when the import fails.
#
# NOTE(review): the non-ASCII glyphs in the messages below look like
# mis-decoded UTF-8 (mojibake). They are kept as found; the only change is
# that literals which had been broken across two physical lines (a syntax
# error) are re-joined onto one line. Restore the glyphs from the original.

# Primary backend: LiteLLM.
LITELLM_OK = False
litellm = None
try:
    import litellm as _ll

    # Switch off LiteLLM's debug-info and verbose flags.
    _ll.suppress_debug_info = True
    _ll.set_verbose = False
    litellm = _ll
    LITELLM_OK = True
    print("βLiteLLM loaded")
except ImportError as e:
    print(f"β οΈ LiteLLM not available: {e}")


# Fallback backend: g4f plus its provider registry module.
G4F_OK = False
g4f = None
g4f_prov = None
try:
    import g4f as _g4f
    import g4f.Provider as _prov
    g4f = _g4f
    g4f_prov = _prov
    G4F_OK = True
    print("βg4f loaded")
except ImportError as e:
    print(f"β οΈ g4f not available: {e}")


# The server still starts without any backend; it only warns.
if not LITELLM_OK and not G4F_OK:
    print("β Neither LiteLLM nor g4f available β install at least one.")
|
|
| |
| |
| |
# Flask application. The working directory is used as both static and
# template folder, and static files are mounted at the URL root.
# NOTE(review): with static_folder='.' and static_url_path='' every file in
# the working directory is presumably reachable over HTTP — confirm intended.
app = Flask(__name__, static_folder='.', static_url_path='', template_folder='.')
CORS(app)  # flask_cors with its default options


# Root logging goes to stdout at INFO level; `log` is this module's logger.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s [%(levelname)s] %(message)s',
    stream=sys.stdout,
)
log = logging.getLogger('g4fpro')
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
def _env_int(name, default):
    """Read environment variable *name* as an int, using *default* when unset."""
    return int(os.getenv(name, default))


# Runtime configuration, read once from the environment at import time.
PORT = _env_int('PORT', 7860)                # listening port
MAX_LEN = _env_int('MAX_LEN', 8000)          # max characters per user message
RL_MAX = _env_int('RATE_LIMIT', 20)          # requests allowed per IP per 60 s
MAX_HISTORY = _env_int('MAX_HISTORY', 40)    # default row limit for db_history
DB_PATH = os.getenv('DB_PATH', 'conversations.db')
API_KEY = os.getenv('API_KEY', '')           # empty string disables auth
DEFAULT_MODEL = os.getenv('DEFAULT_MODEL', 'groq/llama-3.3-70b-versatile')


# Comma-separated provider names to hide; blanks are dropped.
BLOCKED_PROVIDERS = {
    entry.strip()
    for entry in os.getenv('BLOCKED_PROVIDERS', '').split(',')
    if entry.strip()
}
|
|
| |
| |
| _LITELLM_KEY_MAP = { |
| 'GROQ_API_KEY': ('groq', 'GROQ_API_KEY'), |
| 'OPENAI_API_KEY': ('openai', 'OPENAI_API_KEY'), |
| 'ANTHROPIC_API_KEY': ('anthropic', 'ANTHROPIC_API_KEY'), |
| 'TOGETHERAI_API_KEY': ('together_ai', 'TOGETHERAI_API_KEY'), |
| 'OPENROUTER_API_KEY': ('openrouter', 'OPENROUTER_API_KEY'), |
| 'COHERE_API_KEY': ('cohere', 'COHERE_API_KEY'), |
| 'GEMINI_API_KEY': ('gemini', 'GEMINI_API_KEY'), |
| 'HF_TOKEN': ('huggingface', 'HUGGINGFACE_API_KEY'), |
| } |
|
|
| def _detect_available_litellm_providers(): |
| """Return list of (prefix, display_name) for keys that are set.""" |
| available = [] |
| for env_var, (prefix, _) in _LITELLM_KEY_MAP.items(): |
| if os.getenv(env_var): |
| available.append(prefix) |
| return available |
|
|
| LITELLM_AVAILABLE_PROVIDERS = _detect_available_litellm_providers() |
|
|
| |
| |
| |
# Held by every DB helper in this module while it talks to SQLite.
_db_lock = threading.Lock()


def _db():
    """Open a new connection to ``DB_PATH`` that yields ``sqlite3.Row`` rows."""
    conn = sqlite3.connect(DB_PATH, check_same_thread=False)
    conn.row_factory = sqlite3.Row
    return conn
|
|
def _db_init():
    """Create the ``messages`` table and its lookup index if they are missing.

    Runs under ``_db_lock``. The connection is closed even when a statement
    fails, so a broken database file does not leak a handle.
    """
    with _db_lock:
        con = _db()
        try:
            con.execute("""
                CREATE TABLE IF NOT EXISTS messages (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    conversation_id TEXT NOT NULL,
                    role TEXT NOT NULL,
                    content TEXT NOT NULL,
                    model TEXT DEFAULT '',
                    provider TEXT DEFAULT '',
                    ts REAL NOT NULL
                )""")
            con.execute("CREATE INDEX IF NOT EXISTS idx_cid ON messages(conversation_id, ts)")
            con.commit()
        finally:
            con.close()


# Make sure the schema exists as soon as the module is imported.
_db_init()
|
|
def db_add(cid, role, content, model='', provider=''):
    """Append one message to conversation *cid*, timestamped with ``time.time()``.

    Args:
        cid: conversation id.
        role: message role stored as-is.
        content: message text.
        model: model name recorded with the message.
        provider: provider name recorded with the message.

    The connection is closed even if the insert fails.
    """
    with _db_lock:
        con = _db()
        try:
            con.execute(
                "INSERT INTO messages(conversation_id,role,content,model,provider,ts) VALUES(?,?,?,?,?,?)",
                (cid, role, content, model, provider, time.time())
            )
            con.commit()
        finally:
            con.close()
|
|
def db_history(cid, limit=MAX_HISTORY):
    """Return the newest *limit* messages of conversation *cid*, oldest first.

    Each item is a dict with the keys ``role``, ``content``, ``model``,
    ``provider`` and ``ts``. Rows with identical timestamps are ordered by
    insertion id so a user/assistant pair written back-to-back cannot swap.
    The connection is closed even if the query fails.
    """
    with _db_lock:
        con = _db()
        try:
            rows = con.execute(
                "SELECT role,content,model,provider,ts FROM messages "
                "WHERE conversation_id=? ORDER BY ts DESC, id DESC LIMIT ?",
                (cid, limit)
            ).fetchall()
        finally:
            con.close()
    # The query returns newest-first; callers expect chronological order.
    return [dict(r) for r in reversed(rows)]
|
|
def db_list_convs():
    """Summarise every stored conversation, most recently active first.

    Each item is a dict with ``conversation_id``, ``last_ts`` (newest
    timestamp), ``msg_count`` and ``last_msg`` (content of the newest
    message; ties on timestamp are broken by insertion id).
    The connection is closed even if the query fails.
    """
    with _db_lock:
        con = _db()
        try:
            rows = con.execute("""
                SELECT conversation_id, MAX(ts) last_ts, COUNT(*) msg_count,
                       (SELECT content FROM messages m2
                        WHERE m2.conversation_id=m.conversation_id
                        ORDER BY ts DESC, id DESC LIMIT 1) last_msg
                FROM messages m
                GROUP BY conversation_id ORDER BY last_ts DESC
            """).fetchall()
        finally:
            con.close()
    return [dict(r) for r in rows]
|
|
def db_delete(cid):
    """Delete every message of conversation *cid*.

    Deleting an unknown id is a no-op. The connection is closed even if the
    statement fails.
    """
    with _db_lock:
        con = _db()
        try:
            con.execute("DELETE FROM messages WHERE conversation_id=?", (cid,))
            con.commit()
        finally:
            con.close()
|
|
| |
| |
| |
# Attribute names of the g4f provider module that build_providers() never
# lists as providers.
_SKIP = {
    'BaseProvider','BaseRetryProvider','IterListProvider','AsyncProvider',
    'AsyncGeneratorProvider','RetryProvider','ProviderType','CreateResult',
    'Custom','CachedSearch','MarkItDown','provider','AbstractProvider',
}
|
|
| def _collect_models(cls): |
| seen, out = set(), [] |
| for attr in ('default_model','model','models','text_models', |
| 'vision_models','model_aliases','swap_model_aliases'): |
| v = getattr(cls, attr, None) |
| if v is None: continue |
| items = ( |
| [v] if isinstance(v, str) else |
| [str(x) for x in v] if isinstance(v, (list,tuple)) else |
| [str(k) for k in (v.keys() if isinstance(v, dict) else v)] |
| ) |
| for s in items: |
| s = s.strip() |
| if s and s not in seen and len(s) < 120: |
| seen.add(s); out.append(s) |
| return out[:25] |
|
|
| |
| |
# Hard-coded g4f catalogue that build_providers() uses when the g4f package
# (or its provider module) is not importable.
# Per entry: "m" is the list of model names, "t" is the provider type
# ("text", "image" or "both").
G4F_STATIC_PROVIDERS = {
    "AnyProvider": {"m":["gpt-4","gpt-4o","gpt-4o-mini","o1","o1-mini","o3-mini","o3-mini-high","o4-mini","gpt-4.1","gpt-4.1-mini","gpt-4.1-nano","gpt-4.5","gpt-oss-120b","meta-ai","llama-2-70b","llama-3-8b","llama-3-70b","llama-3.1-8b","llama-3.1-70b","llama-3.1-405b"],"t":"both"},
    "ApiAirforce": {"m":["roleplay:free"],"t":"text"},
    "Azure": {"m":["gpt-4.1","o4-mini","flux.1-kontext-pro","flux-kontext"],"t":"both"},
    "BlackForestLabs_Flux1Dev": {"m":["black-forest-labs-flux-1-dev","flux-dev","flux"],"t":"both"},
    "BlackForestLabs_Flux1KontextDev":{"m":["flux-kontext-dev"],"t":"both"},
    "Chatai": {"m":["gpt-4o-mini-2024-07-18","gpt-4o-mini"],"t":"text"},
    "CohereForAI_C4AI_Command": {"m":["command-a-03-2025","command-r-plus-08-2024","command-r-08-2024","command-r-plus","command-r","command-r7b-12-2024","command-r7b-arabic-02-2025","command-a","command-r7b"],"t":"text"},
    "Copilot": {"m":["Copilot","Think Deeper","Smart (GPT-5)","Study","o1","gpt-4","gpt-4o","gpt-5"],"t":"text"},
    "DeepInfra": {"m":["MiniMaxAI/MiniMax-M2.5"],"t":"text"},
    "DeepseekAI_JanusPro7b": {"m":["janus-pro-7b","janus-pro-7b-image"],"t":"both"},
    "GeminiPro": {"m":["models/gemini-2.5-flash","gemini-2.5-pro","gemini-2.5-flash","gemini-2.0-flash","gemini-2.0-flash-thinking"],"t":"text"},
    "GradientNetwork": {"m":["GPT OSS 120B","Qwen3 235B","qwen-3-235b","qwen3-235b","gpt-oss-120b"],"t":"text"},
    "Groq": {"m":["openai/gpt-oss-120b","mixtral-8x7b","llama2-70b","moonshotai/Kimi-K2-Instruct"],"t":"text"},
    "HuggingFace": {"m":["openai/gpt-oss-120b","qwen-2.5-72b","llama-3","llama-3.3-70b","command-r-plus","deepseek-r1","qwq-32b","nemotron-70b","qwen-2.5-coder-32b","llama-3.2-11b","mistral-nemo","phi-3.5-mini","qwen-2-72b","qvq-72b","flux","flux-dev","flux-schnell","stable-diffusion-3.5-large","sdxl-1.0","sdxl-turbo"],"t":"text"},
    "HuggingSpace": {"m":["qwen-qwen2-72b-instruct"],"t":"text"},
    "ItalyGPT": {"m":["gpt-4o"],"t":"text"},
    "LMArena": {"m":["default"],"t":"text"},
    "MetaAI": {"m":["meta-ai"],"t":"text"},
    "Microsoft_Phi_4_Multimodal": {"m":["phi-4-multimodal","phi-4"],"t":"text"},
    "Nvidia": {"m":["openai/gpt-oss-120b"],"t":"text"},
    "Ollama": {"m":["gemini-3-flash-preview","gpt-oss-120b","gpt-oss-20b"],"t":"text"},
    "OpenAIFM": {"m":["coral","friendly","patient_teacher","noir_detective","cowboy","calm","scientific_style","alloy","ash","ballad","echo","fable","onyx","nova","sage","shimmer","verse","gpt-4o-mini-tts"],"t":"text"},
    "OpenRouterFree": {"m":["openrouter/free"],"t":"text"},
    "OpenaiChat": {"m":["auto","gpt-5","gpt-5-instant","gpt-4","gpt-4.1","gpt-4.1-mini","gpt-4.5","gpt-4o","gpt-4o-mini","o1","o1-mini","o3-mini","o3-mini-high","o4-mini","o4-mini-high","gpt-image"],"t":"both"},
    "OperaAria": {"m":["aria"],"t":"both"},
    "Perplexity": {"m":["auto","turbo","gpt41","gpt5","gpt5_thinking","o3","o3pro","claude2","claude37sonnetthinking","claude40opus","claude40opusthinking","claude45sonnet","claude45sonnetthinking","experimental","grok","grok4","gemini2flash","pplx_pro","pplx_pro_upgraded","o4mini"],"t":"text"},
    "Pi": {"m":["pi"],"t":"text"},
    "PollinationsAI": {"m":["openai-fast","gpt-4.1-nano","llama-4-scout","deepseek-r1","mistral-small-3.1-24b","qwen-2.5-coder-32b","sdxl-turbo","gpt-image","flux-dev","flux-schnell","flux-pro","flux","flux-kontext","llamascout","deepseek-reasoning","mistral-small","qwen-3-coder","turbo","gptimage","kontext"],"t":"text"},
    "PollinationsImage": {"m":["flux","flux-dev","flux-schnell","flux-pro","flux-kontext","sdxl-turbo","turbo","kontext"],"t":"image"},
    "Qwen": {"m":["qwen3-235b-a22b","qwen3-max-preview","qwen-plus-2025-09-11","qwen3-coder-plus","qwen3-30b-a3b","qwen3-coder-30b-a3b-instruct","qwen-max-latest","qwq-32b","qwen-turbo-2025-02-11","qwen2.5-omni-7b","qvq-72b-preview-0310","qwen2.5-vl-32b-instruct","qwen2.5-14b-instruct-1m","qwen2.5-coder-32b-instruct","qwen2.5-72b-instruct"],"t":"both"},
    "Qwen_Qwen_2_5": {"m":["qwen-qwen2-5","qwen-2.5"],"t":"text"},
    "Qwen_Qwen_2_5M": {"m":["qwen-2.5-1m-demo","qwen-2.5-1m"],"t":"text"},
    "Qwen_Qwen_2_5_Max": {"m":["qwen-qwen2-5-max","qwen-2.5-max"],"t":"text"},
    "Qwen_Qwen_2_72B": {"m":["qwen-qwen2-72b-instruct","qwen-2-72b"],"t":"text"},
    "Qwen_Qwen_3": {"m":["qwen-3-235b","qwen-3-30b","qwen-3-32b","qwen-3-14b","qwen-3-4b","qwen-3-1.7b","qwen-3-0.6b"],"t":"text"},
    "StabilityAI_SD35Large": {"m":["stabilityai-stable-diffusion-3-5-large","sd-3.5-large"],"t":"both"},
    "TeachAnything": {"m":["gemma"],"t":"text"},
    "WeWordle": {"m":["gpt-4"],"t":"text"},
    "Yqcloud": {"m":["gpt-4"],"t":"text"},
}
|
|
| |
| |
# Static LiteLLM catalogue keyed by provider prefix. Per entry:
#   label      – display name
#   models     – "prefix/model" strings; the first one is the provider default
#   env_key    – environment variable holding the API key
#   needs_auth – whether a key is required
#   free_tier  – whether the provider is flagged as having a free tier
# NOTE(review): the Groq label looks like mis-decoded UTF-8 (mojibake). It is
# kept as found, only re-joined onto one line (the literal had been broken
# across two physical lines, which is a syntax error).
LITELLM_PROVIDERS = {
    "groq": {
        "label": "Groq (ΩΨ¬Ψ§ΩΩ Ψ³Ψ±ΩΨΉ)",
        "models": [
            "groq/llama-3.3-70b-versatile",
            "groq/llama-3.1-70b-versatile",
            "groq/llama-3.1-8b-instant",
            "groq/llama3-70b-8192",
            "groq/llama3-8b-8192",
            "groq/gemma2-9b-it",
            "groq/deepseek-r1-distill-llama-70b",
            "groq/mixtral-8x7b-32768",
        ],
        "env_key": "GROQ_API_KEY",
        "needs_auth": True,
        "free_tier": True,
    },
    "openai": {
        "label": "OpenAI",
        "models": [
            "openai/gpt-4o",
            "openai/gpt-4o-mini",
            "openai/gpt-4-turbo",
            "openai/gpt-4.1",
            "openai/gpt-4.1-mini",
            "openai/gpt-4.1-nano",
            "openai/o1",
            "openai/o1-mini",
            "openai/o3-mini",
            "openai/o4-mini",
        ],
        "env_key": "OPENAI_API_KEY",
        "needs_auth": True,
        "free_tier": False,
    },
    "anthropic": {
        "label": "Anthropic (Claude)",
        "models": [
            "anthropic/claude-3-5-sonnet-20241022",
            "anthropic/claude-3-5-haiku-20241022",
            "anthropic/claude-3-opus-20240229",
            "anthropic/claude-3-haiku-20240307",
            "anthropic/claude-3-7-sonnet-20250219",
        ],
        "env_key": "ANTHROPIC_API_KEY",
        "needs_auth": True,
        "free_tier": False,
    },
    "together_ai": {
        "label": "Together AI",
        "models": [
            "together_ai/meta-llama/Llama-3.3-70B-Instruct-Turbo",
            "together_ai/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
            "together_ai/meta-llama/Llama-4-Scout-17B-16E-Instruct",
            "together_ai/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
            "together_ai/mistralai/Mixtral-8x7B-Instruct-v0.1",
            "together_ai/deepseek-ai/DeepSeek-R1",
            "together_ai/Qwen/Qwen2.5-72B-Instruct-Turbo",
        ],
        "env_key": "TOGETHERAI_API_KEY",
        "needs_auth": True,
        "free_tier": True,
    },
    "openrouter": {
        "label": "OpenRouter",
        "models": [
            "openrouter/meta-llama/llama-3.3-70b-instruct",
            "openrouter/deepseek/deepseek-chat",
            "openrouter/deepseek/deepseek-r1",
            "openrouter/mistralai/mistral-large",
            "openrouter/qwen/qwen-2.5-72b-instruct",
            "openrouter/google/gemma-3-27b-it",
            "openrouter/microsoft/phi-4",
        ],
        "env_key": "OPENROUTER_API_KEY",
        "needs_auth": True,
        "free_tier": True,
    },
    "gemini": {
        "label": "Google Gemini",
        "models": [
            "gemini/gemini-2.0-flash",
            "gemini/gemini-2.0-flash-lite",
            "gemini/gemini-1.5-flash",
            "gemini/gemini-1.5-flash-8b",
            "gemini/gemini-2.5-flash-preview-04-17",
        ],
        "env_key": "GEMINI_API_KEY",
        "needs_auth": True,
        "free_tier": True,
    },
    "cohere": {
        "label": "Cohere",
        "models": [
            "cohere/command-r-plus",
            "cohere/command-r",
            "cohere/command-a-03-2025",
        ],
        "env_key": "COHERE_API_KEY",
        "needs_auth": True,
        "free_tier": True,
    },
    "huggingface": {
        "label": "HuggingFace Inference",
        "models": [
            "huggingface/meta-llama/Llama-3.3-70B-Instruct",
            "huggingface/Qwen/Qwen2.5-72B-Instruct",
            "huggingface/mistralai/Mistral-7B-Instruct-v0.3",
            "huggingface/microsoft/Phi-3.5-mini-instruct",
        ],
        "env_key": "HF_TOKEN",
        "needs_auth": True,
        "free_tier": True,
    },
}
|
|
# Provider catalogue served by /api/providers; rebuilt by build_providers().
PROVIDERS = {}


def build_providers():
    """Rebuild the global ``PROVIDERS`` catalogue.

    Keys are ``"Auto"``, ``"litellm:<prefix>"`` and ``"g4f:<name>"``:

    * every LiteLLM provider is listed, with ``key_set`` telling whether its
      API-key variable is currently set (read from the environment on each
      call);
    * g4f providers are discovered from the imported provider module
      (classes flagged ``working`` that do not need auth and are not in
      ``_SKIP`` or ``BLOCKED_PROVIDERS``); when g4f is not importable the
      static table is used instead.

    NOTE(review): the label prefixes look like mis-decoded UTF-8 (mojibake);
    they are kept exactly as found.
    """
    global PROVIDERS
    p = {}
    keys_configured = 0

    # LiteLLM providers: always listed, flagged by key availability.
    for prefix, info in LITELLM_PROVIDERS.items():
        env_key = info["env_key"]
        key_set = bool(os.getenv(env_key))
        keys_configured += key_set
        label = f"π {info['label']}" if key_set else f"π {info['label']} (ΩΨΨͺΨ§Ψ¬ {env_key})"
        p[f"litellm:{prefix}"] = {
            "models": info["models"],
            "type": "text",
            "needs_auth": True,
            "key_set": key_set,
            "source": "litellm",
            "label": label,
            "free_tier": info.get("free_tier", False),
        }

    if G4F_OK and g4f_prov:
        # Live discovery from the installed g4f package.
        for name in sorted(dir(g4f_prov)):
            if name in _SKIP or name.startswith('_'):
                continue
            if name in BLOCKED_PROVIDERS:
                continue
            cls = getattr(g4f_prov, name, None)
            if not inspect.isclass(cls):
                continue
            if not (hasattr(cls, 'create_completion') or hasattr(cls, 'create_async_generator')):
                continue
            if not bool(getattr(cls, 'working', False)):
                continue
            if bool(getattr(cls, 'needs_auth', False)):
                continue

            models = _collect_models(cls)
            img = list(getattr(cls, 'image_models', None) or [])
            # image-only, mixed, or text-only depending on what is advertised
            ptype = 'image' if (img and not models) else 'both' if img else 'text'

            p[f"g4f:{name}"] = {
                "models": models if models else ["default"],
                "type": ptype,
                "needs_auth": False,
                "source": "g4f",
                "label": f"β‘ {name}",
            }
    else:
        # g4f not importable: fall back to the hard-coded table.
        for name, info in G4F_STATIC_PROVIDERS.items():
            if name in BLOCKED_PROVIDERS:
                continue
            p[f"g4f:{name}"] = {
                "models": info["m"],
                "type": info["t"],
                "needs_auth": False,
                "source": "g4f",
                "label": f"β‘ {name}",
            }

    # DEFAULT_MODEL may equal one of the fixed entries; dict.fromkeys drops
    # the duplicate while keeping the order.
    auto_models = list(dict.fromkeys([
        DEFAULT_MODEL, "groq/llama-3.3-70b-versatile",
        "groq/llama-3.1-8b-instant", "openai/gpt-4o",
        "openai/gpt-4o-mini",
    ]))
    PROVIDERS = {
        "Auto": {
            "models": auto_models,
            "type": "text",
            "needs_auth": False,
            "source": "auto",
            "label": "β‘ Auto β ΨͺΩΩΨ§Ψ¦Ω",
        }
    }
    PROVIDERS.update(p)
    # Count keys seen during this build so the figure stays correct on reload.
    log.info(f"Providers built: {len(PROVIDERS)} "
             f"(litellm={keys_configured} keys, "
             f"g4f={'ok' if G4F_OK else 'missing'})")


build_providers()
|
|
| |
| |
| |
# Per-IP request timestamps inside the 60-second window, guarded by _rl_lock.
_rl: dict = {}
_rl_lock = threading.Lock()


def _check_rl(ip: str) -> bool:
    """Record a request from *ip* and tell whether it is within the limit.

    Returns False, without recording anything, when *ip* already has
    ``RL_MAX`` requests in the last 60 seconds; otherwise stores the current
    time for *ip* and returns True.
    """
    now = time.time()
    with _rl_lock:
        recent = [stamp for stamp in _rl.get(ip, []) if now - stamp < 60]
        allowed = len(recent) < RL_MAX
        if allowed:
            recent.append(now)
            _rl[ip] = recent
    return allowed
|
|
def _get_ip() -> str:
    """Return the client address used as the rate-limit key.

    ``X-Forwarded-For`` may carry a comma-separated chain
    (``client, proxy1, proxy2``); only the first entry is used so one client
    is not counted under a different key per proxy path. Falls back to the
    socket address, then to ``'?'``.

    NOTE(review): the header is client-supplied and therefore spoofable
    unless a trusted proxy overwrites it.
    """
    forwarded = request.headers.get('X-Forwarded-For', '')
    first_hop = forwarded.split(',')[0].strip()
    return first_hop or request.remote_addr or '?'
|
|
def _check_auth() -> bool:
    """Validate the request's bearer token against ``API_KEY``.

    Returns True when no ``API_KEY`` is configured (auth disabled) or when
    the ``Authorization`` header equals ``Bearer <API_KEY>``. The comparison
    is constant-time so the key cannot be probed through response timing.
    """
    if not API_KEY:
        return True
    import hmac  # local import: only needed when auth is enabled

    supplied = request.headers.get('Authorization', '')
    expected = f'Bearer {API_KEY}'
    # Compare bytes: compare_digest rejects non-ASCII str arguments.
    return hmac.compare_digest(supplied.encode('utf-8'), expected.encode('utf-8'))
|
|
| |
| |
| |
| _RE_THINKING = re.compile( |
| r'<(?:antml:)?thinking>(.*?)</(?:antml:)?thinking>', re.DOTALL | re.IGNORECASE) |
| _RE_THINK = re.compile(r'<think>(.*?)</think>', re.DOTALL | re.IGNORECASE) |
|
|
| def _fix_unicode(text: str) -> str: |
| if '\\u' not in text: return text |
| try: |
| return text.encode('utf-8').decode('unicode_escape', errors='replace') |
| except Exception: |
| return re.sub(r'\\u([0-9a-fA-F]{4})', |
| lambda m: chr(int(m.group(1),16)), text) |
|
|
def _extract_thinking(text: str):
    """Split the first reasoning block out of *text*.

    ``_RE_THINKING`` is tried before ``_RE_THINK``; only the first match of
    the first pattern that matches is removed. Returns ``(lines, answer)``
    where *lines* holds at most 30 stripped, non-empty lines of the block
    and *answer* is the stripped text around it. Without a match the result
    is ``([], text)`` with *text* unchanged.
    """
    for pattern in (_RE_THINKING, _RE_THINK):
        found = pattern.search(text)
        if found is None:
            continue
        inner = found.group(1).strip()
        thoughts = [ln.strip() for ln in inner.splitlines() if ln.strip()]
        remainder = text[:found.start()] + text[found.end():]
        return thoughts[:30], remainder.strip()
    return [], text
|
|
def clean_response(raw: str):
    """Normalise a raw model reply into ``{'thinking': [...], 'answer': str}``.

    ``None`` is treated as an empty reply. The text goes through
    ``_fix_unicode``, literal ``\\n`` and ``\\t`` sequences become real
    newlines and tabs, and the first reasoning block is separated from the
    answer.
    """
    decoded = _fix_unicode(raw or '')
    decoded = decoded.replace('\\n', '\n').replace('\\t', '\t')
    thinking, answer = _extract_thinking(decoded)
    return {'thinking': thinking, 'answer': answer.strip()}
|
|
| |
| |
| |
def build_messages(message, system_prompt, history, conv_id):
    """Assemble the chat message list sent to the model.

    Order: optional system prompt, up to 20 most recent prior turns, then
    the new user *message*.

    Prior turns come from the database when *conv_id* has stored rows;
    otherwise the client-supplied *history* is used, so a brand-new
    conversation can still carry context. Entries that are not dicts or have
    empty content are skipped; unknown roles are coerced to ``'user'``.

    Args:
        message: the new user message.
        system_prompt: optional system prompt (falsy to omit).
        history: client-supplied list of ``{'role', 'content'}`` dicts; may
            be ``None``.
        conv_id: conversation id, or falsy to skip the database lookup.
    """
    msgs = []
    if system_prompt:
        msgs.append({'role': 'system', 'content': system_prompt})

    rows = db_history(conv_id) if conv_id else []
    if not rows:
        rows = history if isinstance(history, (list, tuple)) else []

    for entry in rows[-20:]:
        if not isinstance(entry, dict):
            continue  # malformed client input
        content = entry.get('content', '')
        if not content:
            continue
        role = entry.get('role', 'user')
        if role not in ('user', 'assistant', 'system'):
            role = 'user'
        msgs.append({'role': role, 'content': content})

    msgs.append({'role': 'user', 'content': message})
    return msgs
|
|
def extra_kwargs(data):
    """Extract optional sampling parameters from the request payload.

    ``temperature`` and ``top_p`` are converted to float, ``max_tokens`` to
    int. Keys that are missing, ``None`` or not convertible are left out.
    """
    converters = {'temperature': float, 'top_p': float, 'max_tokens': int}
    kw = {}
    for key, convert in converters.items():
        value = data.get(key)
        if value is None:
            continue
        try:
            kw[key] = convert(value)
        except (TypeError, ValueError, OverflowError):
            # Unusable client value: drop the parameter rather than fail.
            continue
    return kw
|
|
| |
| |
| |
| def _parse_provider(provider: str): |
| """ |
| Returns (source, name_or_model). |
| 'Auto' β ('auto', DEFAULT_MODEL) |
| 'litellm:groq' β ('litellm', 'groq') |
| 'g4f:OpenaiChat'β ('g4f', 'OpenaiChat') |
| legacy bare nameβ ('g4f', name) # backward compat |
| """ |
| if provider == 'Auto' or not provider: |
| return 'auto', DEFAULT_MODEL |
| if ':' in provider: |
| src, name = provider.split(':', 1) |
| return src, name |
| |
| return 'g4f', provider |
|
|
def _get_g4f_cls(name: str):
    """Look up attribute *name* on the g4f provider module.

    Returns ``None`` when g4f is unavailable or has no such attribute.
    """
    if G4F_OK and g4f_prov:
        return getattr(g4f_prov, name, None)
    return None
|
|
| def _litellm_model_for(source, name, client_model): |
| """ |
| Resolve the final litellm model string. |
| source='litellm', name='groq' β use client_model (e.g. groq/llama-3.3-70b-versatile) |
| source='litellm', name='openai' β use client_model |
| source='auto' β use DEFAULT_MODEL (override with client_model if it looks like provider/model) |
| """ |
| if source == 'auto': |
| |
| if client_model and '/' in client_model: |
| return client_model |
| return DEFAULT_MODEL |
| |
| if client_model and '/' in client_model: |
| return client_model |
| |
| info = LITELLM_PROVIDERS.get(name) |
| if info and info['models']: |
| return info['models'][0] |
| return DEFAULT_MODEL |
|
|
| |
| |
| |
def litellm_sync(messages, model, extra=None) -> dict:
    """Run one blocking LiteLLM completion.

    Args:
        messages: chat messages to send.
        model: LiteLLM model string.
        extra: optional keyword arguments merged over the base call
            arguments.

    Returns:
        ``{'thinking': [...], 'answer': str}`` as produced by
        ``clean_response``.

    Raises:
        RuntimeError: if LiteLLM could not be imported.
    """
    if not LITELLM_OK:
        raise RuntimeError("LiteLLM not available")
    params = {'model': model, 'messages': messages, 'stream': False}
    params.update(extra or {})
    response = litellm.completion(**params)
    content = response.choices[0].message.content
    return clean_response(content or '')
|
|
| |
| |
| |
def litellm_stream(messages, model, conv_id, user_msg, extra=None):
    """Stream a LiteLLM completion as Server-Sent Events.

    Yields ``data: <json>`` frames whose ``type`` is one of:

    * ``start``    – conversation id, model and provider; always first
    * ``thinking`` – up to 30 non-empty lines found inside a think tag
    * ``chunk``    – a piece of the visible answer
    * ``done``     – end of stream

    A ``<think>``/``<thinking>`` tag (optionally ``antml:``-prefixed) is
    looked for within the first ``detect_limit`` characters; once it has
    been closed, or the limit is exceeded without finding one, text is
    forwarded as it arrives.

    NOTE(review): until a tag is found or the limit is exceeded nothing is
    forwarded, so a reply shorter than ``detect_limit`` characters reaches
    the client only when the upstream stream ends.

    The exchange is stored before the final ``done`` frame, so it is not
    lost when the consumer stops reading at ``done``; a storage error is
    logged instead of propagating (otherwise the caller's fallback would
    replay an already complete answer).

    Raises:
        RuntimeError: on first iteration, if LiteLLM could not be imported.
        Exception: anything raised by ``litellm.completion`` propagates.
    """
    if not LITELLM_OK:
        raise RuntimeError("LiteLLM not available")

    def sse(payload):
        """Format one SSE frame."""
        return f"data: {json.dumps(payload)}\n\n"

    def thought_lines(raw):
        """At most 30 stripped, non-empty lines of captured reasoning."""
        return [l.strip() for l in raw.strip().splitlines() if l.strip()][:30]

    kw = {'model': model, 'messages': messages, 'stream': True}
    if extra:
        kw.update(extra)

    yield sse({'type': 'start', 'conversation_id': conv_id, 'model': model, 'provider': 'litellm'})

    open_re = re.compile(r'<(?:antml:)?think(?:ing)?>', re.I)
    close_re = re.compile(r'</(?:antml:)?think(?:ing)?>', re.I)
    detect_limit = 2000

    buf = ''              # text held back while looking for / inside a tag
    in_think = False      # an opening tag was seen and not yet closed
    think_done = False    # detection finished; forward text directly
    answer_parts = []     # visible answer, joined for persistence

    for chunk in litellm.completion(**kw):
        delta = chunk.choices[0].delta if chunk.choices else None
        text = (delta.content if delta else None) or ''
        if not text:
            continue

        if think_done:
            piece = _fix_unicode(text)
            answer_parts.append(piece)
            yield sse({'type': 'chunk', 'content': piece})
            continue

        buf += text
        if not in_think:
            om = open_re.search(buf)
            if om:
                in_think = True
                before = _fix_unicode(buf[:om.start()])
                if before:
                    answer_parts.append(before)
                    yield sse({'type': 'chunk', 'content': before})
                buf = buf[om.end():]
            elif len(buf) > detect_limit:
                # No tag in the detection window: release what was held back.
                think_done = True
                piece = _fix_unicode(buf)
                answer_parts.append(piece)
                yield sse({'type': 'chunk', 'content': piece})
                buf = ''

        if in_think:
            cm = close_re.search(buf)
            if cm:
                lines = thought_lines(buf[:cm.start()])
                if lines:
                    yield sse({'type': 'thinking', 'lines': lines})
                remainder = _fix_unicode(buf[cm.end():])
                buf = ''
                in_think = False
                think_done = True
                if remainder:
                    answer_parts.append(remainder)
                    yield sse({'type': 'chunk', 'content': remainder})

    # Stream ended while text was still held back.
    if buf:
        if in_think:
            lines = thought_lines(buf)
            if lines:
                yield sse({'type': 'thinking', 'lines': lines})
        else:
            piece = _fix_unicode(buf)
            answer_parts.append(piece)
            yield sse({'type': 'chunk', 'content': piece})

    full_answer = ''.join(answer_parts)
    if conv_id and full_answer.strip():
        try:
            db_add(conv_id, 'user', user_msg, model, 'litellm')
            db_add(conv_id, 'assistant', full_answer, model, 'litellm')
        except Exception as exc:
            log.warning(f"could not store conversation {conv_id}: {exc}")

    yield sse({'type': 'done'})
|
|
| |
| |
| |
def g4f_sync(messages, model, provider_name, extra=None) -> dict:
    """Run one blocking g4f completion.

    Args:
        messages: chat messages to send.
        model: model name; ``'gpt-4o'`` is used when falsy.
        provider_name: attribute name on the g4f provider module; when it
            does not resolve, no explicit provider is passed.
        extra: optional keyword arguments merged over the call arguments.

    Returns:
        ``{'thinking': [...], 'answer': str}`` as produced by
        ``clean_response``.

    Raises:
        RuntimeError: if g4f could not be imported.
    """
    if not G4F_OK:
        raise RuntimeError("g4f not available")

    provider_cls = _get_g4f_cls(provider_name)
    params = {'model': model or 'gpt-4o', 'messages': messages, 'stream': False}
    if provider_cls:
        params['provider'] = provider_cls
    if extra:
        params.update(extra)

    resp = g4f.ChatCompletion.create(**params)
    # The reply may be a plain string or an object with a choices list.
    if isinstance(resp, str):
        raw = resp
    elif hasattr(resp, 'choices') and resp.choices:
        raw = resp.choices[0].message.content
    else:
        raw = str(resp)
    return clean_response(raw)
|
|
| |
| |
| |
def g4f_stream(messages, model, provider_name, conv_id, user_msg, extra=None):
    """Stream a g4f completion as Server-Sent Events.

    Yields ``data: <json>`` frames whose ``type`` is one of ``start``,
    ``thinking`` (up to 30 reasoning lines), ``chunk`` (answer text),
    ``error`` (upstream failure, message truncated to 200 characters) and
    ``done``.

    A ``<think>``/``<thinking>`` tag (optionally ``antml:``-prefixed) is
    looked for within the first ``detect_limit`` characters; once it has
    been closed, or the limit is exceeded without finding one, text is
    forwarded as it arrives.

    NOTE(review): until a tag is found or the limit is exceeded nothing is
    forwarded, so a reply shorter than ``detect_limit`` characters reaches
    the client only when the upstream stream ends.

    The exchange is stored before the final ``done`` frame, so it is not
    lost when the consumer stops reading at ``done``; a storage error is
    logged instead of breaking the response.

    Raises:
        RuntimeError: on first iteration, if g4f could not be imported.
    """
    if not G4F_OK:
        raise RuntimeError("g4f not available")

    def sse(payload):
        """Format one SSE frame."""
        return f"data: {json.dumps(payload)}\n\n"

    def thought_lines(raw):
        """At most 30 stripped, non-empty lines of captured reasoning."""
        return [l.strip() for l in raw.strip().splitlines() if l.strip()][:30]

    def chunk_text(c):
        """Text of one streamed item: a str, an object with choices, or other."""
        if isinstance(c, str):
            return c
        if hasattr(c, 'choices') and c.choices:
            return getattr(c.choices[0].delta, 'content', '') or ''
        return str(c) if c else ''

    provider_tag = f'g4f:{provider_name}'
    cls = _get_g4f_cls(provider_name)
    kw = {'model': model or 'gpt-4o', 'messages': messages, 'stream': True}
    if cls:
        kw['provider'] = cls
    if extra:
        kw.update(extra)

    yield sse({'type': 'start', 'conversation_id': conv_id, 'model': model, 'provider': provider_tag})

    open_re = re.compile(r'<(?:antml:)?think(?:ing)?>', re.I)
    close_re = re.compile(r'</(?:antml:)?think(?:ing)?>', re.I)
    detect_limit = 2000

    buf = ''              # text held back while looking for / inside a tag
    in_think = False      # an opening tag was seen and not yet closed
    think_done = False    # detection finished; forward text directly
    answer_parts = []     # visible answer, joined for persistence

    try:
        for raw_chunk in g4f.ChatCompletion.create(**kw):
            text = chunk_text(raw_chunk)
            if not text:
                continue

            if think_done:
                piece = _fix_unicode(text)
                answer_parts.append(piece)
                yield sse({'type': 'chunk', 'content': piece})
                continue

            buf += text
            if not in_think:
                om = open_re.search(buf)
                if om:
                    in_think = True
                    before = _fix_unicode(buf[:om.start()])
                    if before:
                        answer_parts.append(before)
                        yield sse({'type': 'chunk', 'content': before})
                    buf = buf[om.end():]
                elif len(buf) > detect_limit:
                    # No tag in the detection window: release held-back text.
                    think_done = True
                    piece = _fix_unicode(buf)
                    answer_parts.append(piece)
                    yield sse({'type': 'chunk', 'content': piece})
                    buf = ''

            if in_think:
                cm = close_re.search(buf)
                if cm:
                    lines = thought_lines(buf[:cm.start()])
                    if lines:
                        yield sse({'type': 'thinking', 'lines': lines})
                    remainder = _fix_unicode(buf[cm.end():])
                    buf = ''
                    in_think = False
                    think_done = True
                    if remainder:
                        answer_parts.append(remainder)
                        yield sse({'type': 'chunk', 'content': remainder})

    except Exception as e:
        # Report the upstream failure to the client; whatever was received so
        # far is still flushed and stored below.
        log.warning(f"g4f stream error [{provider_name}]: {e}")
        yield sse({'type': 'error', 'content': str(e)[:200]})

    # Stream ended while text was still held back.
    if buf:
        if in_think:
            lines = thought_lines(buf)
            if lines:
                yield sse({'type': 'thinking', 'lines': lines})
        else:
            piece = _fix_unicode(buf)
            answer_parts.append(piece)
            yield sse({'type': 'chunk', 'content': piece})

    full_answer = ''.join(answer_parts)
    if conv_id and full_answer.strip():
        try:
            db_add(conv_id, 'user', user_msg, model, provider_tag)
            db_add(conv_id, 'assistant', full_answer, model, provider_tag)
        except Exception as exc:
            log.warning(f"could not store conversation {conv_id}: {exc}")

    yield sse({'type': 'done'})
|
|
| |
| |
| |
def call_sync(messages, client_model, provider, extra=None) -> dict:
    """Route one blocking completion to the right backend.

    * ``g4f:*`` or a bare provider name: g4f directly (its errors propagate).
    * ``Auto`` / ``litellm:*`` / anything else: LiteLLM first; on any
      exception, or when LiteLLM is missing, fall back to g4f with automatic
      provider selection.

    Returns:
        ``{'thinking': [...], 'answer': str}``. When no backend is available
        the answer is a fixed notice.

    NOTE(review): the fallback notice and the dash in the log message look
    like mis-decoded UTF-8 (mojibake). The text is kept as found, only
    re-joined onto one line (the literal had been broken across three
    physical lines, which is a syntax error).
    NOTE(review): the g4f fallback receives *client_model* unchanged, which
    may be a LiteLLM-style ``provider/model`` string — confirm g4f accepts it.
    """
    source, name = _parse_provider(provider)

    if source == 'g4f':
        return g4f_sync(messages, client_model, name, extra)

    if LITELLM_OK:
        ll_model = _litellm_model_for(source, name, client_model)
        try:
            result = litellm_sync(messages, ll_model, extra)
        except Exception as e:
            log.warning(f"LiteLLM failed [{ll_model}]: {e} β falling back to g4f")
        else:
            log.info(f"LiteLLM sync OK [{ll_model}]")
            return result

    if G4F_OK:
        return g4f_sync(messages, client_model or 'gpt-4o', 'Auto', extra)

    return {'thinking': [], 'answer': 'β ΩΨ§ ΩΩΨ¬Ψ― ΩΨ²ΩΨ― ΩΨͺΨ§Ψ β ΩΨ±Ψ¬Ω ΨΆΨ¨Ψ· API key'}
|
|
| |
| |
| |
def call_stream(messages, client_model, provider, conv_id, user_msg, extra=None):
    """Yield SSE frames for one streamed completion, with backend fallback.

    Routing mirrors ``call_sync``: ``g4f:*`` and bare names go straight to
    g4f; everything else tries LiteLLM first and, on any exception, emits an
    ``info`` frame and continues with g4f. When no backend can serve the
    request an ``error`` frame followed by ``done`` is produced instead of
    raising in the middle of the response.

    NOTE(review): if LiteLLM fails after it already streamed part of an
    answer, the g4f fallback starts over with a second ``start`` frame.
    NOTE(review): the non-ASCII message texts look like mis-decoded UTF-8
    (mojibake). They are kept as found, only re-joined onto one line where a
    literal had been broken across physical lines (a syntax error).
    """
    def sse(payload):
        """Format one SSE frame."""
        return f"data: {json.dumps(payload)}\n\n"

    source, name = _parse_provider(provider)

    if source == 'g4f':
        if not G4F_OK:
            # g4f_stream would raise here; tell the client instead.
            yield sse({'type': 'error', 'content': 'g4f not available'})
            yield sse({'type': 'done'})
            return
        yield from g4f_stream(messages, client_model, name, conv_id, user_msg, extra)
        return

    if LITELLM_OK:
        ll_model = _litellm_model_for(source, name, client_model)
        try:
            yield from litellm_stream(messages, ll_model, conv_id, user_msg, extra)
            log.info(f"LiteLLM stream OK [{ll_model}]")
            return
        except Exception as e:
            log.warning(f"LiteLLM stream failed [{ll_model}]: {e} β falling back to g4f")
            yield sse({'type': 'info', 'content': 'LiteLLM ΩΨ΄ΩΨ Ψ¬Ψ§Ψ±Ω Ψ§ΩΨͺΨΩΩΩ Ψ₯ΩΩ g4f...'})

    if G4F_OK:
        yield from g4f_stream(messages, client_model or 'gpt-4o', 'Auto', conv_id, user_msg, extra)
        return

    yield sse({'type': 'error', 'content': 'ΩΨ§ ΩΩΨ¬Ψ― ΩΨ²ΩΨ― ΩΨͺΨ§Ψ'})
    yield sse({'type': 'done'})
|
|
| |
| |
| |
# File-name endings that are never served as static content.
_PRIVATE_SUFFIXES = (
    '.py', '.pyc', '.db', '.db-journal', '.db-wal', '.db-shm',
    '.sqlite', '.sqlite3', '.env', '.log',
)


def _is_private_file(path: str) -> bool:
    """Tell whether *path* names a file that must not be served.

    Private means: any path segment starting with a dot (dotfiles, ``.git``,
    ``..``), a name ending in one of ``_PRIVATE_SUFFIXES``, or the configured
    database file and its journal/WAL companions.
    """
    segments = [seg for seg in path.replace('\\', '/').split('/') if seg]
    if not segments:
        return False
    if any(seg.startswith('.') for seg in segments):
        return True
    leaf = segments[-1].lower()
    db_name = os.path.basename(DB_PATH).lower()
    db_files = {db_name, db_name + '-journal', db_name + '-wal', db_name + '-shm'}
    return leaf.endswith(_PRIVATE_SUFFIXES) or leaf in db_files


@app.before_request
def _deny_private_files():
    """Answer 404 for private files before any route can serve them.

    The working directory is the static root, so without this guard source
    files and the conversation database are downloadable. A request hook is
    used because Flask's built-in static route serves the same directory
    and would bypass a check placed only in ``statics``.
    """
    path = request.path
    if path.startswith('/api/') or path in ('/chat', '/chat/stream'):
        return None
    if _is_private_file(path):
        return jsonify({'ok': False, 'error': 'Not found'}), 404
    return None


@app.route('/')
def index():
    """Serve the single-page front end."""
    return send_from_directory('.', 'index.html')


@app.route('/<path:fn>')
def statics(fn):
    """Serve a static asset from the working directory."""
    return send_from_directory('.', fn)
|
|
| |
| |
| |
@app.route('/api/health')
def health():
    """Report backend availability, provider count and the current UTC time.

    ``ts`` keeps the ``YYYY-MM-DDTHH:MM:SS.ffffffZ`` shape; it is built from
    an aware UTC clock because ``datetime.utcnow()`` is deprecated.
    """
    from datetime import timezone  # local import: the file imports only `datetime`

    now_utc = datetime.now(timezone.utc).replace(tzinfo=None)
    return jsonify({
        'ok': True,
        'g4f': G4F_OK,
        'litellm': LITELLM_OK,
        'litellm_keys': LITELLM_AVAILABLE_PROVIDERS,
        'providers': len(PROVIDERS),
        'ts': now_utc.isoformat() + 'Z',
    })
|
|
@app.route('/api/config')
def api_config():
    """Expose the server's public configuration to the front end.

    The API key itself is never returned, only whether one is required.
    """
    settings = {
        'ok': True,
        'default_model': DEFAULT_MODEL,
        'max_len': MAX_LEN,
        'rate_limit': RL_MAX,
        'max_history': MAX_HISTORY,
        'auth_required': bool(API_KEY),
        'litellm': LITELLM_OK,
        'litellm_providers': LITELLM_AVAILABLE_PROVIDERS,
        'g4f': G4F_OK,
    }
    return jsonify(settings)
|
|
@app.route('/api/providers')
def api_providers():
    """List every provider in ``PROVIDERS`` with display data and models.

    Missing optional fields fall back to: label = key, type = ``'text'``,
    needs_auth = False, source = ``'g4f'``, key_set = None.
    """
    catalogue = {
        key: {
            'name': key,
            'label': entry.get('label', key),
            'models': entry['models'],
            'type': entry.get('type', 'text'),
            'needs_auth': entry.get('needs_auth', False),
            'source': entry.get('source', 'g4f'),
            'key_set': entry.get('key_set'),
        }
        for key, entry in PROVIDERS.items()
    }
    return jsonify({'ok': True, 'providers': catalogue, 'total': len(catalogue)})
|
|
@app.route('/api/models/<path:pname>')
def api_models(pname):
    """Return the model list of provider *pname*.

    Catalogue entries are answered from ``PROVIDERS``. Otherwise, when g4f
    is loaded, *pname* (with one leading ``g4f:`` removed) is looked up on
    the provider module. Only public names that resolve to a class are
    inspected, so the URL cannot be used to probe arbitrary module
    attributes. Responds 404 when nothing is found.
    """
    info = PROVIDERS.get(pname)
    if info:
        return jsonify({'ok': True, 'models': info['models']})

    if G4F_OK and g4f_prov:
        # Strip the prefix only at the start, not wherever it occurs.
        bare = pname[len('g4f:'):] if pname.startswith('g4f:') else pname
        cls = None if bare.startswith('_') else getattr(g4f_prov, bare, None)
        if inspect.isclass(cls):
            models = _collect_models(cls)
            if models:
                return jsonify({'ok': True, 'models': models})

    return jsonify({'ok': False, 'error': f'{pname} not found'}), 404
|
|
@app.route('/api/reload', methods=['POST'])
def api_reload():
    """Rebuild the provider catalogue and report its new size."""
    build_providers()
    total = len(PROVIDERS)
    return jsonify({'ok': True, 'providers': total})
|
|
| |
| |
| |
@app.route('/api/conversations')
def api_list_convs():
    """List all stored conversations.

    Requires the same bearer token as the chat endpoints when ``API_KEY`` is
    configured; with no key configured the endpoint stays open.
    """
    if not _check_auth():
        return jsonify({'ok': False, 'error': 'Unauthorized'}), 401
    return jsonify({'ok': True, 'conversations': db_list_convs()})
|
|
@app.route('/api/conversation/<cid>')
def api_get_conv(cid):
    """Return up to 50 of the newest messages of conversation *cid*.

    Requires the same bearer token as the chat endpoints when ``API_KEY`` is
    configured; with no key configured the endpoint stays open.
    """
    if not _check_auth():
        return jsonify({'ok': False, 'error': 'Unauthorized'}), 401
    return jsonify({'ok': True, 'conversation_id': cid, 'messages': db_history(cid, 50)})
|
|
@app.route('/api/conversation/<cid>', methods=['DELETE'])
def api_del_conv(cid):
    """Delete conversation *cid* and all of its messages.

    Requires the same bearer token as the chat endpoints when ``API_KEY`` is
    configured; with no key configured the endpoint stays open.
    """
    if not _check_auth():
        return jsonify({'ok': False, 'error': 'Unauthorized'}), 401
    db_delete(cid)
    return jsonify({'ok': True, 'conversation_id': cid})
|
|
| |
| |
| |
@app.route('/chat', methods=['POST'])
def chat():
    """Answer one chat message synchronously.

    JSON body fields: ``message`` (required), ``model``, ``provider``,
    ``system_prompt``, ``conversation_history``, ``conversation_id`` and the
    optional sampling parameters read by ``extra_kwargs``. A body that is
    not a JSON object, and fields that are not strings, are treated as
    absent instead of causing a server error.

    Responses: 401 bad token, 429 rate limited, 400 missing or over-long
    message, 502 backend failure, 200 with reply, thinking lines, timing and
    the conversation id.

    NOTE(review): the two validation messages look like mis-decoded UTF-8
    (mojibake). They are kept as found, only re-joined onto one line where a
    literal had been broken across two physical lines (a syntax error).
    """
    if not _check_auth():
        return jsonify({'ok': False, 'error': 'Unauthorized'}), 401
    if not _check_rl(_get_ip()):
        return jsonify({'ok': False, 'error': 'Rate limit'}), 429

    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}

    def text_field(key, default=''):
        """Stripped string value of *key*, or *default* if absent/blank/non-str."""
        value = data.get(key)
        if isinstance(value, str) and value.strip():
            return value.strip()
        return default

    message = text_field('message')
    model = text_field('model', DEFAULT_MODEL)
    provider = text_field('provider', 'Auto')
    sys_p = text_field('system_prompt')
    history = data.get('conversation_history') or []
    conv_id = text_field('conversation_id') or str(uuid.uuid4())

    if not message:
        return jsonify({'ok': False, 'error': 'Ψ§ΩΨ±Ψ³Ψ§ΩΨ© ΩΨ·ΩΩΨ¨Ψ©'}), 400
    if len(message) > MAX_LEN:
        return jsonify({'ok': False, 'error': f'Ψ§ΩΨ±Ψ³Ψ§ΩΨ© Ψ·ΩΩΩΨ© (max {MAX_LEN})'}), 400

    msgs = build_messages(message, sys_p, history, conv_id)
    extra = extra_kwargs(data)

    try:
        t0 = time.time()
        result = call_sync(msgs, model, provider, extra)
        dur = round(time.time() - t0, 2)

        # Store the exchange only after the backend answered.
        db_add(conv_id, 'user', message, model, provider)
        db_add(conv_id, 'assistant', result['answer'], model, provider)

        log.info(f"chat OK [{provider}/{model}] {dur}s conv={conv_id[:8]}")
        return jsonify({
            'ok': True,
            'reply': result['answer'],
            'thinking': result['thinking'],
            'model': model,
            'provider': provider,
            'time': dur,
            'conversation_id': conv_id,
        })
    except Exception as e:
        log.error(f"chat ERR [{provider}/{model}]: {e}")
        return jsonify({'ok': False, 'error': str(e)[:300]}), 502
|
|
@app.route('/chat/stream', methods=['POST'])
def chat_stream():
    """Answer one chat message as a Server-Sent Events stream.

    Accepts the same JSON body as ``/chat``. Failures are reported as a
    short event stream (an ``error`` frame followed by ``done``) with the
    matching HTTP status: 401 bad token, 429 rate limited, 400 missing or
    over-long message. The message checks mirror ``/chat`` so the length
    limit cannot be bypassed through this endpoint. A body that is not a
    JSON object, and fields that are not strings, are treated as absent.
    """
    def fail(text, status):
        """Build an SSE response carrying a single error."""
        def events():
            yield f"data: {json.dumps({'type':'error','content':text})}\n\n"
            yield f"data: {json.dumps({'type':'done'})}\n\n"
        return Response(events(), mimetype='text/event-stream'), status

    if not _check_auth():
        return fail('Unauthorized', 401)
    if not _check_rl(_get_ip()):
        return fail('Rate limit', 429)

    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}

    def text_field(key, default=''):
        """Stripped string value of *key*, or *default* if absent/blank/non-str."""
        value = data.get(key)
        if isinstance(value, str) and value.strip():
            return value.strip()
        return default

    message = text_field('message')
    model = text_field('model', DEFAULT_MODEL)
    provider = text_field('provider', 'Auto')
    sys_p = text_field('system_prompt')
    history = data.get('conversation_history') or []
    conv_id = text_field('conversation_id') or str(uuid.uuid4())

    if not message:
        return fail('Message is required', 400)
    if len(message) > MAX_LEN:
        return fail(f'Message too long (max {MAX_LEN})', 400)

    msgs = build_messages(message, sys_p, history, conv_id)
    extra = extra_kwargs(data)

    log.info(f"stream [{provider}/{model}] conv={conv_id[:8]} len={len(message)}")

    return Response(
        call_stream(msgs, model, provider, conv_id, message, extra),
        mimetype='text/event-stream',
        headers={
            # Ask browsers and reverse proxies not to cache or buffer frames.
            'Cache-Control': 'no-cache',
            'X-Accel-Buffering': 'no',
            'Connection': 'keep-alive',
        },
    )
|
|
| |
| |
| |
@app.errorhandler(404)
def e404(e):
    """Handle unknown URLs.

    Paths under ``/api/`` get a JSON 404; every other path is answered with
    ``index.html``.
    """
    if not request.path.startswith('/api/'):
        return send_from_directory('.', 'index.html')
    return jsonify({'ok': False, 'error': 'Not found'}), 404


@app.errorhandler(500)
def e500(e):
    """Answer internal errors with a generic JSON body."""
    body = {'ok': False, 'error': 'Server error'}
    return jsonify(body), 500
|
|
| |
| |
| |
if __name__ == '__main__':
    # Start-up banner, then the threaded built-in server on all interfaces.
    rule = '=' * 52
    banner = [
        f"\n{rule}",
        f" g4fpro v3.0 β port {PORT}",
        f" LiteLLM : {'OK' if LITELLM_OK else 'MISSING'}",
    ]
    if LITELLM_OK:
        # NOTE(review): indentation was lost in the source; both detail lines
        # are assumed to belong under this condition — confirm.
        banner.append(f" Keys set: {', '.join(LITELLM_AVAILABLE_PROVIDERS) or 'none'}")
        banner.append(f" DEFAULT : {DEFAULT_MODEL}")
    banner += [
        f" g4f : {'OK' if G4F_OK else 'MISSING'}",
        f" providers: {len(PROVIDERS)}",
        f" db : {DB_PATH}",
        f" auth : {'enabled' if API_KEY else 'disabled'}",
        f"{rule}\n",
    ]
    print("\n".join(banner))
    app.run(host='0.0.0.0', port=PORT, debug=False, threaded=True)