import datetime as dt
import json
import os
from pathlib import Path
import urllib.error
import urllib.request

import gradio as gr
from huggingface_hub import InferenceClient


def _build_label() -> str:
    version_file = Path("VERSION")
    version_from_file = ""
    if version_file.exists():
        version_from_file = version_file.read_text(encoding="utf-8").strip()
    commit = (
        os.getenv("GITHUB_SHA")
        or os.getenv("COMMIT_SHA")
        or os.getenv("SPACE_COMMIT_SHA")
        or version_from_file
        or "local"
    )
    short_commit = commit[:7] if commit != "local" else commit
    version = os.getenv("APP_VERSION") or short_commit
    # timezone.utc (rather than dt.UTC) keeps this working on Python < 3.11.
    deployed_at = dt.datetime.now(dt.timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")
    return f"Version: {version} | Commit: {short_commit} | Loaded: {deployed_at}"


def _env(name: str, default: str = "") -> str:
    return (os.getenv(name) or default).strip()


def _csv_env(name: str, default_csv: str) -> list[str]:
    raw = _env(name, default_csv)
    return [item.strip() for item in raw.split(",") if item.strip()]


HF_TOKEN = _env("HF_TOKEN")
HF_MODEL = _env("HF_MODEL", "zai-org/GLM-5.1")
AI_BACKEND = _env("AI_BACKEND", "hf").lower()
AI_MAX_TOKENS = int(_env("AI_MAX_TOKENS", "512"))
AI_FALLBACK_ORDER = [
    p.lower() for p in _csv_env("AI_FALLBACK_ORDER", "hf,google,github,openrouter,fireworks")
]

GITHUB_TOKEN = _env("GITHUB_TOKEN")
GITHUB_MODEL = _env("GITHUB_MODEL")
OPENROUTER_API_KEY = _env("OPENROUTER_API_KEY")
OPENROUTER_MODEL = _env("OPENROUTER_MODEL")
FIREWORKS_API_KEY = _env("FIREWORKS_API_KEY")
FIREWORKS_MODEL = _env("FIREWORKS_MODEL")
GOOGLE_API_KEY = _env("GOOGLE_API_KEY")
GOOGLE_MODEL = _env("GOOGLE_MODEL", "gemini-2.0-flash")

HF_MODELS = _csv_env("HF_MODELS", HF_MODEL or "zai-org/GLM-5.1")
GOOGLE_MODELS = _csv_env(
    "GOOGLE_MODELS",
    GOOGLE_MODEL or "gemini-2.0-flash,gemini-2.0-flash-lite,gemini-1.5-flash",
)
GITHUB_MODELS = _csv_env(
    "GITHUB_MODELS",
    GITHUB_MODEL
    or "openai/gpt-4.1-mini,meta/Llama-3.3-70B-Instruct,mistral-ai/Mistral-Nemo-Instruct-2407",
)

PROVIDER_MODELS = {
    "hf": HF_MODELS,
    "google": GOOGLE_MODELS,
    "github": GITHUB_MODELS,
}
PROVIDER_LABELS = {
    "hf": "Hugging Face (z.ai)",
    "google": "Google Gemini",
    "github": "GitHub Models",
}
UI_PROVIDERS = ["hf", "google", "github"]

PRESET_CONFIGS = {
    "ultra-cheap": {
        "label": "Ultra Cheap",
        "provider": "google",
        "preferred_models": [
            "gemini-2.0-flash-lite",
            "gemini-1.5-flash",
            "gemini-2.0-flash",
        ],
        "note": "Minimum-cost setup for rapid dev testing.",
    },
    "budget-dev": {
        "label": "Budget Dev",
        "provider": "github",
        "preferred_models": [
            "mistral-ai/Mistral-Nemo-Instruct-2407",
            "openai/gpt-4.1-mini",
            "meta/Llama-3.3-70B-Instruct",
        ],
        "note": "Lowest-cost first for rapid iteration.",
    },
    "balanced": {
        "label": "Balanced",
        "provider": "google",
        "preferred_models": [
            "gemini-2.0-flash",
            "gemini-1.5-flash",
            "gemini-2.0-flash-lite",
        ],
        "note": "Good quality and speed for day-to-day testing.",
    },
    "quality-check": {
        "label": "Quality Check",
        "provider": "hf",
        "preferred_models": [
            "zai-org/GLM-5.1",
            "zai-org/GLM-5",
            "zai-org/GLM-4.7",
        ],
        "note": "Higher-quality pass for final validation.",
    },
}
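
# Illustrative configuration (example values only, not real secrets):
#   AI_BACKEND=auto
#   AI_FALLBACK_ORDER=google,github,hf
# With these values, chat_response() tries each backend in order whenever no
# explicit provider is passed in (e.g. API calls that omit the dropdown
# value); requests from the UI still pin whichever provider is selected.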


# Explicit token passing helps avoid auth ambiguity across local and Space runtimes.
hf_client = InferenceClient(token=HF_TOKEN) if HF_TOKEN else InferenceClient()


def _runtime_label() -> str:
    active_model = {
        "hf": HF_MODEL,
        "google": GOOGLE_MODEL,
        "github": GITHUB_MODEL,
        "openrouter": OPENROUTER_MODEL,
        "fireworks": FIREWORKS_MODEL,
    }.get(AI_BACKEND, "")
    backend_name = AI_BACKEND.upper()
    model_text = active_model or "not-set"
    return f"Backend: {backend_name} | Model: {model_text}"


def _default_model_for(provider: str) -> str:
    provider = (provider or "").lower()
    models = PROVIDER_MODELS.get(provider, [])
    if models:
        return models[0]
    if provider == "hf":
        return HF_MODEL
    if provider == "google":
        return GOOGLE_MODEL
    if provider == "github":
        return GITHUB_MODEL
    return ""


def _model_choices_for(provider: str) -> tuple[list[str], str]:
    provider = (provider or "").lower()
    choices = PROVIDER_MODELS.get(provider, [])
    selected = _default_model_for(provider)
    if selected and selected not in choices:
        choices = [selected] + choices
    return choices, selected


def _on_provider_change(provider: str):
    choices, selected = _model_choices_for(provider)
    return gr.update(choices=choices, value=selected)


def _pick_model_for_preset(provider: str, preferred_models: list[str]) -> str:
    choices, selected = _model_choices_for(provider)
    for model in preferred_models:
        if model in choices:
            return model
    if choices:
        return choices[0]
    return selected


def _apply_preset(preset_key: str):
    preset = PRESET_CONFIGS.get(preset_key)
    if not preset:
        return (
            gr.update(),
            gr.update(),
            "Preset not found.",
        )
    provider = preset["provider"]
    model_choices, _ = _model_choices_for(provider)
    selected_model = _pick_model_for_preset(provider, preset["preferred_models"])
    if selected_model and selected_model not in model_choices:
        model_choices = [selected_model] + model_choices
    status = (
        f"Applied preset: {preset['label']} | "
        f"Provider: {PROVIDER_LABELS.get(provider, provider)} | "
        f"Model: {selected_model or 'not-set'} | "
        f"{preset['note']}"
    )
    return (
        gr.update(value=provider),
        gr.update(choices=model_choices, value=selected_model),
        status,
    )


def _history_to_messages(history: list, user_message: str) -> list:
    # Accepts both Gradio history formats: "messages"-style dicts and legacy
    # (user, assistant) pairs.
    messages = []
    for item in history or []:
        if isinstance(item, dict):
            role = item.get("role")
            content = item.get("content")
            if role in {"user", "assistant", "system"} and content:
                messages.append({"role": role, "content": str(content)})
            continue
        if isinstance(item, (list, tuple)) and len(item) == 2:
            user_msg, assistant_msg = item
            if user_msg:
                messages.append({"role": "user", "content": str(user_msg)})
            if assistant_msg:
                messages.append({"role": "assistant", "content": str(assistant_msg)})
    messages.append({"role": "user", "content": user_message})
    return messages
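
# Worked example of the conversion above (hypothetical turn values):
#   _history_to_messages([("hi", "hello")], "how are you?")
#   -> [{"role": "user", "content": "hi"},
#       {"role": "assistant", "content": "hello"},
#       {"role": "user", "content": "how are you?"}]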


def _extract_content(choice_message: dict) -> str:
    content = choice_message.get("content", "")
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        chunks = []
        for part in content:
            if isinstance(part, dict) and part.get("type") == "text":
                chunks.append(str(part.get("text", "")))
        return "".join(chunks).strip()
    return str(content)


def _chat_openai_compatible(
    endpoint: str,
    api_key: str,
    model: str,
    messages: list,
    extra_headers=None,
) -> str:
    if not api_key:
        raise ValueError("API key is missing.")
    if not model:
        raise ValueError("Model is not configured.")
    payload = {
        "model": model,
        "messages": messages,
        "max_tokens": AI_MAX_TOKENS,
    }
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    if extra_headers:
        headers.update(extra_headers)
    request = urllib.request.Request(
        endpoint,
        data=json.dumps(payload).encode("utf-8"),
        headers=headers,
        method="POST",
    )
    try:
        with urllib.request.urlopen(request, timeout=90) as response:
            body = json.loads(response.read().decode("utf-8"))
    except urllib.error.HTTPError as exc:
        details = exc.read().decode("utf-8", errors="ignore")
        raise RuntimeError(f"HTTP {exc.code}: {details[:300]}") from exc
    choices = body.get("choices") or []
    if not choices:
        raise RuntimeError("No choices returned from provider.")
    message = choices[0].get("message") or {}
    return _extract_content(message) or "(empty response)"


def _chat_hf(messages: list, model_override: str = "") -> str:
    model = model_override or _default_model_for("hf")
    response = hf_client.chat_completion(
        model=model,
        messages=messages,
        max_tokens=AI_MAX_TOKENS,
    )
    return response.choices[0].message.content or "(empty response)"


def _chat_github(messages: list, model_override: str = "") -> str:
    model = model_override or _default_model_for("github")
    return _chat_openai_compatible(
        endpoint="https://models.github.ai/inference/chat/completions",
        api_key=GITHUB_TOKEN,
        model=model,
        messages=messages,
    )


def _chat_openrouter(messages: list) -> str:
    return _chat_openai_compatible(
        endpoint="https://openrouter.ai/api/v1/chat/completions",
        api_key=OPENROUTER_API_KEY,
        model=OPENROUTER_MODEL,
        messages=messages,
        extra_headers={
            "HTTP-Referer": _env("OPENROUTER_REFERER", "https://huggingface.co"),
            "X-Title": _env("OPENROUTER_APP_NAME", "hf-multi-provider-chat"),
        },
    )


def _chat_fireworks(messages: list) -> str:
    return _chat_openai_compatible(
        endpoint="https://api.fireworks.ai/inference/v1/chat/completions",
        api_key=FIREWORKS_API_KEY,
        model=FIREWORKS_MODEL,
        messages=messages,
    )


def _chat_google(messages: list, model_override: str = "") -> str:
    model = model_override or _default_model_for("google")
    if not GOOGLE_API_KEY:
        raise ValueError("GOOGLE_API_KEY is missing.")
    if not model:
        raise ValueError("GOOGLE_MODEL is not configured.")
    # Gemini expects "model" rather than "assistant"; system messages are
    # folded into user turns here.
    contents = []
    for msg in messages:
        role = msg.get("role")
        text = str(msg.get("content", ""))
        if not text:
            continue
        if role == "assistant":
            contents.append({"role": "model", "parts": [{"text": text}]})
        elif role in {"user", "system"}:
            contents.append({"role": "user", "parts": [{"text": text}]})
    endpoint = (
        f"https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent"
        f"?key={GOOGLE_API_KEY}"
    )
    payload = {
        "contents": contents,
        "generationConfig": {"maxOutputTokens": AI_MAX_TOKENS},
    }
    request = urllib.request.Request(
        endpoint,
        data=json.dumps(payload).encode("utf-8"),
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    try:
        with urllib.request.urlopen(request, timeout=90) as response:
            body = json.loads(response.read().decode("utf-8"))
    except urllib.error.HTTPError as exc:
        details = exc.read().decode("utf-8", errors="ignore")
        raise RuntimeError(f"HTTP {exc.code}: {details[:300]}") from exc
    candidates = body.get("candidates") or []
    if not candidates:
        raise RuntimeError("No candidates returned from Gemini.")
    parts = (candidates[0].get("content") or {}).get("parts") or []
    text_chunks = [str(part.get("text", "")) for part in parts if isinstance(part, dict)]
    result = "".join(text_chunks).strip()
    return result or "(empty response)"
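
# For reference, the generateContent payload built above looks like this for a
# single user turn (with the default AI_MAX_TOKENS of 512):
#   {"contents": [{"role": "user", "parts": [{"text": "Hi"}]}],
#    "generationConfig": {"maxOutputTokens": 512}}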


def _chat_once(backend: str, messages: list, model_override: str = "") -> str:
    if backend == "hf":
        return _chat_hf(messages, model_override)
    if backend == "google":
        return _chat_google(messages, model_override)
    if backend == "github":
        return _chat_github(messages, model_override)
    if backend == "openrouter":
        return _chat_openrouter(messages)
    if backend == "fireworks":
        return _chat_fireworks(messages)
    raise ValueError(
        f"Unsupported AI_BACKEND='{backend}'. "
        "Use one of: hf, google, github, openrouter, fireworks, auto"
    )


def chat_response(message: str, history: list, selected_provider: str, selected_model: str) -> str:
    """Send a user message using the configured backend and return assistant text."""
    if not message or not message.strip():
        return "Please enter a message."
    messages = _history_to_messages(history, message.strip())
    provider = (selected_provider or AI_BACKEND or "hf").lower().strip()
    selected_model = (selected_model or "").strip()
    try:
        if provider == "auto":
            errors = []
            for backend in AI_FALLBACK_ORDER:
                try:
                    return _chat_once(backend, messages)
                except Exception as exc:  # noqa: BLE001
                    errors.append(f"{backend}: {exc}")
            return "All providers failed. " + " | ".join(errors)
        return _chat_once(provider, messages, model_override=selected_model)
    except Exception as exc:  # noqa: BLE001
        return f"Error: {exc}"


with gr.Blocks(title="GitHub + HuggingFace + AI Chat Demo") as demo:
    gr.Markdown("# GitHub → HuggingFace → AI Chat")
    gr.Markdown(f"**{_build_label()}**")
    gr.Markdown(
        "Multi-provider chat app for learning and testing across HF, Gemini, "
        "GitHub Models, OpenRouter, and Fireworks."
    )
    gr.Markdown(f"**{_runtime_label()}**")

    initial_provider = AI_BACKEND if AI_BACKEND in UI_PROVIDERS else "hf"
    initial_model_choices, initial_model = _model_choices_for(initial_provider)

    preset_dropdown = gr.Dropdown(
        label="Quick Preset",
        choices=[(v["label"], k) for k, v in PRESET_CONFIGS.items()],
        value="ultra-cheap",
    )
    preset_apply_btn = gr.Button("Apply Preset")
    preset_status = gr.Markdown("Preset tip: start with Ultra Cheap while iterating.")

    provider_dropdown = gr.Dropdown(
        label="Provider",
        choices=[(PROVIDER_LABELS[p], p) for p in UI_PROVIDERS],
        value=initial_provider,
    )
    model_dropdown = gr.Dropdown(
        label="Model",
        choices=initial_model_choices,
        value=initial_model,
        allow_custom_value=True,
    )

    provider_dropdown.change(
        fn=_on_provider_change,
        inputs=[provider_dropdown],
        outputs=[model_dropdown],
    )
    preset_apply_btn.click(
        fn=_apply_preset,
        inputs=[preset_dropdown],
        outputs=[provider_dropdown, model_dropdown, preset_status],
    )

    gr.ChatInterface(
        chat_response,
        examples=[
            ["What is the capital of France?", "google", "gemini-2.0-flash-lite"],
            ["Explain quantum computing in simple terms.", "github", "openai/gpt-4.1-mini"],
            ["Give me a low-cost model selection strategy for dev vs prod.", "hf", "zai-org/GLM-4.7"],
        ],
        additional_inputs=[provider_dropdown, model_dropdown],
        title=None,
        description="Ask me anything!",
    )


if __name__ == "__main__":
    # server_name="0.0.0.0" is required inside HF Space containers.
    # root_path ensures Gradio resolves JS/CSS assets correctly when running
    # behind a reverse proxy or custom domain.
    _root_path = os.getenv("GRADIO_ROOT_PATH", "").rstrip("/")
    demo.launch(server_name="0.0.0.0", root_path=_root_path)
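
# Local smoke test (illustrative; assumes the module is saved as the Space's
# conventional app.py and that any needed secrets are exported first):
#   HF_TOKEN=... GOOGLE_API_KEY=... python app.py
# Gradio serves on http://localhost:7860 by default; GRADIO_ROOT_PATH is only
# needed when the app runs behind a reverse proxy under a path prefix.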