Spaces:
Running
Running
| import datetime as dt | |
| import json | |
| import os | |
| from pathlib import Path | |
| import urllib.error | |
| import urllib.request | |
| import gradio as gr | |
| from huggingface_hub import InferenceClient | |
def _build_label() -> str:
    """Build the version/commit/load-time banner shown in the UI header.

    The commit id is resolved from the first available of: GITHUB_SHA,
    COMMIT_SHA, SPACE_COMMIT_SHA env vars, a local VERSION file, or the
    literal "local". APP_VERSION, when set, overrides the displayed version.
    """
    version_file = Path("VERSION")
    version_from_file = ""
    if version_file.exists():
        version_from_file = version_file.read_text(encoding="utf-8").strip()
    commit = (
        os.getenv("GITHUB_SHA")
        or os.getenv("COMMIT_SHA")
        or os.getenv("SPACE_COMMIT_SHA")
        or version_from_file
        or "local"
    )
    # Abbreviate real SHAs to 7 chars; keep the "local" sentinel intact.
    short_commit = commit[:7] if commit != "local" else commit
    version = os.getenv("APP_VERSION") or short_commit
    # dt.timezone.utc instead of dt.UTC: the dt.UTC alias only exists on
    # Python 3.11+, while timezone.utc behaves identically on all versions.
    deployed_at = dt.datetime.now(dt.timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")
    return f"Version: {version} | Commit: {short_commit} | Loaded: {deployed_at}"
| def _env(name: str, default: str = "") -> str: | |
| return (os.getenv(name) or default).strip() | |
def _csv_env(name: str, default_csv: str) -> list:
    """Parse a comma-separated env var into a list of non-empty, stripped tokens."""
    raw = _env(name, default_csv)
    return [token.strip() for token in raw.split(",") if token.strip()]
# --- Credentials and model configuration, all read from environment variables ---

# Hugging Face token is optional; an unauthenticated client is built without it.
HF_TOKEN = _env("HF_TOKEN")
HF_MODEL = _env("HF_MODEL", "zai-org/GLM-5.1")
# Default backend key; "auto" walks AI_FALLBACK_ORDER until a provider succeeds.
AI_BACKEND = _env("AI_BACKEND", "hf").lower()
# Completion-token budget applied to every provider request.
AI_MAX_TOKENS = int(_env("AI_MAX_TOKENS", "512"))
# Normalized, ordered backend list tried when AI_BACKEND == "auto".
AI_FALLBACK_ORDER = [
    p.strip().lower()
    for p in _env("AI_FALLBACK_ORDER", "hf,google,github,openrouter,fireworks").split(",")
    if p.strip()
]
GITHUB_TOKEN = _env("GITHUB_TOKEN")
GITHUB_MODEL = _env("GITHUB_MODEL")
OPENROUTER_API_KEY = _env("OPENROUTER_API_KEY")
OPENROUTER_MODEL = _env("OPENROUTER_MODEL")
FIREWORKS_API_KEY = _env("FIREWORKS_API_KEY")
FIREWORKS_MODEL = _env("FIREWORKS_MODEL")
GOOGLE_API_KEY = _env("GOOGLE_API_KEY")
GOOGLE_MODEL = _env("GOOGLE_MODEL", "gemini-2.0-flash")
# Multi-model dropdown lists; each falls back to the single-model setting
# above (or a hard-coded CSV default when that is empty).
HF_MODELS = _csv_env("HF_MODELS", HF_MODEL or "zai-org/GLM-5.1")
GOOGLE_MODELS = _csv_env(
    "GOOGLE_MODELS",
    GOOGLE_MODEL or "gemini-2.0-flash,gemini-2.0-flash-lite,gemini-1.5-flash",
)
GITHUB_MODELS = _csv_env(
    "GITHUB_MODELS",
    GITHUB_MODEL or "openai/gpt-4.1-mini,meta/Llama-3.3-70B-Instruct,mistral-ai/Mistral-Nemo-Instruct-2407",
)
# Model choices offered in the UI, keyed by provider id.
PROVIDER_MODELS = {
    "hf": HF_MODELS,
    "google": GOOGLE_MODELS,
    "github": GITHUB_MODELS,
}
# Human-readable names for the provider dropdown.
PROVIDER_LABELS = {
    "hf": "Hugging Face (z.ai)",
    "google": "Google Gemini",
    "github": "GitHub Models",
}
# Providers selectable in the UI; openrouter/fireworks are env-configured
# fallbacks only (see AI_FALLBACK_ORDER) and are not shown here.
UI_PROVIDERS = ["hf", "google", "github"]
# One-click presets: each maps to a provider, an ordered model preference
# list (first available wins — see _pick_model_for_preset), and a status note.
PRESET_CONFIGS = {
    "ultra-cheap": {
        "label": "Ultra Cheap",
        "provider": "google",
        "preferred_models": [
            "gemini-2.0-flash-lite",
            "gemini-1.5-flash",
            "gemini-2.0-flash",
        ],
        "note": "Minimum-cost setup for rapid dev testing.",
    },
    "budget-dev": {
        "label": "Budget Dev",
        "provider": "github",
        "preferred_models": [
            "mistral-ai/Mistral-Nemo-Instruct-2407",
            "openai/gpt-4.1-mini",
            "meta/Llama-3.3-70B-Instruct",
        ],
        "note": "Lowest-cost first for rapid iteration.",
    },
    "balanced": {
        "label": "Balanced",
        "provider": "google",
        "preferred_models": [
            "gemini-2.0-flash",
            "gemini-1.5-flash",
            "gemini-2.0-flash-lite",
        ],
        "note": "Good quality and speed for day-to-day testing.",
    },
    "quality-check": {
        "label": "Quality Check",
        "provider": "hf",
        "preferred_models": [
            "zai-org/GLM-5.1",
            "zai-org/GLM-5",
            "zai-org/GLM-4.7",
        ],
        "note": "Higher-quality pass for final validation.",
    },
}
# Explicit token passing helps avoid auth ambiguity across local and Space runtimes.
# When HF_TOKEN is unset, an unauthenticated client is constructed instead.
hf_client = InferenceClient(token=HF_TOKEN) if HF_TOKEN else InferenceClient()
def _runtime_label() -> str:
    """Describe the default backend and its configured model for the UI header."""
    model_by_backend = {
        "hf": HF_MODEL,
        "google": GOOGLE_MODEL,
        "github": GITHUB_MODEL,
        "openrouter": OPENROUTER_MODEL,
        "fireworks": FIREWORKS_MODEL,
    }
    model = model_by_backend.get(AI_BACKEND, "") or "not-set"
    return f"Backend: {AI_BACKEND.upper()} | Model: {model}"
def _default_model_for(provider: str) -> str:
    """Return the first configured model for *provider*.

    Falls back to the single-model env setting when the multi-model list is
    empty, and to "" for unknown providers.
    """
    key = (provider or "").lower()
    configured = PROVIDER_MODELS.get(key, [])
    if configured:
        return configured[0]
    single_model_fallbacks = {
        "hf": HF_MODEL,
        "google": GOOGLE_MODEL,
        "github": GITHUB_MODEL,
    }
    return single_model_fallbacks.get(key, "")
def _model_choices_for(provider: str) -> tuple:
    """Return (choices, selected) for the model dropdown of *provider*.

    The selected default is prepended to the choices when it is not already
    among them (e.g. a custom model set via a single-model env var).
    """
    key = (provider or "").lower()
    options = PROVIDER_MODELS.get(key, [])
    default = _default_model_for(key)
    if default and default not in options:
        options = [default, *options]
    return options, default
def _on_provider_change(provider: str):
    """Repopulate the model dropdown whenever the provider selection changes."""
    options, default = _model_choices_for(provider)
    return gr.update(choices=options, value=default)
def _pick_model_for_preset(provider: str, preferred_models: list) -> str:
    """Pick the first preferred model available for *provider*.

    Falls back to the provider's first dropdown choice, then to its default
    selection when no choices exist.
    """
    options, default = _model_choices_for(provider)
    preferred = next((m for m in preferred_models if m in options), None)
    if preferred is not None:
        return preferred
    return options[0] if options else default
def _apply_preset(preset_key: str):
    """Apply a named preset.

    Returns Gradio updates for the provider dropdown and model dropdown,
    plus a status string describing what was applied.
    """
    preset = PRESET_CONFIGS.get(preset_key)
    if preset is None:
        return gr.update(), gr.update(), "Preset not found."
    provider = preset["provider"]
    choices, _ = _model_choices_for(provider)
    model = _pick_model_for_preset(provider, preset["preferred_models"])
    # Keep the chosen model selectable even if it is absent from the list.
    if model and model not in choices:
        choices = [model, *choices]
    provider_label = PROVIDER_LABELS.get(provider, provider)
    status = (
        f"Applied preset: {preset['label']} | "
        f"Provider: {provider_label} | "
        f"Model: {model or 'not-set'} | "
        f"{preset['note']}"
    )
    return (
        gr.update(value=provider),
        gr.update(choices=choices, value=model),
        status,
    )
def _history_to_messages(history: list, user_message: str) -> list:
    """Convert Gradio chat history into OpenAI-style message dicts.

    Accepts both dict entries ({"role", "content"}) and legacy
    (user, assistant) pairs. Entries with empty content or unrecognized
    roles are dropped; *user_message* is always appended last.
    """
    valid_roles = {"user", "assistant", "system"}
    messages = []
    for entry in history or []:
        if isinstance(entry, dict):
            role = entry.get("role")
            content = entry.get("content")
            if role in valid_roles and content:
                messages.append({"role": role, "content": str(content)})
        elif isinstance(entry, (list, tuple)) and len(entry) == 2:
            for role, text in zip(("user", "assistant"), entry):
                if text:
                    messages.append({"role": role, "content": str(text)})
    messages.append({"role": "user", "content": user_message})
    return messages
def _extract_content(choice_message: dict) -> str:
    """Normalize an OpenAI-style message "content" field to plain text.

    Handles a plain string, a list of typed parts (text parts are joined
    and stripped), or any other value (stringified).
    """
    content = choice_message.get("content", "")
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        texts = (
            str(part.get("text", ""))
            for part in content
            if isinstance(part, dict) and part.get("type") == "text"
        )
        return "".join(texts).strip()
    return str(content)
def _chat_openai_compatible(
    endpoint: str,
    api_key: str,
    model: str,
    messages: list,
    extra_headers=None,
) -> str:
    """POST a chat completion to an OpenAI-compatible endpoint and return the reply.

    Raises ValueError when the key or model is missing, and RuntimeError on
    HTTP failures or an empty "choices" array.
    """
    if not api_key:
        raise ValueError("API key is missing.")
    if not model:
        raise ValueError("Model is not configured.")
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
        **(extra_headers or {}),
    }
    body = json.dumps(
        {"model": model, "messages": messages, "max_tokens": AI_MAX_TOKENS}
    ).encode("utf-8")
    req = urllib.request.Request(endpoint, data=body, headers=headers, method="POST")
    try:
        with urllib.request.urlopen(req, timeout=90) as resp:
            parsed = json.loads(resp.read().decode("utf-8"))
    except urllib.error.HTTPError as exc:
        # Surface a truncated provider error body for easier debugging.
        details = exc.read().decode("utf-8", errors="ignore")
        raise RuntimeError(f"HTTP {exc.code}: {details[:300]}") from exc
    choices = parsed.get("choices") or []
    if not choices:
        raise RuntimeError("No choices returned from provider.")
    return _extract_content(choices[0].get("message") or {}) or "(empty response)"
def _chat_hf(messages: list, model_override: str = "") -> str:
    """Run one chat completion on Hugging Face Inference and return the text."""
    chosen = model_override or _default_model_for("hf")
    result = hf_client.chat_completion(
        model=chosen,
        messages=messages,
        max_tokens=AI_MAX_TOKENS,
    )
    return result.choices[0].message.content or "(empty response)"
def _chat_github(messages: list, model_override: str = "") -> str:
    """Chat via the GitHub Models OpenAI-compatible endpoint."""
    chosen = model_override or _default_model_for("github")
    return _chat_openai_compatible(
        endpoint="https://models.github.ai/inference/chat/completions",
        api_key=GITHUB_TOKEN,
        model=chosen,
        messages=messages,
    )
def _chat_openrouter(messages: list) -> str:
    """Chat via OpenRouter; referer/title headers identify this app to the API."""
    attribution_headers = {
        "HTTP-Referer": _env("OPENROUTER_REFERER", "https://huggingface.co"),
        "X-Title": _env("OPENROUTER_APP_NAME", "hf-multi-provider-chat"),
    }
    return _chat_openai_compatible(
        endpoint="https://openrouter.ai/api/v1/chat/completions",
        api_key=OPENROUTER_API_KEY,
        model=OPENROUTER_MODEL,
        messages=messages,
        extra_headers=attribution_headers,
    )
def _chat_fireworks(messages: list) -> str:
    """Chat via the Fireworks AI OpenAI-compatible endpoint."""
    fireworks_endpoint = "https://api.fireworks.ai/inference/v1/chat/completions"
    return _chat_openai_compatible(
        endpoint=fireworks_endpoint,
        api_key=FIREWORKS_API_KEY,
        model=FIREWORKS_MODEL,
        messages=messages,
    )
def _chat_google(messages: list, model_override: str = "") -> str:
    """Call the Gemini generateContent REST API and return the reply text.

    Raises ValueError on missing config and RuntimeError on HTTP failures
    or an empty "candidates" array.
    """
    model = model_override or _default_model_for("google")
    if not GOOGLE_API_KEY:
        raise ValueError("GOOGLE_API_KEY is missing.")
    if not model:
        raise ValueError("GOOGLE_MODEL is not configured.")
    # Gemini uses "model" for assistant turns; system turns are folded into "user".
    role_map = {"assistant": "model", "user": "user", "system": "user"}
    contents = []
    for msg in messages:
        text = str(msg.get("content", ""))
        gemini_role = role_map.get(msg.get("role"))
        if text and gemini_role:
            contents.append({"role": gemini_role, "parts": [{"text": text}]})
    endpoint = (
        f"https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent"
        f"?key={GOOGLE_API_KEY}"
    )
    payload = {
        "contents": contents,
        "generationConfig": {"maxOutputTokens": AI_MAX_TOKENS},
    }
    req = urllib.request.Request(
        endpoint,
        data=json.dumps(payload).encode("utf-8"),
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    try:
        with urllib.request.urlopen(req, timeout=90) as resp:
            parsed = json.loads(resp.read().decode("utf-8"))
    except urllib.error.HTTPError as exc:
        details = exc.read().decode("utf-8", errors="ignore")
        raise RuntimeError(f"HTTP {exc.code}: {details[:300]}") from exc
    candidates = parsed.get("candidates") or []
    if not candidates:
        raise RuntimeError("No candidates returned from Gemini.")
    parts = (candidates[0].get("content") or {}).get("parts") or []
    reply = "".join(str(p.get("text", "")) for p in parts if isinstance(p, dict))
    return reply.strip() or "(empty response)"
def _chat_once(backend: str, messages: list, model_override: str = "") -> str:
    """Dispatch a single chat turn to *backend*.

    openrouter/fireworks ignore model_override (model is env-configured);
    raises ValueError for an unknown backend.
    """
    if backend == "openrouter":
        return _chat_openrouter(messages)
    if backend == "fireworks":
        return _chat_fireworks(messages)
    handlers = {"hf": _chat_hf, "google": _chat_google, "github": _chat_github}
    handler = handlers.get(backend)
    if handler is None:
        raise ValueError(
            f"Unsupported AI_BACKEND='{backend}'. Use one of: hf, google, github, openrouter, fireworks, auto"
        )
    return handler(messages, model_override)
def chat_response(message: str, history: list, selected_provider: str, selected_model: str) -> str:
    """Send a user message using the configured backend and return assistant text.

    With provider "auto", each backend in AI_FALLBACK_ORDER is tried in turn;
    all failures are collected and reported. Any error is returned as a
    user-visible string rather than raised.
    """
    if not message or not message.strip():
        return "Please enter a message."
    messages = _history_to_messages(history, message.strip())
    provider = (selected_provider or AI_BACKEND or "hf").lower().strip()
    model = (selected_model or "").strip()
    try:
        if provider != "auto":
            return _chat_once(provider, messages, model_override=model)
        failures = []
        for backend in AI_FALLBACK_ORDER:
            try:
                return _chat_once(backend, messages)
            except Exception as exc:  # noqa: BLE001
                failures.append(f"{backend}: {exc}")
        return "All providers failed. " + " | ".join(failures)
    except Exception as e:
        return f"Error: {str(e)}"
# --- UI layout: header banners, preset picker, provider/model selectors, chat ---
with gr.Blocks(title="GitHub + HuggingFace + AI Chat Demo") as demo:
    gr.Markdown("# GitHub → HuggingFace → AI Chat")
    # Build banner: version / commit / load timestamp (computed at app start).
    gr.Markdown(f"**{_build_label()}**")
    gr.Markdown(
        "Multi-provider chat app for learning and testing across HF, Gemini, GitHub Models, OpenRouter, and Fireworks."
    )
    # Runtime banner: default backend and its configured model.
    gr.Markdown(f"**{_runtime_label()}**")
    # Seed the dropdowns from AI_BACKEND when it is a UI-selectable provider.
    initial_provider = AI_BACKEND if AI_BACKEND in UI_PROVIDERS else "hf"
    initial_model_choices, initial_model = _model_choices_for(initial_provider)
    preset_dropdown = gr.Dropdown(
        label="Quick Preset",
        choices=[(v["label"], k) for k, v in PRESET_CONFIGS.items()],
        value="ultra-cheap",
    )
    preset_apply_btn = gr.Button("Apply Preset")
    preset_status = gr.Markdown("Preset tip: start with Ultra Cheap while iterating.")
    provider_dropdown = gr.Dropdown(
        label="Provider",
        choices=[(PROVIDER_LABELS[p], p) for p in UI_PROVIDERS],
        value=initial_provider,
    )
    # allow_custom_value lets users type a model id that is not in the list.
    model_dropdown = gr.Dropdown(
        label="Model",
        choices=initial_model_choices,
        value=initial_model,
        allow_custom_value=True,
    )
    # Changing the provider repopulates the model dropdown.
    provider_dropdown.change(
        fn=_on_provider_change,
        inputs=[provider_dropdown],
        outputs=[model_dropdown],
    )
    # Applying a preset updates both dropdowns and the status line.
    preset_apply_btn.click(
        fn=_apply_preset,
        inputs=[preset_dropdown],
        outputs=[provider_dropdown, model_dropdown, preset_status],
    )
    # The two dropdowns are forwarded to chat_response as extra arguments.
    gr.ChatInterface(
        chat_response,
        examples=[
            ["What is the capital of France?", "google", "gemini-2.0-flash-lite"],
            ["Explain quantum computing in simple terms.", "github", "openai/gpt-4.1-mini"],
            ["Give me a low-cost model selection strategy for dev vs prod.", "hf", "zai-org/GLM-4.7"],
        ],
        additional_inputs=[provider_dropdown, model_dropdown],
        title=None,
        description="Ask me anything!",
    )
if __name__ == "__main__":
    # server_name="0.0.0.0" is required inside HF Space containers.
    # root_path ensures Gradio resolves JS/CSS assets correctly when running
    # behind a reverse proxy or custom domain. Defaults to "" when
    # GRADIO_ROOT_PATH is unset; any trailing slash is stripped either way.
    _root_path = os.getenv("GRADIO_ROOT_PATH", "").rstrip("/")
    demo.launch(server_name="0.0.0.0", root_path=_root_path)