Spaces:
Running
Running
Sync from GitHub via hub-sync
Browse files
VERSION
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
|
|
|
|
| 1 |
+
e53591dafabc331639f010ada7aeec3330d50800
|
main.py
CHANGED
|
@@ -39,7 +39,7 @@ AI_BACKEND = _env("AI_BACKEND", "hf").lower()
|
|
| 39 |
AI_MAX_TOKENS = int(_env("AI_MAX_TOKENS", "512"))
|
| 40 |
AI_FALLBACK_ORDER = [
|
| 41 |
p.strip().lower()
|
| 42 |
-
for p in _env("AI_FALLBACK_ORDER", "hf,github,openrouter,fireworks").split(",")
|
| 43 |
if p.strip()
|
| 44 |
]
|
| 45 |
|
|
@@ -52,6 +52,9 @@ OPENROUTER_MODEL = _env("OPENROUTER_MODEL")
|
|
| 52 |
FIREWORKS_API_KEY = _env("FIREWORKS_API_KEY")
|
| 53 |
FIREWORKS_MODEL = _env("FIREWORKS_MODEL")
|
| 54 |
|
|
|
|
|
|
|
|
|
|
| 55 |
|
| 56 |
# Explicit token passing helps avoid auth ambiguity across local and Space runtimes.
|
| 57 |
hf_client = InferenceClient(token=HF_TOKEN) if HF_TOKEN else InferenceClient()
|
|
@@ -60,6 +63,7 @@ hf_client = InferenceClient(token=HF_TOKEN) if HF_TOKEN else InferenceClient()
|
|
| 60 |
def _runtime_label() -> str:
|
| 61 |
active_model = {
|
| 62 |
"hf": HF_MODEL,
|
|
|
|
| 63 |
"github": GITHUB_MODEL,
|
| 64 |
"openrouter": OPENROUTER_MODEL,
|
| 65 |
"fireworks": FIREWORKS_MODEL,
|
|
@@ -188,9 +192,61 @@ def _chat_fireworks(messages: list) -> str:
|
|
| 188 |
)
|
| 189 |
|
| 190 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 191 |
def _chat_once(backend: str, messages: list) -> str:
|
| 192 |
if backend == "hf":
|
| 193 |
return _chat_hf(messages)
|
|
|
|
|
|
|
| 194 |
if backend == "github":
|
| 195 |
return _chat_github(messages)
|
| 196 |
if backend == "openrouter":
|
|
@@ -198,7 +254,7 @@ def _chat_once(backend: str, messages: list) -> str:
|
|
| 198 |
if backend == "fireworks":
|
| 199 |
return _chat_fireworks(messages)
|
| 200 |
raise ValueError(
|
| 201 |
-
f"Unsupported AI_BACKEND='{backend}'. Use one of: hf, github, openrouter, fireworks, auto"
|
| 202 |
)
|
| 203 |
|
| 204 |
|
|
@@ -228,7 +284,7 @@ with gr.Blocks(title="GitHub + HuggingFace + AI Chat Demo") as demo:
|
|
| 228 |
gr.Markdown("# GitHub → HuggingFace → AI Chat")
|
| 229 |
gr.Markdown(f"**{_build_label()}**")
|
| 230 |
gr.Markdown(
|
| 231 |
-
"Multi-provider chat app for learning and testing across HF, GitHub Models, OpenRouter, and Fireworks."
|
| 232 |
)
|
| 233 |
gr.Markdown(f"**{_runtime_label()}**")
|
| 234 |
|
|
|
|
| 39 |
AI_MAX_TOKENS = int(_env("AI_MAX_TOKENS", "512"))
|
| 40 |
AI_FALLBACK_ORDER = [
|
| 41 |
p.strip().lower()
|
| 42 |
+
for p in _env("AI_FALLBACK_ORDER", "hf,google,github,openrouter,fireworks").split(",")
|
| 43 |
if p.strip()
|
| 44 |
]
|
| 45 |
|
|
|
|
| 52 |
FIREWORKS_API_KEY = _env("FIREWORKS_API_KEY")
|
| 53 |
FIREWORKS_MODEL = _env("FIREWORKS_MODEL")
|
| 54 |
|
| 55 |
+
GOOGLE_API_KEY = _env("GOOGLE_API_KEY")
|
| 56 |
+
GOOGLE_MODEL = _env("GOOGLE_MODEL", "gemini-2.0-flash")
|
| 57 |
+
|
| 58 |
|
| 59 |
# Explicit token passing helps avoid auth ambiguity across local and Space runtimes.
|
| 60 |
hf_client = InferenceClient(token=HF_TOKEN) if HF_TOKEN else InferenceClient()
|
|
|
|
| 63 |
def _runtime_label() -> str:
|
| 64 |
active_model = {
|
| 65 |
"hf": HF_MODEL,
|
| 66 |
+
"google": GOOGLE_MODEL,
|
| 67 |
"github": GITHUB_MODEL,
|
| 68 |
"openrouter": OPENROUTER_MODEL,
|
| 69 |
"fireworks": FIREWORKS_MODEL,
|
|
|
|
| 192 |
)
|
| 193 |
|
| 194 |
|
| 195 |
+
def _chat_google(messages: list) -> str:
|
| 196 |
+
if not GOOGLE_API_KEY:
|
| 197 |
+
raise ValueError("GOOGLE_API_KEY is missing.")
|
| 198 |
+
if not GOOGLE_MODEL:
|
| 199 |
+
raise ValueError("GOOGLE_MODEL is not configured.")
|
| 200 |
+
|
| 201 |
+
contents = []
|
| 202 |
+
for msg in messages:
|
| 203 |
+
role = msg.get("role")
|
| 204 |
+
text = str(msg.get("content", ""))
|
| 205 |
+
if not text:
|
| 206 |
+
continue
|
| 207 |
+
|
| 208 |
+
if role == "assistant":
|
| 209 |
+
contents.append({"role": "model", "parts": [{"text": text}]})
|
| 210 |
+
elif role in {"user", "system"}:
|
| 211 |
+
contents.append({"role": "user", "parts": [{"text": text}]})
|
| 212 |
+
|
| 213 |
+
endpoint = (
|
| 214 |
+
f"https://generativelanguage.googleapis.com/v1beta/models/{GOOGLE_MODEL}:generateContent"
|
| 215 |
+
f"?key={GOOGLE_API_KEY}"
|
| 216 |
+
)
|
| 217 |
+
payload = {
|
| 218 |
+
"contents": contents,
|
| 219 |
+
"generationConfig": {"maxOutputTokens": AI_MAX_TOKENS},
|
| 220 |
+
}
|
| 221 |
+
request = urllib.request.Request(
|
| 222 |
+
endpoint,
|
| 223 |
+
data=json.dumps(payload).encode("utf-8"),
|
| 224 |
+
headers={"Content-Type": "application/json"},
|
| 225 |
+
method="POST",
|
| 226 |
+
)
|
| 227 |
+
|
| 228 |
+
try:
|
| 229 |
+
with urllib.request.urlopen(request, timeout=90) as response:
|
| 230 |
+
body = json.loads(response.read().decode("utf-8"))
|
| 231 |
+
except urllib.error.HTTPError as exc:
|
| 232 |
+
details = exc.read().decode("utf-8", errors="ignore")
|
| 233 |
+
raise RuntimeError(f"HTTP {exc.code}: {details[:300]}") from exc
|
| 234 |
+
|
| 235 |
+
candidates = body.get("candidates") or []
|
| 236 |
+
if not candidates:
|
| 237 |
+
raise RuntimeError("No candidates returned from Gemini.")
|
| 238 |
+
|
| 239 |
+
parts = (candidates[0].get("content") or {}).get("parts") or []
|
| 240 |
+
text_chunks = [str(part.get("text", "")) for part in parts if isinstance(part, dict)]
|
| 241 |
+
result = "".join(text_chunks).strip()
|
| 242 |
+
return result or "(empty response)"
|
| 243 |
+
|
| 244 |
+
|
| 245 |
def _chat_once(backend: str, messages: list) -> str:
|
| 246 |
if backend == "hf":
|
| 247 |
return _chat_hf(messages)
|
| 248 |
+
if backend == "google":
|
| 249 |
+
return _chat_google(messages)
|
| 250 |
if backend == "github":
|
| 251 |
return _chat_github(messages)
|
| 252 |
if backend == "openrouter":
|
|
|
|
| 254 |
if backend == "fireworks":
|
| 255 |
return _chat_fireworks(messages)
|
| 256 |
raise ValueError(
|
| 257 |
+
f"Unsupported AI_BACKEND='{backend}'. Use one of: hf, google, github, openrouter, fireworks, auto"
|
| 258 |
)
|
| 259 |
|
| 260 |
|
|
|
|
| 284 |
gr.Markdown("# GitHub → HuggingFace → AI Chat")
|
| 285 |
gr.Markdown(f"**{_build_label()}**")
|
| 286 |
gr.Markdown(
|
| 287 |
+
"Multi-provider chat app for learning and testing across HF, Gemini, GitHub Models, OpenRouter, and Fireworks."
|
| 288 |
)
|
| 289 |
gr.Markdown(f"**{_runtime_label()}**")
|
| 290 |
|