billyaungmyint committed
Commit 7e506ce · verified · 1 Parent(s): a8a8068

Sync from GitHub via hub-sync

Files changed (2):
  1. VERSION +1 -1
  2. main.py +191 -29
VERSION CHANGED
@@ -1 +1 @@
- 47e58f33bfd7b92740359aa226ca40a0e7e482c6
+ 1e80a581d25c8d350735743e0580c28fdf3fe594
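
The VERSION change above is the sync marker itself: the file appears to hold the full SHA of the upstream GitHub commit, which the header label in main.py shortens for display. A minimal sketch of that derivation, assuming _build_label (unchanged in this commit, so not shown in the diff) reads VERSION along these lines:

# Hypothetical sketch only; the real logic lives in main.py's unchanged _build_label.
from pathlib import Path

full_commit = Path("VERSION").read_text().strip()
short_commit = full_commit[:7]  # "1e80a58" after this commit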
main.py CHANGED
@@ -1,6 +1,9 @@
import datetime as dt
+ import json
import os
from pathlib import Path
+ import urllib.error
+ import urllib.request

import gradio as gr
from huggingface_hub import InferenceClient
@@ -25,40 +28,198 @@ def _build_label() -> str:
    return f"Version: {version} | Commit: {short_commit} | Loaded: {deployed_at}"


- # Initialize HF Inference Client with Z.ai provider (via HF_TOKEN)
- # Explicitly pass token for HF Space compatibility
- hf_token = os.getenv("HF_TOKEN")
- client = InferenceClient(token=hf_token) if hf_token else InferenceClient()
- MODEL = "zai-org/GLM-5.1"
+ def _env(name: str, default: str = "") -> str:
+     return (os.getenv(name) or default).strip()
+
+
+ HF_TOKEN = _env("HF_TOKEN")
+ HF_MODEL = _env("HF_MODEL", "zai-org/GLM-5.1")
+
+ AI_BACKEND = _env("AI_BACKEND", "hf").lower()
+ AI_MAX_TOKENS = int(_env("AI_MAX_TOKENS", "512"))
+ AI_FALLBACK_ORDER = [
+     p.strip().lower()
+     for p in _env("AI_FALLBACK_ORDER", "hf,github,openrouter,fireworks").split(",")
+     if p.strip()
+ ]
+
+ GITHUB_TOKEN = _env("GITHUB_TOKEN")
+ GITHUB_MODEL = _env("GITHUB_MODEL")
+
+ OPENROUTER_API_KEY = _env("OPENROUTER_API_KEY")
+ OPENROUTER_MODEL = _env("OPENROUTER_MODEL")
+
+ FIREWORKS_API_KEY = _env("FIREWORKS_API_KEY")
+ FIREWORKS_MODEL = _env("FIREWORKS_MODEL")
+
+
+ # Explicit token passing helps avoid auth ambiguity across local and Space runtimes.
+ hf_client = InferenceClient(token=HF_TOKEN) if HF_TOKEN else InferenceClient()
+
+
+ def _runtime_label() -> str:
+     active_model = {
+         "hf": HF_MODEL,
+         "github": GITHUB_MODEL,
+         "openrouter": OPENROUTER_MODEL,
+         "fireworks": FIREWORKS_MODEL,
+     }.get(AI_BACKEND, "")
+     backend_name = AI_BACKEND.upper()
+     model_text = active_model or "not-set"
+     return f"Backend: {backend_name} | Model: {model_text}"
+
+
+ def _history_to_messages(history: list, user_message: str) -> list:
+     messages = []
+
+     for item in history or []:
+         if isinstance(item, dict):
+             role = item.get("role")
+             content = item.get("content")
+             if role in {"user", "assistant", "system"} and content:
+                 messages.append({"role": role, "content": str(content)})
+             continue
+
+         if isinstance(item, (list, tuple)) and len(item) == 2:
+             user_msg, assistant_msg = item
+             if user_msg:
+                 messages.append({"role": "user", "content": str(user_msg)})
+             if assistant_msg:
+                 messages.append({"role": "assistant", "content": str(assistant_msg)})
+
+     messages.append({"role": "user", "content": user_message})
+     return messages
+
+
+ def _extract_content(choice_message: dict) -> str:
+     content = choice_message.get("content", "")
+     if isinstance(content, str):
+         return content
+     if isinstance(content, list):
+         chunks = []
+         for part in content:
+             if isinstance(part, dict) and part.get("type") == "text":
+                 chunks.append(str(part.get("text", "")))
+         return "".join(chunks).strip()
+     return str(content)
+
+
+ def _chat_openai_compatible(
+     endpoint: str,
+     api_key: str,
+     model: str,
+     messages: list,
+     extra_headers=None,
+ ) -> str:
+     if not api_key:
+         raise ValueError("API key is missing.")
+     if not model:
+         raise ValueError("Model is not configured.")
+
+     payload = {
+         "model": model,
+         "messages": messages,
+         "max_tokens": AI_MAX_TOKENS,
+     }
+     headers = {
+         "Authorization": f"Bearer {api_key}",
+         "Content-Type": "application/json",
+     }
+     if extra_headers:
+         headers.update(extra_headers)
+
+     request = urllib.request.Request(
+         endpoint,
+         data=json.dumps(payload).encode("utf-8"),
+         headers=headers,
+         method="POST",
+     )
+     try:
+         with urllib.request.urlopen(request, timeout=90) as response:
+             body = json.loads(response.read().decode("utf-8"))
+     except urllib.error.HTTPError as exc:
+         details = exc.read().decode("utf-8", errors="ignore")
+         raise RuntimeError(f"HTTP {exc.code}: {details[:300]}") from exc
+
+     choices = body.get("choices") or []
+     if not choices:
+         raise RuntimeError("No choices returned from provider.")
+     message = choices[0].get("message") or {}
+     return _extract_content(message) or "(empty response)"
+
+
+ def _chat_hf(messages: list) -> str:
+     response = hf_client.chat_completion(
+         model=HF_MODEL,
+         messages=messages,
+         max_tokens=AI_MAX_TOKENS,
+     )
+     return response.choices[0].message.content or "(empty response)"
+
+
+ def _chat_github(messages: list) -> str:
+     return _chat_openai_compatible(
+         endpoint="https://models.github.ai/inference/chat/completions",
+         api_key=GITHUB_TOKEN,
+         model=GITHUB_MODEL,
+         messages=messages,
+     )
+
+
+ def _chat_openrouter(messages: list) -> str:
+     return _chat_openai_compatible(
+         endpoint="https://openrouter.ai/api/v1/chat/completions",
+         api_key=OPENROUTER_API_KEY,
+         model=OPENROUTER_MODEL,
+         messages=messages,
+         extra_headers={
+             "HTTP-Referer": _env("OPENROUTER_REFERER", "https://huggingface.co"),
+             "X-Title": _env("OPENROUTER_APP_NAME", "hf-multi-provider-chat"),
+         },
+     )
+
+
+ def _chat_fireworks(messages: list) -> str:
+     return _chat_openai_compatible(
+         endpoint="https://api.fireworks.ai/inference/v1/chat/completions",
+         api_key=FIREWORKS_API_KEY,
+         model=FIREWORKS_MODEL,
+         messages=messages,
+     )
+
+
+ def _chat_once(backend: str, messages: list) -> str:
+     if backend == "hf":
+         return _chat_hf(messages)
+     if backend == "github":
+         return _chat_github(messages)
+     if backend == "openrouter":
+         return _chat_openrouter(messages)
+     if backend == "fireworks":
+         return _chat_fireworks(messages)
+     raise ValueError(
+         f"Unsupported AI_BACKEND='{backend}'. Use one of: hf, github, openrouter, fireworks, auto"
+     )


def chat_response(message: str, history: list) -> str:
-     """
-     Send a message to the GLM-5.1 model and get a response.
-     history is a list of [user_msg, assistant_msg] pairs.
-     """
+     """Send a user message using the configured backend and return assistant text."""
    if not message or not message.strip():
        return "Please enter a message."

+     messages = _history_to_messages(history, message.strip())
+
    try:
-         # Convert Gradio chat history format to messages for the API
-         messages = []
-         for user_msg, assistant_msg in history:
-             messages.append({"role": "user", "content": user_msg})
-             if assistant_msg:
-                 messages.append({"role": "assistant", "content": assistant_msg})
-
-         # Add current user message
-         messages.append({"role": "user", "content": message})
-
-         # Call the model via HF Inference API
-         response = client.chat_completion(
-             model=MODEL,
-             messages=messages,
-             max_tokens=512,
-         )
-
-         return response.choices[0].message.content
+         if AI_BACKEND == "auto":
+             errors = []
+             for backend in AI_FALLBACK_ORDER:
+                 try:
+                     return _chat_once(backend, messages)
+                 except Exception as exc:  # noqa: BLE001
+                     errors.append(f"{backend}: {exc}")
+             return "All providers failed. " + " | ".join(errors)
+
+         return _chat_once(AI_BACKEND, messages)
    except Exception as e:
        return f"Error: {str(e)}"
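One subtlety in the hunk above: _history_to_messages accepts both Gradio history shapes, the legacy [user, assistant] pairs the old code iterated over and the newer role/content dict format, so the handler keeps working whichever mode gr.ChatInterface passes. A quick equivalence check (a sketch that assumes main.py imports cleanly without launching the UI):

# Both Gradio history formats should normalize to the same message list.
from main import _history_to_messages  # assumes importing main has no side effects beyond config

legacy = [("hi", "hello!")]  # older tuple-pair history
modern = [
    {"role": "user", "content": "hi"},
    {"role": "assistant", "content": "hello!"},
]  # newer messages-style history

assert _history_to_messages(legacy, "next") == _history_to_messages(modern, "next")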
 
 
@@ -67,15 +228,16 @@ with gr.Blocks(title="GitHub + HuggingFace + AI Chat Demo") as demo:
    gr.Markdown("# GitHub → HuggingFace → AI Chat")
    gr.Markdown(f"**{_build_label()}**")
    gr.Markdown(
-         f"Powered by **{MODEL}** via Z.ai on HuggingFace. Push to GitHub, auto-syncs here."
+         "Multi-provider chat app for learning and testing across HF, GitHub Models, OpenRouter, and Fireworks."
    )
+     gr.Markdown(f"**{_runtime_label()}**")

    gr.ChatInterface(
        chat_response,
        examples=[
            "What is the capital of France?",
            "Explain quantum computing in simple terms.",
-             "Write a short poem about the moon.",
+             "Give me a low-cost model selection strategy for dev vs prod.",
        ],
        title=None,
        description="Ask me anything!",