nickyni committed on
Commit
de5f9cf
·
verified ·
1 Parent(s): a35ae64

Add Qwen3.5-9B Claude Opus Reasoning demo app

Browse files
Files changed (1) hide show
  1. qwen35_reasoning_app.py +287 -0
qwen35_reasoning_app.py ADDED
@@ -0,0 +1,287 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ HuggingFace Gradio Space: Qwen3.5-9B Claude Opus Reasoning Demo
3
+ Space: nickyni/qwen35-claude-reasoning-demo
4
+
5
+ This Gradio app demonstrates Claude 4.6 Opus-level reasoning via NexaAPI.
6
+ The underlying model is inspired by Qwen3.5-9B-Claude-4.6-Opus-Reasoning-Distilled.
7
+ """
8
+
9
+ import gradio as gr
10
+ import os
11
+ from typing import Iterator
12
+
13
+ # Try to import nexaapi, fall back to openai with custom base_url
14
+ try:
15
+ from nexaapi import NexaAPI
16
+ USE_NEXAAPI = True
17
+ except ImportError:
18
+ from openai import OpenAI
19
+ USE_NEXAAPI = False
20
+
21
+ # Get API key from environment variable (set in HF Space secrets)
22
+ API_KEY = os.environ.get("NEXAAPI_KEY", "")
23
+
24
+ # Model configuration
25
+ MODEL_ID = "claude-sonnet-4" # Best available reasoning model on NexaAPI
26
+ NEXAAPI_BASE_URL = "https://api.nexa-api.com/v1"
27
+
28
def get_client():
    """Build and return an API client for the chat backend.

    Prefers the native NexaAPI SDK when it is installed; otherwise falls back
    to the OpenAI SDK pointed at the NexaAPI-compatible base URL.

    Raises:
        ValueError: if the NEXAAPI_KEY environment variable / Space secret
            is missing or empty.
    """
    if not API_KEY:
        raise ValueError("NEXAAPI_KEY environment variable not set. Get your key at https://nexa-api.com")

    if not USE_NEXAAPI:
        # OpenAI-compatible fallback client aimed at the NexaAPI endpoint.
        return OpenAI(api_key=API_KEY, base_url=NEXAAPI_BASE_URL)
    return NexaAPI(api_key=API_KEY)
37
+
38
+
39
def format_system_prompt(mode: str) -> str:
    """Return the system prompt text for the given reasoning mode.

    Unknown modes fall back to the "General Reasoning" prompt.
    """
    general = (
        "You are an expert reasoning assistant. Think carefully and systematically "
        "before answering. Break complex problems into clear steps."
    )
    by_mode = {
        "General Reasoning": general,
        "Math & Logic": (
            "You are a mathematics and logic expert. Solve problems step-by-step, "
            "showing all work. Verify your answers. Use clear notation."
        ),
        "Code Review": (
            "You are a senior software engineer. Review code for bugs, security issues, "
            "performance problems, and style. Provide improved versions with explanations."
        ),
        "Chain-of-Thought": (
            "Solve problems using this exact structure:\n"
            "ANALYSIS: What is being asked? What information do I have?\n"
            "REASONING: Step-by-step logical deduction\n"
            "VERIFICATION: Does the answer make sense?\n"
            "ANSWER: Clear, concise final answer"
        ),
    }
    return by_mode.get(mode, general)
63
+
64
+
65
def stream_response(
    message: str,
    history: list,
    reasoning_mode: str,
    temperature: float,
    max_tokens: int,
) -> Iterator[str]:
    """Stream a chat completion, yielding the accumulated text after each chunk.

    Args:
        message: The user's latest input.
        history: Prior turns as [user, assistant] pairs (Gradio tuple format).
        reasoning_mode: Key selecting the system prompt (see format_system_prompt).
        temperature: Sampling temperature forwarded to the API (0.0-1.0).
        max_tokens: Generation cap forwarded to the API.

    Yields:
        Progressively longer partial response strings, or a single
        user-facing Markdown error message when configuration or the
        request fails.
    """
    # Fail fast with a helpful message when the Space secret is missing.
    if not API_KEY:
        yield "⚠️ **API key not configured.** Please set NEXAAPI_KEY in Space secrets.\n\nGet your key at [nexa-api.com](https://nexa-api.com)"
        return

    if not message.strip():
        yield "Please enter a question or problem to solve."
        return

    try:
        client = get_client()

        # Assemble the conversation: system prompt, prior turns, then the new message.
        conversation = [{"role": "system", "content": format_system_prompt(reasoning_mode)}]
        for user_turn, assistant_turn in history:
            if user_turn:
                conversation.append({"role": "user", "content": user_turn})
            if assistant_turn:
                conversation.append({"role": "assistant", "content": assistant_turn})
        conversation.append({"role": "user", "content": message})

        response_stream = client.chat.completions.create(
            model=MODEL_ID,
            messages=conversation,
            temperature=temperature,
            max_tokens=max_tokens,
            stream=True,
        )

        # Accumulate deltas and re-yield the full text so the UI can re-render it.
        accumulated = ""
        for chunk in response_stream:
            piece = getattr(chunk.choices[0].delta, "content", None)
            if piece:
                accumulated += piece
                yield accumulated

    except Exception as exc:
        # UI boundary: convert common API failures into friendly Markdown notices.
        reason = str(exc)
        lowered = reason.lower()
        if "401" in reason or "unauthorized" in lowered:
            yield "❌ **Authentication failed.** Check your NEXAAPI_KEY is correct.\n\nGet a key at [nexa-api.com](https://nexa-api.com)"
        elif "429" in reason or "rate limit" in lowered:
            yield "⏳ **Rate limit reached.** Please wait a moment and try again."
        else:
            yield f"❌ **Error:** {reason}\n\nIf this persists, check [nexa-api.com](https://nexa-api.com) for status."
133
+
134
+
135
# Example prompts for the UI.
# Each entry is [prompt_text, reasoning_mode]; the mode string must match one
# of the gr.Radio choices so gr.Examples can populate both inputs at once.
EXAMPLE_PROMPTS = [
    ["A snail climbs 3 feet up a 10-foot wall each day but slides back 2 feet each night. How many days to reach the top?", "Math & Logic"],
    ["Review this code for bugs:\n```python\ndef divide(a, b):\n return a/b\nresult = divide(10, 0)\n```", "Code Review"],
    ["Explain the difference between supervised and unsupervised learning with real-world examples.", "General Reasoning"],
    ["If I invest $1000 at 7% annual compound interest, how much will I have after 10 years? Show the formula.", "Math & Logic"],
    ["Design a simple rate limiter for an API. What data structures would you use?", "Chain-of-Thought"],
]
143
+
144
# Build the Gradio interface.
# Layout: a header banner, a two-column row (chat on the left, settings on the
# right), example prompts, a promo footer, and the event wiring at the bottom.
with gr.Blocks(
    title="Qwen3.5-9B Claude Opus Reasoning Demo | NexaAPI",
    theme=gr.themes.Soft(primary_hue="blue"),
    css="""
    .header-text { text-align: center; margin-bottom: 20px; }
    .model-badge { background: #e8f4f8; padding: 8px 16px; border-radius: 20px; display: inline-block; }
    footer { display: none !important; }
    """
) as demo:

    # Page header with model/provider attribution and links.
    gr.HTML("""
    <div class="header-text">
        <h1>🧠 Qwen3.5-9B Claude Opus Reasoning Demo</h1>
        <p>Experience Claude 4.6 Opus-level reasoning via <strong>NexaAPI</strong> — 5× cheaper than official pricing</p>
        <div class="model-badge">
            Powered by <a href="https://nexa-api.com" target="_blank">NexaAPI</a> ·
            Model: Claude Sonnet 4 (Opus-distilled reasoning) ·
            <a href="https://huggingface.co/Jackrong/Qwen3.5-9B-Claude-4.6-Opus-Reasoning-Distilled-GGUF" target="_blank">Original Model</a>
        </div>
    </div>
    """)

    with gr.Row():
        # Left column: chat history, input box, and action buttons.
        with gr.Column(scale=3):
            chatbot = gr.Chatbot(
                label="Reasoning Assistant",
                height=500,
                show_label=True,
                bubble_full_width=False,
            )

            with gr.Row():
                msg_input = gr.Textbox(
                    placeholder="Ask a reasoning question, math problem, or paste code to review...",
                    label="Your Question",
                    lines=3,
                    scale=4,
                )
                submit_btn = gr.Button("🧠 Reason", variant="primary", scale=1)

            clear_btn = gr.Button("🗑️ Clear Chat", variant="secondary")

        # Right column: generation settings and reference links.
        with gr.Column(scale=1):
            gr.Markdown("### ⚙️ Settings")

            reasoning_mode = gr.Radio(
                choices=["General Reasoning", "Math & Logic", "Code Review", "Chain-of-Thought"],
                value="General Reasoning",
                label="Reasoning Mode",
            )

            temperature = gr.Slider(
                minimum=0.0,
                maximum=1.0,
                value=0.7,
                step=0.1,
                label="Temperature",
                info="Lower = more focused, Higher = more creative"
            )

            max_tokens = gr.Slider(
                minimum=256,
                maximum=4096,
                value=1024,
                step=256,
                label="Max Tokens",
            )

            gr.Markdown("""
            ### 🔗 Links
            - [NexaAPI](https://nexa-api.com)
            - [Get API Key](https://rapidapi.com/user/nexaquency)
            - [Python SDK](https://pypi.org/project/nexaapi)
            - [npm Package](https://npmjs.com/package/nexaapi)
            - [Original Model](https://huggingface.co/Jackrong/Qwen3.5-9B-Claude-4.6-Opus-Reasoning-Distilled-GGUF)
            """)

    gr.Markdown("### 💡 Try These Examples")

    # Clicking an example fills both the textbox and the mode radio.
    examples = gr.Examples(
        examples=EXAMPLE_PROMPTS,
        inputs=[msg_input, reasoning_mode],
        label="Example Prompts",
    )

    gr.HTML("""
    <div style="text-align: center; margin-top: 20px; padding: 16px; background: #f0f7ff; border-radius: 8px;">
        <strong>💰 Cost Comparison:</strong>
        Official Claude API ~$15/M tokens →
        <strong>NexaAPI ~$0.50/M tokens</strong> (5× cheaper!)
        <br>
        <a href="https://nexa-api.com" target="_blank">Get started free at nexa-api.com →</a>
    </div>
    """)

    # Event handlers
    def user_submit(message, history):
        # Clear the textbox and append the user's turn with an empty (None)
        # assistant slot that bot_respond will fill in.
        return "", history + [[message, None]]

    def bot_respond(history, reasoning_mode, temperature, max_tokens):
        # Guard: only act when the last turn has an unanswered user message.
        # NOTE(review): `return` inside this generator ends the stream without
        # emitting an update — acceptable here since there is nothing to change.
        if not history or history[-1][1] is not None:
            return history

        message = history[-1][0]
        history[-1][1] = ""

        # Stream partial completions into the last assistant slot so the
        # chatbot re-renders live as tokens arrive.
        for partial in stream_response(message, history[:-1], reasoning_mode, temperature, max_tokens):
            history[-1][1] = partial
            yield history

    # Wire up events: Enter in the textbox and the button trigger the same
    # two-step flow (append user turn immediately, then stream the reply).
    msg_input.submit(
        user_submit,
        inputs=[msg_input, chatbot],
        outputs=[msg_input, chatbot],
        queue=False
    ).then(
        bot_respond,
        inputs=[chatbot, reasoning_mode, temperature, max_tokens],
        outputs=chatbot,
    )

    submit_btn.click(
        user_submit,
        inputs=[msg_input, chatbot],
        outputs=[msg_input, chatbot],
        queue=False
    ).then(
        bot_respond,
        inputs=[chatbot, reasoning_mode, temperature, max_tokens],
        outputs=chatbot,
    )

    # Reset the chat history to an empty list.
    clear_btn.click(lambda: [], outputs=chatbot)
279
+
280
+
281
if __name__ == "__main__":
    # Enable request queuing so concurrent users share the streaming backend.
    demo.queue(max_size=10)
    demo.launch(
        server_name="0.0.0.0",  # bind all interfaces (required in HF Spaces containers)
        server_port=7860,       # the port HF Spaces expects apps to listen on
        show_error=True,
    )