# nexaapi-tutorial-2026 / qwen35_reasoning_app.py
# Uploaded by: nickyni
# Commit: de5f9cf (verified) — "Add Qwen3.5-9B Claude Opus Reasoning demo app"
"""
HuggingFace Gradio Space: Qwen3.5-9B Claude Opus Reasoning Demo
Space: nickyni/qwen35-claude-reasoning-demo
This Gradio app demonstrates Claude 4.6 Opus-level reasoning via NexaAPI.
The underlying model is inspired by Qwen3.5-9B-Claude-4.6-Opus-Reasoning-Distilled.
"""
import gradio as gr
import os
from typing import Iterator

# Try to import nexaapi, fall back to openai with custom base_url
try:
    from nexaapi import NexaAPI

    USE_NEXAAPI = True  # native SDK available; get_client() uses NexaAPI
except ImportError:
    # The NexaAPI endpoint is OpenAI-compatible, so the OpenAI client
    # pointed at NEXAAPI_BASE_URL works as a drop-in fallback.
    from openai import OpenAI

    USE_NEXAAPI = False

# Get API key from environment variable (set in HF Space secrets).
# Empty string means "not configured"; stream_response() checks for this.
API_KEY = os.environ.get("NEXAAPI_KEY", "")

# Model configuration
MODEL_ID = "claude-sonnet-4"  # Best available reasoning model on NexaAPI
NEXAAPI_BASE_URL = "https://api.nexa-api.com/v1"
def get_client():
    """Build the chat-completions client for the configured backend.

    Returns:
        A ``NexaAPI`` client when the native SDK imported successfully,
        otherwise an ``OpenAI`` client pointed at the NexaAPI base URL.

    Raises:
        ValueError: if the NEXAAPI_KEY environment variable is unset/empty.
    """
    if not API_KEY:
        raise ValueError("NEXAAPI_KEY environment variable not set. Get your key at https://nexa-api.com")
    # Both clients expose the same chat.completions interface, so callers
    # never need to know which one they received.
    if USE_NEXAAPI:
        return NexaAPI(api_key=API_KEY)
    return OpenAI(api_key=API_KEY, base_url=NEXAAPI_BASE_URL)
# Built once at import time so each call is a plain dict lookup instead of
# re-creating the prompt table on every request.
_MODE_PROMPTS = {
    "General Reasoning": (
        "You are an expert reasoning assistant. Think carefully and systematically "
        "before answering. Break complex problems into clear steps."
    ),
    "Math & Logic": (
        "You are a mathematics and logic expert. Solve problems step-by-step, "
        "showing all work. Verify your answers. Use clear notation."
    ),
    "Code Review": (
        "You are a senior software engineer. Review code for bugs, security issues, "
        "performance problems, and style. Provide improved versions with explanations."
    ),
    "Chain-of-Thought": (
        "Solve problems using this exact structure:\n"
        "ANALYSIS: What is being asked? What information do I have?\n"
        "REASONING: Step-by-step logical deduction\n"
        "VERIFICATION: Does the answer make sense?\n"
        "ANSWER: Clear, concise final answer"
    ),
}


def format_system_prompt(mode: str) -> str:
    """Return the system prompt for the given reasoning mode.

    Args:
        mode: One of the UI reasoning modes ("General Reasoning",
            "Math & Logic", "Code Review", "Chain-of-Thought").

    Returns:
        The matching system prompt; any unknown mode falls back to the
        "General Reasoning" prompt.
    """
    return _MODE_PROMPTS.get(mode, _MODE_PROMPTS["General Reasoning"])
def stream_response(
    message: str,
    history: list,
    reasoning_mode: str,
    temperature: float,
    max_tokens: int,
) -> Iterator[str]:
    """
    Stream a response from NexaAPI.

    Args:
        message: User's input message (may be None or blank; handled gracefully)
        history: Chat history in Gradio tuples format ([user, assistant] pairs)
        reasoning_mode: Selected reasoning mode (see format_system_prompt)
        temperature: Model temperature (0.0-1.0)
        max_tokens: Maximum tokens to generate

    Yields:
        Progressively longer partial response strings for streaming; on
        failure, a single user-facing error message instead.
    """
    if not API_KEY:
        yield "⚠️ **API key not configured.** Please set NEXAAPI_KEY in Space secrets.\n\nGet your key at [nexa-api.com](https://nexa-api.com)"
        return
    # Guard against None as well as whitespace-only input: the original
    # `message.strip()` alone raised AttributeError when message was None.
    if not message or not message.strip():
        yield "Please enter a question or problem to solve."
        return
    try:
        client = get_client()
        # System prompt first, then the prior turns, then the new message.
        messages = [{"role": "system", "content": format_system_prompt(reasoning_mode)}]
        for human_msg, assistant_msg in history:
            if human_msg:
                messages.append({"role": "user", "content": human_msg})
            if assistant_msg:
                messages.append({"role": "assistant", "content": assistant_msg})
        messages.append({"role": "user", "content": message})
        # Stream the response, accumulating deltas so every yield carries the
        # full text so far (the form gr.Chatbot expects while streaming).
        stream = client.chat.completions.create(
            model=MODEL_ID,
            messages=messages,
            temperature=temperature,
            max_tokens=max_tokens,
            stream=True,
        )
        partial_response = ""
        for chunk in stream:
            delta = chunk.choices[0].delta
            # Some chunks (role-only or finish events) carry no content.
            if hasattr(delta, "content") and delta.content:
                partial_response += delta.content
                yield partial_response
    except Exception as e:
        # Map common HTTP failures to friendlier guidance; anything else is
        # surfaced verbatim so the user can report it.
        error_msg = str(e)
        if "401" in error_msg or "unauthorized" in error_msg.lower():
            yield "❌ **Authentication failed.** Check your NEXAAPI_KEY is correct.\n\nGet a key at [nexa-api.com](https://nexa-api.com)"
        elif "429" in error_msg or "rate limit" in error_msg.lower():
            yield "⏳ **Rate limit reached.** Please wait a moment and try again."
        else:
            yield f"❌ **Error:** {error_msg}\n\nIf this persists, check [nexa-api.com](https://nexa-api.com) for status."
# Example prompts for the UI
EXAMPLE_PROMPTS = [
["A snail climbs 3 feet up a 10-foot wall each day but slides back 2 feet each night. How many days to reach the top?", "Math & Logic"],
["Review this code for bugs:\n```python\ndef divide(a, b):\n return a/b\nresult = divide(10, 0)\n```", "Code Review"],
["Explain the difference between supervised and unsupervised learning with real-world examples.", "General Reasoning"],
["If I invest $1000 at 7% annual compound interest, how much will I have after 10 years? Show the formula.", "Math & Logic"],
["Design a simple rate limiter for an API. What data structures would you use?", "Chain-of-Thought"],
]
# Build the Gradio interface
with gr.Blocks(
    title="Qwen3.5-9B Claude Opus Reasoning Demo | NexaAPI",
    theme=gr.themes.Soft(primary_hue="blue"),
    css="""
    .header-text { text-align: center; margin-bottom: 20px; }
    .model-badge { background: #e8f4f8; padding: 8px 16px; border-radius: 20px; display: inline-block; }
    footer { display: none !important; }
    """
) as demo:
    # Page header with model/provider attribution links.
    gr.HTML("""
    <div class="header-text">
        <h1>🧠 Qwen3.5-9B Claude Opus Reasoning Demo</h1>
        <p>Experience Claude 4.6 Opus-level reasoning via <strong>NexaAPI</strong> — 5× cheaper than official pricing</p>
        <div class="model-badge">
            Powered by <a href="https://nexa-api.com" target="_blank">NexaAPI</a> ·
            Model: Claude Sonnet 4 (Opus-distilled reasoning) ·
            <a href="https://huggingface.co/Jackrong/Qwen3.5-9B-Claude-4.6-Opus-Reasoning-Distilled-GGUF" target="_blank">Original Model</a>
        </div>
    </div>
    """)
    with gr.Row():
        # Left column (3/4 width): chat area with input and action buttons.
        with gr.Column(scale=3):
            # NOTE(review): no `type=` argument, so this Chatbot uses the
            # legacy tuples format ([user, assistant] pairs), which the
            # handlers below depend on. `bubble_full_width` is deprecated in
            # newer Gradio releases — confirm against the pinned version.
            chatbot = gr.Chatbot(
                label="Reasoning Assistant",
                height=500,
                show_label=True,
                bubble_full_width=False,
            )
            with gr.Row():
                msg_input = gr.Textbox(
                    placeholder="Ask a reasoning question, math problem, or paste code to review...",
                    label="Your Question",
                    lines=3,
                    scale=4,
                )
                submit_btn = gr.Button("🧠 Reason", variant="primary", scale=1)
            clear_btn = gr.Button("🗑️ Clear Chat", variant="secondary")
        # Right column (1/4 width): generation settings and links.
        with gr.Column(scale=1):
            gr.Markdown("### ⚙️ Settings")
            reasoning_mode = gr.Radio(
                choices=["General Reasoning", "Math & Logic", "Code Review", "Chain-of-Thought"],
                value="General Reasoning",
                label="Reasoning Mode",
            )
            temperature = gr.Slider(
                minimum=0.0,
                maximum=1.0,
                value=0.7,
                step=0.1,
                label="Temperature",
                info="Lower = more focused, Higher = more creative"
            )
            max_tokens = gr.Slider(
                minimum=256,
                maximum=4096,
                value=1024,
                step=256,
                label="Max Tokens",
            )
            gr.Markdown("""
            ### 🔗 Links
            - [NexaAPI](https://nexa-api.com)
            - [Get API Key](https://rapidapi.com/user/nexaquency)
            - [Python SDK](https://pypi.org/project/nexaapi)
            - [npm Package](https://npmjs.com/package/nexaapi)
            - [Original Model](https://huggingface.co/Jackrong/Qwen3.5-9B-Claude-4.6-Opus-Reasoning-Distilled-GGUF)
            """)
    gr.Markdown("### 💡 Try These Examples")
    # Clicking an example fills both the question box and the mode radio.
    examples = gr.Examples(
        examples=EXAMPLE_PROMPTS,
        inputs=[msg_input, reasoning_mode],
        label="Example Prompts",
    )
    gr.HTML("""
    <div style="text-align: center; margin-top: 20px; padding: 16px; background: #f0f7ff; border-radius: 8px;">
        <strong>💰 Cost Comparison:</strong>
        Official Claude API ~$15/M tokens →
        <strong>NexaAPI ~$0.50/M tokens</strong> (5× cheaper!)
        <br>
        <a href="https://nexa-api.com" target="_blank">Get started free at nexa-api.com →</a>
    </div>
    """)

    # Event handlers
    def user_submit(message, history):
        # Step 1 of the two-step submit: clear the textbox and append the
        # user turn with a None placeholder for the assistant reply.
        return "", history + [[message, None]]

    def bot_respond(history, reasoning_mode, temperature, max_tokens):
        # Step 2: fill in the pending assistant slot by streaming.
        # Bail out if there is no pending turn (placeholder already filled).
        if not history or history[-1][1] is not None:
            return history
        message = history[-1][0]
        history[-1][1] = ""
        # history[:-1] excludes the in-progress turn from the model context;
        # each partial re-yields the whole history so the UI updates live.
        for partial in stream_response(message, history[:-1], reasoning_mode, temperature, max_tokens):
            history[-1][1] = partial
            yield history

    # Wire up events: textbox Enter and the button trigger the same
    # user_submit -> bot_respond chain. queue=False makes the echo of the
    # user turn appear immediately, before the streaming job is queued.
    msg_input.submit(
        user_submit,
        inputs=[msg_input, chatbot],
        outputs=[msg_input, chatbot],
        queue=False
    ).then(
        bot_respond,
        inputs=[chatbot, reasoning_mode, temperature, max_tokens],
        outputs=chatbot,
    )
    submit_btn.click(
        user_submit,
        inputs=[msg_input, chatbot],
        outputs=[msg_input, chatbot],
        queue=False
    ).then(
        bot_respond,
        inputs=[chatbot, reasoning_mode, temperature, max_tokens],
        outputs=chatbot,
    )
    # Clearing just resets the chatbot to an empty history.
    clear_btn.click(lambda: [], outputs=chatbot)
if __name__ == "__main__":
    # Bound the request queue so concurrent streaming jobs don't pile up.
    demo.queue(max_size=10)
    # 0.0.0.0:7860 is the standard binding for Hugging Face Spaces.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True,
    )