# nexaapi-tutorial-2026 / qwen35_reasoning_app.py
# Uploaded by: nickyni
# Commit: de5f9cf (verified) — "Add Qwen3.5-9B Claude Opus Reasoning demo app"
"""
HuggingFace Gradio Space: Qwen3.5-9B Claude Opus Reasoning Demo
Space: nickyni/qwen35-claude-reasoning-demo
This Gradio app demonstrates Claude 4.6 Opus-level reasoning via NexaAPI.
The underlying model is inspired by Qwen3.5-9B-Claude-4.6-Opus-Reasoning-Distilled.
"""
import gradio as gr
import os
from typing import Iterator

# Try to import nexaapi, fall back to openai with custom base_url
try:
    from nexaapi import NexaAPI

    USE_NEXAAPI = True  # native SDK available; get_client() uses NexaAPI
except ImportError:
    # The NexaAPI endpoint is OpenAI-compatible, so the OpenAI client
    # pointed at NEXAAPI_BASE_URL works as a drop-in fallback.
    from openai import OpenAI

    USE_NEXAAPI = False

# Get API key from environment variable (set in HF Space secrets).
# Empty string means "not configured"; stream_response() checks for this.
API_KEY = os.environ.get("NEXAAPI_KEY", "")

# Model configuration
MODEL_ID = "claude-sonnet-4"  # Best available reasoning model on NexaAPI
NEXAAPI_BASE_URL = "https://api.nexa-api.com/v1"
def get_client():
    """Build the chat-completions client for the configured backend.

    Returns:
        A ``NexaAPI`` client when the native SDK imported successfully,
        otherwise an ``OpenAI`` client pointed at the NexaAPI base URL.

    Raises:
        ValueError: if the NEXAAPI_KEY environment variable is unset/empty.
    """
    if not API_KEY:
        raise ValueError("NEXAAPI_KEY environment variable not set. Get your key at https://nexa-api.com")
    # Both clients expose the same chat.completions interface, so callers
    # never need to know which one they received.
    if USE_NEXAAPI:
        return NexaAPI(api_key=API_KEY)
    return OpenAI(api_key=API_KEY, base_url=NEXAAPI_BASE_URL)
# Built once at import time so each call is a plain dict lookup instead of
# re-creating the prompt table on every request.
_MODE_PROMPTS = {
    "General Reasoning": (
        "You are an expert reasoning assistant. Think carefully and systematically "
        "before answering. Break complex problems into clear steps."
    ),
    "Math & Logic": (
        "You are a mathematics and logic expert. Solve problems step-by-step, "
        "showing all work. Verify your answers. Use clear notation."
    ),
    "Code Review": (
        "You are a senior software engineer. Review code for bugs, security issues, "
        "performance problems, and style. Provide improved versions with explanations."
    ),
    "Chain-of-Thought": (
        "Solve problems using this exact structure:\n"
        "ANALYSIS: What is being asked? What information do I have?\n"
        "REASONING: Step-by-step logical deduction\n"
        "VERIFICATION: Does the answer make sense?\n"
        "ANSWER: Clear, concise final answer"
    ),
}


def format_system_prompt(mode: str) -> str:
    """Return the system prompt for the given reasoning mode.

    Args:
        mode: One of the UI reasoning modes ("General Reasoning",
            "Math & Logic", "Code Review", "Chain-of-Thought").

    Returns:
        The matching system prompt; any unknown mode falls back to the
        "General Reasoning" prompt.
    """
    return _MODE_PROMPTS.get(mode, _MODE_PROMPTS["General Reasoning"])
def stream_response(
    message: str,
    history: list,
    reasoning_mode: str,
    temperature: float,
    max_tokens: int,
) -> Iterator[str]:
    """
    Stream a response from NexaAPI.

    Args:
        message: User's input message (may be None or blank; handled gracefully)
        history: Chat history in Gradio tuples format ([user, assistant] pairs)
        reasoning_mode: Selected reasoning mode (see format_system_prompt)
        temperature: Model temperature (0.0-1.0)
        max_tokens: Maximum tokens to generate

    Yields:
        Progressively longer partial response strings for streaming; on
        failure, a single user-facing error message instead.
    """
    if not API_KEY:
        yield "⚠️ **API key not configured.** Please set NEXAAPI_KEY in Space secrets.\n\nGet your key at [nexa-api.com](https://nexa-api.com)"
        return
    # Guard against None as well as whitespace-only input: the original
    # `message.strip()` alone raised AttributeError when message was None.
    if not message or not message.strip():
        yield "Please enter a question or problem to solve."
        return
    try:
        client = get_client()
        # System prompt first, then the prior turns, then the new message.
        messages = [{"role": "system", "content": format_system_prompt(reasoning_mode)}]
        for human_msg, assistant_msg in history:
            if human_msg:
                messages.append({"role": "user", "content": human_msg})
            if assistant_msg:
                messages.append({"role": "assistant", "content": assistant_msg})
        messages.append({"role": "user", "content": message})
        # Stream the response, accumulating deltas so every yield carries the
        # full text so far (the form gr.Chatbot expects while streaming).
        stream = client.chat.completions.create(
            model=MODEL_ID,
            messages=messages,
            temperature=temperature,
            max_tokens=max_tokens,
            stream=True,
        )
        partial_response = ""
        for chunk in stream:
            delta = chunk.choices[0].delta
            # Some chunks (role-only or finish events) carry no content.
            if hasattr(delta, "content") and delta.content:
                partial_response += delta.content
                yield partial_response
    except Exception as e:
        # Map common HTTP failures to friendlier guidance; anything else is
        # surfaced verbatim so the user can report it.
        error_msg = str(e)
        if "401" in error_msg or "unauthorized" in error_msg.lower():
            yield "❌ **Authentication failed.** Check your NEXAAPI_KEY is correct.\n\nGet a key at [nexa-api.com](https://nexa-api.com)"
        elif "429" in error_msg or "rate limit" in error_msg.lower():
            yield "⏳ **Rate limit reached.** Please wait a moment and try again."
        else:
            yield f"❌ **Error:** {error_msg}\n\nIf this persists, check [nexa-api.com](https://nexa-api.com) for status."
# Example prompts for the UI
EXAMPLE_PROMPTS = [
["A snail climbs 3 feet up a 10-foot wall each day but slides back 2 feet each night. How many days to reach the top?", "Math & Logic"],
["Review this code for bugs:\n```python\ndef divide(a, b):\n return a/b\nresult = divide(10, 0)\n```", "Code Review"],
["Explain the difference between supervised and unsupervised learning with real-world examples.", "General Reasoning"],
["If I invest $1000 at 7% annual compound interest, how much will I have after 10 years? Show the formula.", "Math & Logic"],
["Design a simple rate limiter for an API. What data structures would you use?", "Chain-of-Thought"],
]
# Build the Gradio interface
with gr.Blocks(
    title="Qwen3.5-9B Claude Opus Reasoning Demo | NexaAPI",
    theme=gr.themes.Soft(primary_hue="blue"),
    css="""
    .header-text { text-align: center; margin-bottom: 20px; }
    .model-badge { background: #e8f4f8; padding: 8px 16px; border-radius: 20px; display: inline-block; }
    footer { display: none !important; }
    """
) as demo:
    # Page header with model/provider attribution links.
    gr.HTML("""
    <div class="header-text">
        <h1>🧠 Qwen3.5-9B Claude Opus Reasoning Demo</h1>
        <p>Experience Claude 4.6 Opus-level reasoning via <strong>NexaAPI</strong> — 5× cheaper than official pricing</p>
        <div class="model-badge">
            Powered by <a href="https://nexa-api.com" target="_blank">NexaAPI</a> ·
            Model: Claude Sonnet 4 (Opus-distilled reasoning) ·
            <a href="https://huggingface.co/Jackrong/Qwen3.5-9B-Claude-4.6-Opus-Reasoning-Distilled-GGUF" target="_blank">Original Model</a>
        </div>
    </div>
    """)
    with gr.Row():
        # Left column (3/4 width): chat area with input and action buttons.
        with gr.Column(scale=3):
            # NOTE(review): no `type=` argument, so this Chatbot uses the
            # legacy tuples format ([user, assistant] pairs), which the
            # handlers below depend on. `bubble_full_width` is deprecated in
            # newer Gradio releases — confirm against the pinned version.
            chatbot = gr.Chatbot(
                label="Reasoning Assistant",
                height=500,
                show_label=True,
                bubble_full_width=False,
            )
            with gr.Row():
                msg_input = gr.Textbox(
                    placeholder="Ask a reasoning question, math problem, or paste code to review...",
                    label="Your Question",
                    lines=3,
                    scale=4,
                )
                submit_btn = gr.Button("🧠 Reason", variant="primary", scale=1)
            clear_btn = gr.Button("🗑️ Clear Chat", variant="secondary")
        # Right column (1/4 width): generation settings and links.
        with gr.Column(scale=1):
            gr.Markdown("### ⚙️ Settings")
            reasoning_mode = gr.Radio(
                choices=["General Reasoning", "Math & Logic", "Code Review", "Chain-of-Thought"],
                value="General Reasoning",
                label="Reasoning Mode",
            )
            temperature = gr.Slider(
                minimum=0.0,
                maximum=1.0,
                value=0.7,
                step=0.1,
                label="Temperature",
                info="Lower = more focused, Higher = more creative"
            )
            max_tokens = gr.Slider(
                minimum=256,
                maximum=4096,
                value=1024,
                step=256,
                label="Max Tokens",
            )
            gr.Markdown("""
            ### 🔗 Links
            - [NexaAPI](https://nexa-api.com)
            - [Get API Key](https://rapidapi.com/user/nexaquency)
            - [Python SDK](https://pypi.org/project/nexaapi)
            - [npm Package](https://npmjs.com/package/nexaapi)
            - [Original Model](https://huggingface.co/Jackrong/Qwen3.5-9B-Claude-4.6-Opus-Reasoning-Distilled-GGUF)
            """)
    gr.Markdown("### 💡 Try These Examples")
    # Clicking an example fills both the question box and the mode radio.
    examples = gr.Examples(
        examples=EXAMPLE_PROMPTS,
        inputs=[msg_input, reasoning_mode],
        label="Example Prompts",
    )
    gr.HTML("""
    <div style="text-align: center; margin-top: 20px; padding: 16px; background: #f0f7ff; border-radius: 8px;">
        <strong>💰 Cost Comparison:</strong>
        Official Claude API ~$15/M tokens →
        <strong>NexaAPI ~$0.50/M tokens</strong> (5× cheaper!)
        <br>
        <a href="https://nexa-api.com" target="_blank">Get started free at nexa-api.com →</a>
    </div>
    """)

    # Event handlers
    def user_submit(message, history):
        # Step 1 of the two-step submit: clear the textbox and append the
        # user turn with a None placeholder for the assistant reply.
        return "", history + [[message, None]]

    def bot_respond(history, reasoning_mode, temperature, max_tokens):
        # Step 2: fill in the pending assistant slot by streaming.
        # Bail out if there is no pending turn (placeholder already filled).
        if not history or history[-1][1] is not None:
            return history
        message = history[-1][0]
        history[-1][1] = ""
        # history[:-1] excludes the in-progress turn from the model context;
        # each partial re-yields the whole history so the UI updates live.
        for partial in stream_response(message, history[:-1], reasoning_mode, temperature, max_tokens):
            history[-1][1] = partial
            yield history

    # Wire up events: textbox Enter and the button trigger the same
    # user_submit -> bot_respond chain. queue=False makes the echo of the
    # user turn appear immediately, before the streaming job is queued.
    msg_input.submit(
        user_submit,
        inputs=[msg_input, chatbot],
        outputs=[msg_input, chatbot],
        queue=False
    ).then(
        bot_respond,
        inputs=[chatbot, reasoning_mode, temperature, max_tokens],
        outputs=chatbot,
    )
    submit_btn.click(
        user_submit,
        inputs=[msg_input, chatbot],
        outputs=[msg_input, chatbot],
        queue=False
    ).then(
        bot_respond,
        inputs=[chatbot, reasoning_mode, temperature, max_tokens],
        outputs=chatbot,
    )
    # Clearing just resets the chatbot to an empty history.
    clear_btn.click(lambda: [], outputs=chatbot)
if __name__ == "__main__":
    # Bound the request queue so concurrent streaming jobs don't pile up.
    demo.queue(max_size=10)
    # 0.0.0.0:7860 is the standard binding for Hugging Face Spaces.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True,
    )