""" HuggingFace Gradio Space: Qwen3.5-9B Claude Opus Reasoning Demo Space: nickyni/qwen35-claude-reasoning-demo This Gradio app demonstrates Claude 4.6 Opus-level reasoning via NexaAPI. The underlying model is inspired by Qwen3.5-9B-Claude-4.6-Opus-Reasoning-Distilled. """ import gradio as gr import os from typing import Iterator # Try to import nexaapi, fall back to openai with custom base_url try: from nexaapi import NexaAPI USE_NEXAAPI = True except ImportError: from openai import OpenAI USE_NEXAAPI = False # Get API key from environment variable (set in HF Space secrets) API_KEY = os.environ.get("NEXAAPI_KEY", "") # Model configuration MODEL_ID = "claude-sonnet-4" # Best available reasoning model on NexaAPI NEXAAPI_BASE_URL = "https://api.nexa-api.com/v1" def get_client(): """Initialize the API client.""" if not API_KEY: raise ValueError("NEXAAPI_KEY environment variable not set. Get your key at https://nexa-api.com") if USE_NEXAAPI: return NexaAPI(api_key=API_KEY) else: return OpenAI(api_key=API_KEY, base_url=NEXAAPI_BASE_URL) def format_system_prompt(mode: str) -> str: """Return appropriate system prompt based on reasoning mode.""" prompts = { "General Reasoning": ( "You are an expert reasoning assistant. Think carefully and systematically " "before answering. Break complex problems into clear steps." ), "Math & Logic": ( "You are a mathematics and logic expert. Solve problems step-by-step, " "showing all work. Verify your answers. Use clear notation." ), "Code Review": ( "You are a senior software engineer. Review code for bugs, security issues, " "performance problems, and style. Provide improved versions with explanations." ), "Chain-of-Thought": ( "Solve problems using this exact structure:\n" "ANALYSIS: What is being asked? What information do I have?\n" "REASONING: Step-by-step logical deduction\n" "VERIFICATION: Does the answer make sense?\n" "ANSWER: Clear, concise final answer" ), } return prompts.get(mode, prompts["General Reasoning"]) def stream_response( message: str, history: list, reasoning_mode: str, temperature: float, max_tokens: int, ) -> Iterator[str]: """ Stream a response from NexaAPI. Args: message: User's input message history: Chat history in Gradio format reasoning_mode: Selected reasoning mode temperature: Model temperature (0.0-1.0) max_tokens: Maximum tokens to generate Yields: Partial response strings for streaming """ if not API_KEY: yield "⚠️ **API key not configured.** Please set NEXAAPI_KEY in Space secrets.\n\nGet your key at [nexa-api.com](https://nexa-api.com)" return if not message.strip(): yield "Please enter a question or problem to solve." return try: client = get_client() # Build messages list messages = [{"role": "system", "content": format_system_prompt(reasoning_mode)}] # Add history for human_msg, assistant_msg in history: if human_msg: messages.append({"role": "user", "content": human_msg}) if assistant_msg: messages.append({"role": "assistant", "content": assistant_msg}) # Add current message messages.append({"role": "user", "content": message}) # Stream the response stream = client.chat.completions.create( model=MODEL_ID, messages=messages, temperature=temperature, max_tokens=max_tokens, stream=True, ) partial_response = "" for chunk in stream: delta = chunk.choices[0].delta if hasattr(delta, "content") and delta.content: partial_response += delta.content yield partial_response except Exception as e: error_msg = str(e) if "401" in error_msg or "unauthorized" in error_msg.lower(): yield "❌ **Authentication failed.** Check your NEXAAPI_KEY is correct.\n\nGet a key at [nexa-api.com](https://nexa-api.com)" elif "429" in error_msg or "rate limit" in error_msg.lower(): yield "⏳ **Rate limit reached.** Please wait a moment and try again." else: yield f"❌ **Error:** {error_msg}\n\nIf this persists, check [nexa-api.com](https://nexa-api.com) for status." # Example prompts for the UI EXAMPLE_PROMPTS = [ ["A snail climbs 3 feet up a 10-foot wall each day but slides back 2 feet each night. How many days to reach the top?", "Math & Logic"], ["Review this code for bugs:\n```python\ndef divide(a, b):\n return a/b\nresult = divide(10, 0)\n```", "Code Review"], ["Explain the difference between supervised and unsupervised learning with real-world examples.", "General Reasoning"], ["If I invest $1000 at 7% annual compound interest, how much will I have after 10 years? Show the formula.", "Math & Logic"], ["Design a simple rate limiter for an API. What data structures would you use?", "Chain-of-Thought"], ] # Build the Gradio interface with gr.Blocks( title="Qwen3.5-9B Claude Opus Reasoning Demo | NexaAPI", theme=gr.themes.Soft(primary_hue="blue"), css=""" .header-text { text-align: center; margin-bottom: 20px; } .model-badge { background: #e8f4f8; padding: 8px 16px; border-radius: 20px; display: inline-block; } footer { display: none !important; } """ ) as demo: gr.HTML("""

🧠 Qwen3.5-9B Claude Opus Reasoning Demo

Experience Claude 4.6 Opus-level reasoning via NexaAPI — 5× cheaper than official pricing

Powered by NexaAPI · Model: Claude Sonnet 4 (Opus-distilled reasoning) · Original Model
""") with gr.Row(): with gr.Column(scale=3): chatbot = gr.Chatbot( label="Reasoning Assistant", height=500, show_label=True, bubble_full_width=False, ) with gr.Row(): msg_input = gr.Textbox( placeholder="Ask a reasoning question, math problem, or paste code to review...", label="Your Question", lines=3, scale=4, ) submit_btn = gr.Button("🧠 Reason", variant="primary", scale=1) clear_btn = gr.Button("🗑️ Clear Chat", variant="secondary") with gr.Column(scale=1): gr.Markdown("### ⚙️ Settings") reasoning_mode = gr.Radio( choices=["General Reasoning", "Math & Logic", "Code Review", "Chain-of-Thought"], value="General Reasoning", label="Reasoning Mode", ) temperature = gr.Slider( minimum=0.0, maximum=1.0, value=0.7, step=0.1, label="Temperature", info="Lower = more focused, Higher = more creative" ) max_tokens = gr.Slider( minimum=256, maximum=4096, value=1024, step=256, label="Max Tokens", ) gr.Markdown(""" ### 🔗 Links - [NexaAPI](https://nexa-api.com) - [Get API Key](https://rapidapi.com/user/nexaquency) - [Python SDK](https://pypi.org/project/nexaapi) - [npm Package](https://npmjs.com/package/nexaapi) - [Original Model](https://huggingface.co/Jackrong/Qwen3.5-9B-Claude-4.6-Opus-Reasoning-Distilled-GGUF) """) gr.Markdown("### 💡 Try These Examples") examples = gr.Examples( examples=EXAMPLE_PROMPTS, inputs=[msg_input, reasoning_mode], label="Example Prompts", ) gr.HTML("""
💰 Cost Comparison: Official Claude API ~$15/M tokens → NexaAPI ~$0.50/M tokens (5× cheaper!)
Get started free at nexa-api.com →
""") # Event handlers def user_submit(message, history): return "", history + [[message, None]] def bot_respond(history, reasoning_mode, temperature, max_tokens): if not history or history[-1][1] is not None: return history message = history[-1][0] history[-1][1] = "" for partial in stream_response(message, history[:-1], reasoning_mode, temperature, max_tokens): history[-1][1] = partial yield history # Wire up events msg_input.submit( user_submit, inputs=[msg_input, chatbot], outputs=[msg_input, chatbot], queue=False ).then( bot_respond, inputs=[chatbot, reasoning_mode, temperature, max_tokens], outputs=chatbot, ) submit_btn.click( user_submit, inputs=[msg_input, chatbot], outputs=[msg_input, chatbot], queue=False ).then( bot_respond, inputs=[chatbot, reasoning_mode, temperature, max_tokens], outputs=chatbot, ) clear_btn.click(lambda: [], outputs=chatbot) if __name__ == "__main__": demo.queue(max_size=10) demo.launch( server_name="0.0.0.0", server_port=7860, show_error=True, )