nickyni committed on
Commit
de5f9cf
·
verified ·
1 Parent(s): a35ae64

Add Qwen3.5-9B Claude Opus Reasoning demo app

Browse files
Files changed (1) hide show
  1. qwen35_reasoning_app.py +287 -0
qwen35_reasoning_app.py ADDED
@@ -0,0 +1,287 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ HuggingFace Gradio Space: Qwen3.5-9B Claude Opus Reasoning Demo
3
+ Space: nickyni/qwen35-claude-reasoning-demo
4
+
5
+ This Gradio app demonstrates Claude 4.6 Opus-level reasoning via NexaAPI.
6
+ The underlying model is inspired by Qwen3.5-9B-Claude-4.6-Opus-Reasoning-Distilled.
7
+ """
8
+
9
+ import gradio as gr
10
+ import os
11
+ from typing import Iterator
12
+
13
+ # Try to import nexaapi, fall back to openai with custom base_url
14
+ try:
15
+ from nexaapi import NexaAPI
16
+ USE_NEXAAPI = True
17
+ except ImportError:
18
+ from openai import OpenAI
19
+ USE_NEXAAPI = False
20
+
21
+ # Get API key from environment variable (set in HF Space secrets)
22
+ API_KEY = os.environ.get("NEXAAPI_KEY", "")
23
+
24
+ # Model configuration
25
+ MODEL_ID = "claude-sonnet-4" # Best available reasoning model on NexaAPI
26
+ NEXAAPI_BASE_URL = "https://api.nexa-api.com/v1"
27
+
28
def get_client():
    """Build and return an API client for the chat backend.

    Prefers the native NexaAPI SDK when it is installed; otherwise falls back
    to the OpenAI SDK pointed at the NexaAPI-compatible base URL.

    Raises:
        ValueError: if the NEXAAPI_KEY environment variable / Space secret
            is missing or empty.
    """
    if not API_KEY:
        raise ValueError("NEXAAPI_KEY environment variable not set. Get your key at https://nexa-api.com")

    if not USE_NEXAAPI:
        # OpenAI-compatible fallback client aimed at the NexaAPI endpoint.
        return OpenAI(api_key=API_KEY, base_url=NEXAAPI_BASE_URL)
    return NexaAPI(api_key=API_KEY)
37
+
38
+
39
def format_system_prompt(mode: str) -> str:
    """Return the system prompt text for the given reasoning mode.

    Unknown modes fall back to the "General Reasoning" prompt.
    """
    general = (
        "You are an expert reasoning assistant. Think carefully and systematically "
        "before answering. Break complex problems into clear steps."
    )
    by_mode = {
        "General Reasoning": general,
        "Math & Logic": (
            "You are a mathematics and logic expert. Solve problems step-by-step, "
            "showing all work. Verify your answers. Use clear notation."
        ),
        "Code Review": (
            "You are a senior software engineer. Review code for bugs, security issues, "
            "performance problems, and style. Provide improved versions with explanations."
        ),
        "Chain-of-Thought": (
            "Solve problems using this exact structure:\n"
            "ANALYSIS: What is being asked? What information do I have?\n"
            "REASONING: Step-by-step logical deduction\n"
            "VERIFICATION: Does the answer make sense?\n"
            "ANSWER: Clear, concise final answer"
        ),
    }
    return by_mode.get(mode, general)
63
+
64
+
65
def stream_response(
    message: str,
    history: list,
    reasoning_mode: str,
    temperature: float,
    max_tokens: int,
) -> Iterator[str]:
    """Stream a chat completion, yielding the accumulated text after each chunk.

    Args:
        message: The user's latest input.
        history: Prior turns as [user, assistant] pairs (Gradio tuple format).
        reasoning_mode: Key selecting the system prompt (see format_system_prompt).
        temperature: Sampling temperature forwarded to the API (0.0-1.0).
        max_tokens: Generation cap forwarded to the API.

    Yields:
        Progressively longer partial response strings, or a single
        user-facing Markdown error message when configuration or the
        request fails.
    """
    # Fail fast with a helpful message when the Space secret is missing.
    if not API_KEY:
        yield "⚠️ **API key not configured.** Please set NEXAAPI_KEY in Space secrets.\n\nGet your key at [nexa-api.com](https://nexa-api.com)"
        return

    if not message.strip():
        yield "Please enter a question or problem to solve."
        return

    try:
        client = get_client()

        # Assemble the conversation: system prompt, prior turns, then the new message.
        conversation = [{"role": "system", "content": format_system_prompt(reasoning_mode)}]
        for user_turn, assistant_turn in history:
            if user_turn:
                conversation.append({"role": "user", "content": user_turn})
            if assistant_turn:
                conversation.append({"role": "assistant", "content": assistant_turn})
        conversation.append({"role": "user", "content": message})

        response_stream = client.chat.completions.create(
            model=MODEL_ID,
            messages=conversation,
            temperature=temperature,
            max_tokens=max_tokens,
            stream=True,
        )

        # Accumulate deltas and re-yield the full text so the UI can re-render it.
        accumulated = ""
        for chunk in response_stream:
            piece = getattr(chunk.choices[0].delta, "content", None)
            if piece:
                accumulated += piece
                yield accumulated

    except Exception as exc:
        # UI boundary: convert common API failures into friendly Markdown notices.
        reason = str(exc)
        lowered = reason.lower()
        if "401" in reason or "unauthorized" in lowered:
            yield "❌ **Authentication failed.** Check your NEXAAPI_KEY is correct.\n\nGet a key at [nexa-api.com](https://nexa-api.com)"
        elif "429" in reason or "rate limit" in lowered:
            yield "⏳ **Rate limit reached.** Please wait a moment and try again."
        else:
            yield f"❌ **Error:** {reason}\n\nIf this persists, check [nexa-api.com](https://nexa-api.com) for status."
133
+
134
+
135
# Example prompts for the UI.
# Each entry is [prompt_text, reasoning_mode]; the mode string must match one
# of the gr.Radio choices so gr.Examples can populate both inputs at once.
EXAMPLE_PROMPTS = [
    ["A snail climbs 3 feet up a 10-foot wall each day but slides back 2 feet each night. How many days to reach the top?", "Math & Logic"],
    ["Review this code for bugs:\n```python\ndef divide(a, b):\n return a/b\nresult = divide(10, 0)\n```", "Code Review"],
    ["Explain the difference between supervised and unsupervised learning with real-world examples.", "General Reasoning"],
    ["If I invest $1000 at 7% annual compound interest, how much will I have after 10 years? Show the formula.", "Math & Logic"],
    ["Design a simple rate limiter for an API. What data structures would you use?", "Chain-of-Thought"],
]
143
+
144
# Build the Gradio interface.
# Layout: a header banner, a two-column row (chat on the left, settings on the
# right), example prompts, a promo footer, and the event wiring at the bottom.
with gr.Blocks(
    title="Qwen3.5-9B Claude Opus Reasoning Demo | NexaAPI",
    theme=gr.themes.Soft(primary_hue="blue"),
    css="""
    .header-text { text-align: center; margin-bottom: 20px; }
    .model-badge { background: #e8f4f8; padding: 8px 16px; border-radius: 20px; display: inline-block; }
    footer { display: none !important; }
    """
) as demo:

    # Page header with model/provider attribution and links.
    gr.HTML("""
    <div class="header-text">
        <h1>🧠 Qwen3.5-9B Claude Opus Reasoning Demo</h1>
        <p>Experience Claude 4.6 Opus-level reasoning via <strong>NexaAPI</strong> — 5× cheaper than official pricing</p>
        <div class="model-badge">
            Powered by <a href="https://nexa-api.com" target="_blank">NexaAPI</a> ·
            Model: Claude Sonnet 4 (Opus-distilled reasoning) ·
            <a href="https://huggingface.co/Jackrong/Qwen3.5-9B-Claude-4.6-Opus-Reasoning-Distilled-GGUF" target="_blank">Original Model</a>
        </div>
    </div>
    """)

    with gr.Row():
        # Left column: chat history, input box, and action buttons.
        with gr.Column(scale=3):
            chatbot = gr.Chatbot(
                label="Reasoning Assistant",
                height=500,
                show_label=True,
                bubble_full_width=False,
            )

            with gr.Row():
                msg_input = gr.Textbox(
                    placeholder="Ask a reasoning question, math problem, or paste code to review...",
                    label="Your Question",
                    lines=3,
                    scale=4,
                )
                submit_btn = gr.Button("🧠 Reason", variant="primary", scale=1)

            clear_btn = gr.Button("🗑️ Clear Chat", variant="secondary")

        # Right column: generation settings and reference links.
        with gr.Column(scale=1):
            gr.Markdown("### ⚙️ Settings")

            reasoning_mode = gr.Radio(
                choices=["General Reasoning", "Math & Logic", "Code Review", "Chain-of-Thought"],
                value="General Reasoning",
                label="Reasoning Mode",
            )

            temperature = gr.Slider(
                minimum=0.0,
                maximum=1.0,
                value=0.7,
                step=0.1,
                label="Temperature",
                info="Lower = more focused, Higher = more creative"
            )

            max_tokens = gr.Slider(
                minimum=256,
                maximum=4096,
                value=1024,
                step=256,
                label="Max Tokens",
            )

            gr.Markdown("""
            ### 🔗 Links
            - [NexaAPI](https://nexa-api.com)
            - [Get API Key](https://rapidapi.com/user/nexaquency)
            - [Python SDK](https://pypi.org/project/nexaapi)
            - [npm Package](https://npmjs.com/package/nexaapi)
            - [Original Model](https://huggingface.co/Jackrong/Qwen3.5-9B-Claude-4.6-Opus-Reasoning-Distilled-GGUF)
            """)

    gr.Markdown("### 💡 Try These Examples")

    # Clicking an example fills both the textbox and the mode radio.
    examples = gr.Examples(
        examples=EXAMPLE_PROMPTS,
        inputs=[msg_input, reasoning_mode],
        label="Example Prompts",
    )

    gr.HTML("""
    <div style="text-align: center; margin-top: 20px; padding: 16px; background: #f0f7ff; border-radius: 8px;">
        <strong>💰 Cost Comparison:</strong>
        Official Claude API ~$15/M tokens →
        <strong>NexaAPI ~$0.50/M tokens</strong> (5× cheaper!)
        <br>
        <a href="https://nexa-api.com" target="_blank">Get started free at nexa-api.com →</a>
    </div>
    """)

    # Event handlers
    def user_submit(message, history):
        # Clear the textbox and append the user's turn with an empty (None)
        # assistant slot that bot_respond will fill in.
        return "", history + [[message, None]]

    def bot_respond(history, reasoning_mode, temperature, max_tokens):
        # Guard: only act when the last turn has an unanswered user message.
        # NOTE(review): `return` inside this generator ends the stream without
        # emitting an update — acceptable here since there is nothing to change.
        if not history or history[-1][1] is not None:
            return history

        message = history[-1][0]
        history[-1][1] = ""

        # Stream partial completions into the last assistant slot so the
        # chatbot re-renders live as tokens arrive.
        for partial in stream_response(message, history[:-1], reasoning_mode, temperature, max_tokens):
            history[-1][1] = partial
            yield history

    # Wire up events: Enter in the textbox and the button trigger the same
    # two-step flow (append user turn immediately, then stream the reply).
    msg_input.submit(
        user_submit,
        inputs=[msg_input, chatbot],
        outputs=[msg_input, chatbot],
        queue=False
    ).then(
        bot_respond,
        inputs=[chatbot, reasoning_mode, temperature, max_tokens],
        outputs=chatbot,
    )

    submit_btn.click(
        user_submit,
        inputs=[msg_input, chatbot],
        outputs=[msg_input, chatbot],
        queue=False
    ).then(
        bot_respond,
        inputs=[chatbot, reasoning_mode, temperature, max_tokens],
        outputs=chatbot,
    )

    # Reset the chat history to an empty list.
    clear_btn.click(lambda: [], outputs=chatbot)
279
+
280
+
281
if __name__ == "__main__":
    # Enable request queuing so concurrent users share the streaming backend.
    demo.queue(max_size=10)
    demo.launch(
        server_name="0.0.0.0",  # bind all interfaces (required in HF Spaces containers)
        server_port=7860,       # the port HF Spaces expects apps to listen on
        show_error=True,
    )