"""
HuggingFace Gradio Space: Qwen3.5-9B Claude Opus Reasoning Demo
Space: nickyni/qwen35-claude-reasoning-demo

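This Gradio app demonstrates Claude 4.6 Opus-style reasoning by streaming chat
completions from NexaAPI; the model it actually calls is `claude-sonnet-4` (see MODEL_ID).
The demo is inspired by Qwen3.5-9B-Claude-4.6-Opus-Reasoning-Distilled, which this app
does not run itself.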
"""

import gradio as gr
import os
from typing import Iterator

# Try to import nexaapi, fall back to openai with custom base_url
try:
    from nexaapi import NexaAPI
    USE_NEXAAPI = True
except ImportError:
    from openai import OpenAI
    USE_NEXAAPI = False

# The API key is read once, at import time, from the environment (set in HF Space
# secrets). An empty string means "not configured".
API_KEY = os.getenv("NEXAAPI_KEY", "")

# Model configuration
NEXAAPI_BASE_URL = "https://api.nexa-api.com/v1"  # used only by the OpenAI-client fallback
MODEL_ID = "claude-sonnet-4"  # model name sent with every chat-completions request

def get_client():
    """Build the chat-completions client used to talk to NexaAPI.

    Returns:
        A ``NexaAPI`` client when the ``nexaapi`` package was importable
        (``USE_NEXAAPI`` is true); otherwise an ``OpenAI`` client pointed at
        ``NEXAAPI_BASE_URL``.

    Raises:
        ValueError: If ``API_KEY`` is empty.
    """
    if not API_KEY:
        raise ValueError("NEXAAPI_KEY environment variable not set. Get your key at https://nexa-api.com")

    # Fallback path first: the OpenAI SDK needs the custom base URL.
    if not USE_NEXAAPI:
        return OpenAI(api_key=API_KEY, base_url=NEXAAPI_BASE_URL)

    return NexaAPI(api_key=API_KEY)


def format_system_prompt(mode: str) -> str:
    """Look up the system prompt for a reasoning mode.

    Args:
        mode: One of "General Reasoning", "Math & Logic", "Code Review" or
            "Chain-of-Thought".

    Returns:
        The prompt text for ``mode``. Any other value falls back to the
        "General Reasoning" prompt.
    """
    general = (
        "You are an expert reasoning assistant. Think carefully and systematically "
        "before answering. Break complex problems into clear steps."
    )
    math_logic = (
        "You are a mathematics and logic expert. Solve problems step-by-step, "
        "showing all work. Verify your answers. Use clear notation."
    )
    code_review = (
        "You are a senior software engineer. Review code for bugs, security issues, "
        "performance problems, and style. Provide improved versions with explanations."
    )
    # One template line per section the model is asked to emit.
    chain_of_thought = "\n".join(
        (
            "Solve problems using this exact structure:",
            "ANALYSIS: What is being asked? What information do I have?",
            "REASONING: Step-by-step logical deduction",
            "VERIFICATION: Does the answer make sense?",
            "ANSWER: Clear, concise final answer",
        )
    )

    by_mode = {
        "General Reasoning": general,
        "Math & Logic": math_logic,
        "Code Review": code_review,
        "Chain-of-Thought": chain_of_thought,
    }
    try:
        return by_mode[mode]
    except KeyError:
        # Unknown modes get the general-purpose prompt.
        return general


def stream_response(
    message: str,
    history: list,
    reasoning_mode: str,
    temperature: float,
    max_tokens: int,
) -> Iterator[str]:
    """
    Stream a response from NexaAPI.

    Every yielded value is the whole reply accumulated so far (not just the
    newest fragment), so a caller can simply overwrite what it is displaying.
    Problems are reported by yielding a user-facing message instead of raising.

    Args:
        message: User's input message
        history: Chat history as ``[user, assistant]`` pairs; ``None`` or an
            empty list means there are no previous turns
        reasoning_mode: Selected reasoning mode (chooses the system prompt)
        temperature: Model temperature (0.0-1.0)
        max_tokens: Maximum tokens to generate

    Yields:
        Partial response strings for streaming, or a single notice/error string
    """
    if not API_KEY:
        yield "⚠️ **API key not configured.** Please set NEXAAPI_KEY in Space secrets.\n\nGet your key at [nexa-api.com](https://nexa-api.com)"
        return

    # Guard against None as well as blank text so the check itself cannot raise.
    if not message or not message.strip():
        yield "Please enter a question or problem to solve."
        return

    try:
        client = get_client()

        # Build messages list: system prompt, previous turns, then the new question
        messages = [{"role": "system", "content": format_system_prompt(reasoning_mode)}]

        for human_msg, assistant_msg in history or []:
            # Either side of a pair may be empty (e.g. a reply still pending); skip those.
            if human_msg:
                messages.append({"role": "user", "content": human_msg})
            if assistant_msg:
                messages.append({"role": "assistant", "content": assistant_msg})

        messages.append({"role": "user", "content": message})

        # Stream the response
        stream = client.chat.completions.create(
            model=MODEL_ID,
            messages=messages,
            temperature=temperature,
            max_tokens=max_tokens,
            stream=True,
        )

        partial_response = ""
        for chunk in stream:
            # A streamed chunk may carry an empty `choices` list (for example a
            # usage-only frame); indexing it would raise and abort the stream.
            choices = getattr(chunk, "choices", None)
            if not choices:
                continue

            content = getattr(choices[0].delta, "content", None)
            if content:
                partial_response += content
                yield partial_response

    except Exception as e:
        # Top-level boundary for the UI: turn any failure into a readable chat message.
        error_msg = str(e)
        if "401" in error_msg or "unauthorized" in error_msg.lower():
            yield "❌ **Authentication failed.** Check your NEXAAPI_KEY is correct.\n\nGet a key at [nexa-api.com](https://nexa-api.com)"
        elif "429" in error_msg or "rate limit" in error_msg.lower():
            yield "⏳ **Rate limit reached.** Please wait a moment and try again."
        else:
            yield f"❌ **Error:** {error_msg}\n\nIf this persists, check [nexa-api.com](https://nexa-api.com) for status."


# Example prompts for the UI.
# Each row is [question text, reasoning mode], in the same order as the
# `inputs=[msg_input, reasoning_mode]` list given to gr.Examples below; the mode
# strings are the same labels used as choices of the "Reasoning Mode" radio.
EXAMPLE_PROMPTS = [
    ["A snail climbs 3 feet up a 10-foot wall each day but slides back 2 feet each night. How many days to reach the top?", "Math & Logic"],
    ["Review this code for bugs:\n```python\ndef divide(a, b):\n    return a/b\nresult = divide(10, 0)\n```", "Code Review"],
    ["Explain the difference between supervised and unsupervised learning with real-world examples.", "General Reasoning"],
    ["If I invest $1000 at 7% annual compound interest, how much will I have after 10 years? Show the formula.", "Math & Logic"],
    ["Design a simple rate limiter for an API. What data structures would you use?", "Chain-of-Thought"],
]

# Build the Gradio interface.
# The custom CSS defines the two classes used by the header HTML below
# (.header-text, .model-badge) and hides any <footer> element on the page.
with gr.Blocks(
    title="Qwen3.5-9B Claude Opus Reasoning Demo | NexaAPI",
    theme=gr.themes.Soft(primary_hue="blue"),
    css="""
    .header-text { text-align: center; margin-bottom: 20px; }
    .model-badge { background: #e8f4f8; padding: 8px 16px; border-radius: 20px; display: inline-block; }
    footer { display: none !important; }
    """
) as demo:

    # Page header: title, tagline and the provider/model badge.
    # The model named in the badge should stay in sync with MODEL_ID.
    gr.HTML("""
    <div class="header-text">
        <h1>🧠 Qwen3.5-9B Claude Opus Reasoning Demo</h1>
        <p>Experience Claude 4.6 Opus-level reasoning via <strong>NexaAPI</strong> — 5× cheaper than official pricing</p>
        <div class="model-badge">
            Powered by <a href="https://nexa-api.com" target="_blank">NexaAPI</a> · 
            Model: Claude Sonnet 4 (Opus-distilled reasoning) · 
            <a href="https://huggingface.co/Jackrong/Qwen3.5-9B-Claude-4.6-Opus-Reasoning-Distilled-GGUF" target="_blank">Original Model</a>
        </div>
    </div>
    """)

    with gr.Row():
        # Left column (scale=3): chat transcript, question box and action buttons.
        with gr.Column(scale=3):
            # NOTE(review): `bubble_full_width` appears to be deprecated in recent
            # Gradio releases — confirm against the version pinned for this Space.
            chatbot = gr.Chatbot(
                label="Reasoning Assistant",
                height=500,
                show_label=True,
                bubble_full_width=False,
            )

            with gr.Row():
                msg_input = gr.Textbox(
                    placeholder="Ask a reasoning question, math problem, or paste code to review...",
                    label="Your Question",
                    lines=3,
                    scale=4,
                )
                submit_btn = gr.Button("🧠 Reason", variant="primary", scale=1)

            clear_btn = gr.Button("🗑️ Clear Chat", variant="secondary")

        # Right column (scale=1): generation settings and reference links.
        with gr.Column(scale=1):
            gr.Markdown("### ⚙️ Settings")

            # The choice labels are the keys format_system_prompt() looks up.
            reasoning_mode = gr.Radio(
                choices=["General Reasoning", "Math & Logic", "Code Review", "Chain-of-Thought"],
                value="General Reasoning",
                label="Reasoning Mode",
            )

            # Both sliders are passed through bot_respond/stream_response to the
            # chat-completions request as `temperature` and `max_tokens`.
            temperature = gr.Slider(
                minimum=0.0,
                maximum=1.0,
                value=0.7,
                step=0.1,
                label="Temperature",
                info="Lower = more focused, Higher = more creative"
            )

            max_tokens = gr.Slider(
                minimum=256,
                maximum=4096,
                value=1024,
                step=256,
                label="Max Tokens",
            )

            gr.Markdown("""
            ### 🔗 Links
            - [NexaAPI](https://nexa-api.com)
            - [Get API Key](https://rapidapi.com/user/nexaquency)
            - [Python SDK](https://pypi.org/project/nexaapi)
            - [npm Package](https://npmjs.com/package/nexaapi)
            - [Original Model](https://huggingface.co/Jackrong/Qwen3.5-9B-Claude-4.6-Opus-Reasoning-Distilled-GGUF)
            """)

    gr.Markdown("### 💡 Try These Examples")

    # Each example row supplies a value for the question box and the mode radio,
    # in that order (see EXAMPLE_PROMPTS).
    examples = gr.Examples(
        examples=EXAMPLE_PROMPTS,
        inputs=[msg_input, reasoning_mode],
        label="Example Prompts",
    )

    # Pricing banner with a sign-up link.
    # NOTE(review): the figures shown (~$15 vs ~$0.50 per M tokens) are a 30× ratio,
    # but the copy says "5× cheaper" here and in the header — confirm which is intended.
    gr.HTML("""
    <div style="text-align: center; margin-top: 20px; padding: 16px; background: #f0f7ff; border-radius: 8px;">
        <strong>💰 Cost Comparison:</strong> 
        Official Claude API ~$15/M tokens → 
        <strong>NexaAPI ~$0.50/M tokens</strong> (5× cheaper!)
        <br>
        <a href="https://nexa-api.com" target="_blank">Get started free at nexa-api.com →</a>
    </div>
    """)
    
    # Event handlers
    def user_submit(message, history):
        """Append the user's message to the transcript and clear the input box.

        Args:
            message: Text currently in the question box.
            history: Chat transcript as a list of ``[user, assistant]`` pairs.
                ``None`` is treated as an empty transcript instead of raising
                ``TypeError`` on the concatenation.

        Returns:
            A ``("", new_history)`` tuple. The empty string resets the textbox;
            ``new_history`` is a new list (the argument is not mutated) ending
            with ``[message, None]``, where ``None`` marks the reply that
            ``bot_respond`` fills in.
        """
        return "", (history or []) + [[message, None]]
    
    def bot_respond(history, reasoning_mode, temperature, max_tokens):
        """Stream the assistant's reply into the last transcript entry.

        Args:
            history: Chat transcript as ``[user, assistant]`` pairs; the last pair
                is expected to be ``[message, None]`` as produced by ``user_submit``.
            reasoning_mode: Selected reasoning mode, forwarded to ``stream_response``.
            temperature: Sampling temperature, forwarded to ``stream_response``.
            max_tokens: Token limit, forwarded to ``stream_response``.

        Yields:
            The transcript after each streamed update, with the last pair's reply
            replaced by the text received so far. When there is no pending
            message, the transcript is yielded once, unchanged.
        """
        # No pending user message (empty transcript, or the last turn already has
        # a reply). This function is a generator, so the transcript has to be
        # yielded: a `return history` value would be discarded.
        if not history or history[-1][1] is not None:
            yield history
            return

        message = history[-1][0]
        history[-1][1] = ""

        # The pending pair is excluded from the context because stream_response
        # adds `message` itself as the final user turn.
        for partial in stream_response(message, history[:-1], reasoning_mode, temperature, max_tokens):
            history[-1][1] = partial
            yield history
    
    # Wire up events
    # Pressing Enter in the textbox and clicking the button register the same
    # two-step chain: user_submit records the message (unqueued), then
    # bot_respond streams the reply into the chatbot.
    for trigger in (msg_input.submit, submit_btn.click):
        pending_turn = trigger(
            user_submit,
            inputs=[msg_input, chatbot],
            outputs=[msg_input, chatbot],
            queue=False,
        )
        pending_turn.then(
            bot_respond,
            inputs=[chatbot, reasoning_mode, temperature, max_tokens],
            outputs=chatbot,
        )

    # Clearing replaces the transcript with an empty list.
    clear_btn.click(lambda: [], outputs=chatbot)


if __name__ == "__main__":
    # Configure the event queue (max_size=10) before starting the server.
    demo.queue(max_size=10)

    # Listen on all interfaces at port 7860 and surface errors in the UI.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "show_error": True,
    }
    demo.launch(**launch_options)