Spaces:

wangsheng
/

DeepSeekV4Chat

Running

App Files Community

wangsheng committed on Apr 24

Commit

b920ac7

verified ·

1 Parent(s): 7bd0413

Update app.py

Browse files

Files changed (1) hide show

app.py +103 -499

app.py CHANGED Viewed

@@ -3,7 +3,7 @@ import gradio as gr
 from openai import OpenAI
 import os
 import time
-from typing import List, Tuple, Optional
 # ==================== Configuration ====================
 DEFAULT_SYSTEM_PROMPT = "You are DeepSeek-V4, an advanced AI assistant with strong reasoning capabilities. Provide accurate, helpful, and well-reasoned responses."
@@ -32,8 +32,7 @@ def get_client():
             "Please set your API key:\n"
             "1. Get your key from: https://platform.deepseek.com/api_keys\n"
             "2. Set environment variable:\n"
-            "   export DEEPSEEK_API_KEY='your-api-key-here'\n"
-            "   or create a .env file with: DEEPSEEK_API_KEY=your-api-key-here"
         )
     return OpenAI(
@@ -51,114 +50,22 @@ def generate_response(
     top_p: float = 1.0,
     system_prompt: str = DEFAULT_SYSTEM_PROMPT,
     show_thinking: bool = True
-) -> Tuple[str, List[Tuple[str, str]], str, str]:
-    """
-    Generate response using DeepSeek API
-    Returns:
-        Tuple of (empty_message, updated_history, response_text, thinking_text, status)
-    """
-    if not message.strip():
-        return "", history, "", "", "Please enter a message."
-    client = get_client()
-    # Build messages array
-    messages = [{"role": "system", "content": system_prompt}]
-    # Add conversation history
-    for user_msg, assistant_msg in history:
-        messages.append({"role": "user", "content": user_msg})
-        if assistant_msg:
-            messages.append({"role": "assistant", "content": assistant_msg})
-    # Add current message
-    messages.append({"role": "user", "content": message})
-    # Prepare API parameters
-    reasoning_effort = REASONING_EFFORT_MAP.get(thinking_mode, "high")
-    thinking_type = THINKING_TYPE_MAP.get(thinking_mode, "enabled")
-    try:
-        start_time = time.time()
-        # Call DeepSeek API
-        response = client.chat.completions.create(
-            model="deepseek-v4-pro",
-            messages=messages,
-            stream=False,
-            max_tokens=max_tokens,
-            temperature=temperature,
-            top_p=top_p,
-            reasoning_effort=reasoning_effort,
-            extra_body={
-                "thinking": {"type": thinking_type}
-            }
-        )
-        end_time = time.time()
-        generation_time = end_time - start_time
-        # Extract response content
-        choice = response.choices[0]
-        message_obj = choice.message
-        # Get main content
-        content = message_obj.content or ""
-        # Get reasoning/thinking content if available
-        thinking_content = ""
-        if hasattr(message_obj, 'reasoning_content') and message_obj.reasoning_content:
-            thinking_content = message_obj.reasoning_content
-        # Update history
-        full_response = content
-        if show_thinking and thinking_content:
-            full_response = f"{thinking_content}\n\n---\n\n{content}"
-        # Add usage info if available
-        if hasattr(response, 'usage') and response.usage:
-            usage = response.usage
-            tokens_info = f"📊 Input: {usage.prompt_tokens} tokens | Output: {usage.completion_tokens} tokens | Total: {usage.total_tokens} tokens"
-        else:
-            tokens_info = ""
-        status = f"✅ Generated in {generation_time:.2f}s | 🎯 Mode: {thinking_mode} | {tokens_info}"
-        return "", history + [(message, full_response)], content, thinking_content, status
-    except Exception as e:
-        error_msg = f"❌ Error: {str(e)}"
-        return "", history + [(message, error_msg)], "", "", error_msg
-def generate_response_stream(
-    message: str,
-    history: List[Tuple[str, str]],
-    thinking_mode: str = "Think High",
-    max_tokens: int = 4096,
-    temperature: float = 0.7,
-    top_p: float = 1.0,
-    system_prompt: str = DEFAULT_SYSTEM_PROMPT,
-    show_thinking: bool = True
 ):
-    """
-    Stream response from DeepSeek API
-    Yields:
-        Tuple of (empty_message, updated_history, content_so_far, thinking_so_far, status)
-    """
     if not message.strip():
-        yield "", history, "", "", "Please enter a message."
         return
     client = get_client()
     # Build messages array
     messages = [{"role": "system", "content": system_prompt}]
     for user_msg, assistant_msg in history:
         messages.append({"role": "user", "content": user_msg})
         if assistant_msg:
             messages.append({"role": "assistant", "content": assistant_msg})
     messages.append({"role": "user", "content": message})
     reasoning_effort = REASONING_EFFORT_MAP.get(thinking_mode, "high")
@@ -167,7 +74,7 @@ def generate_response_stream(
     try:
         start_time = time.time()
-        # Stream response
         stream = client.chat.completions.create(
             model="deepseek-v4-pro",
             messages=messages,
@@ -176,9 +83,7 @@ def generate_response_stream(
             temperature=temperature,
             top_p=top_p,
             reasoning_effort=reasoning_effort,
-            extra_body={
-                "thinking": {"type": thinking_type}
-            }
         )
         content_chunks = []
@@ -188,7 +93,6 @@ def generate_response_stream(
             if chunk.choices[0].delta.content:
                 content_chunks.append(chunk.choices[0].delta.content)
-            # Check for reasoning content in stream
             if hasattr(chunk.choices[0].delta, 'reasoning_content'):
                 if chunk.choices[0].delta.reasoning_content:
                     thinking_chunks.append(chunk.choices[0].delta.reasoning_content)
@@ -198,488 +102,188 @@ def generate_response_stream(
             full_response = current_content
             if show_thinking and current_thinking:
-                full_response = f"🧠 Thinking:\n{current_thinking}\n\n💬 Response:\n{current_content}"
             elapsed = time.time() - start_time
-            status = f"🔄 Streaming... ({elapsed:.1f}s) | Mode: {thinking_mode}"
-            yield "", history + [(message, full_response)], current_content, current_thinking, status
-        # Final yield with complete response
         end_time = time.time()
         final_content = ''.join(content_chunks)
         final_thinking = ''.join(thinking_chunks)
         full_response = final_content
         if show_thinking and final_thinking:
-            full_response = f"🧠 Thinking:\n{final_thinking}\n\n💬 Response:\n{final_content}"
-        status = f"✅ Done in {end_time - start_time:.2f}s | Mode: {thinking_mode}"
-        yield "", history + [(message, full_response)], final_content, final_thinking, status
     except Exception as e:
         error_msg = f"❌ Error: {str(e)}"
-        yield "", history + [(message, error_msg)], "", "", error_msg
 # ==================== Gradio Interface ====================
 def create_demo():
     """Create the Gradio interface"""
-    # Use gr.Blocks without theme and css (moved to launch)
-    with gr.Blocks(
-        title="DeepSeek-V4 Pro - API Demo",
-        analytics_enabled=False
-    ) as demo:
         # Header
-        gr.HTML("""
-            <div class="deepseek-header">
-                <h1>🚀 DeepSeek-V4 Pro</h1>
-                <p>Towards Highly Efficient Million-Token Context Intelligence</p>
-                <p style="font-size: 0.9em; opacity: 0.8;">Powered by DeepSeek API • 1.6T Parameters • 49B Activated</p>
-            </div>
         """)
-        # Main layout
         with gr.Row():
-            # Left sidebar - Configuration
-            with gr.Column(scale=1, min_width=350):
-                # Model Info Card
-                gr.HTML("""
-                    <div class="model-info">
-                        <h3 style="margin-top: 0;">📊 Model Specifications</h3>
-                        <div class="benchmark-grid">
-                            <div class="benchmark-item">
-                                <div class="value">1.6T</div>
-                                <div class="label">Total Parameters</div>
-                            </div>
-                            <div class="benchmark-item">
-                                <div class="value">49B</div>
-                                <div class="label">Activated Parameters</div>
-                            </div>
-                            <div class="benchmark-item">
-                                <div class="value">1M</div>
-                                <div class="label">Context Length</div>
-                            </div>
-                            <div class="benchmark-item">
-                                <div class="value">32T+</div>
-                                <div class="label">Training Tokens</div>
-                            </div>
-                        </div>
-                        <h3>🎯 Key Benchmarks</h3>
-                        <div class="benchmark-grid">
-                            <div class="benchmark-item">
-                                <div class="value">93.5</div>
-                                <div class="label">LiveCodeBench</div>
-                            </div>
-                            <div class="benchmark-item">
-                                <div class="value">3206</div>
-                                <div class="label">Codeforces Rating</div>
-                            </div>
-                            <div class="benchmark-item">
-                                <div class="value">87.5</div>
-                                <div class="label">MMLU-Pro</div>
-                            </div>
-                            <div class="benchmark-item">
-                                <div class="value">80.6%</div>
-                                <div class="label">SWE Verified</div>
-                            </div>
-                        </div>
-                        <h3>💡 Key Innovations</h3>
-                        <ul style="padding-left: 20px;">
-                            <li>Hybrid Attention (CSA + HCA)</li>
-                            <li>Manifold-Constrained Hyper-Connections</li>
-                            <li>Muon Optimizer</li>
-                            <li>Two-stage Post-training</li>
-                            <li>FP4 + FP8 Mixed Precision</li>
-                        </ul>
-                    </div>
-                """)
-                # Configuration Panel
-                with gr.Group():
-                    gr.Markdown("### ⚙️ Generation Settings")
-                    thinking_mode = gr.Radio(
-                        choices=["Non-think", "Think High", "Think Max"],
-                        value="Think High",
-                        label="🧠 Reasoning Mode",
-                        info="""
-                        • Non-think: Fast, intuitive responses for daily tasks
-                        • Think High: Deliberate reasoning for complex problems
-                        • Think Max: Maximum effort for hardest challenges
-                        """
-                    )
-                    show_thinking = gr.Checkbox(
-                        value=True,
-                        label="📝 Show Thinking Process",
-                        info="Display the model's reasoning steps"
-                    )
-                    system_prompt = gr.Textbox(
-                        label="📋 System Prompt",
-                        value=DEFAULT_SYSTEM_PROMPT,
-                        lines=3,
-                        max_lines=5
-                    )
-                    with gr.Accordion("🔧 Advanced Parameters", open=False):
-                        max_tokens = gr.Slider(
-                            minimum=64,
-                            maximum=32768,
-                            value=4096,
-                            step=64,
-                            label="Max Tokens"
-                        )
-                        temperature = gr.Slider(
-                            minimum=0.0,
-                            maximum=2.0,
-                            value=0.7,
-                            step=0.05,
-                            label="Temperature",
-                            info="0 = deterministic, 1+ = creative"
-                        )
-                        top_p = gr.Slider(
-                            minimum=0.0,
-                            maximum=1.0,
-                            value=1.0,
-                            step=0.05,
-                            label="Top P"
-                        )
-                        stream_output = gr.Checkbox(
-                            value=True,
-                            label="📡 Stream Output",
-                            info="Show response as it's generated"
-                        )
-                # Quick examples
-                gr.Markdown("### 💡 Quick Examples")
-                examples = [
-                    "Explain quantum computing to a 10-year-old",
-                    "Write a Python function for Fibonacci with memoization",
-                    "What are the key features of DeepSeek-V4?",
-                    "Solve: If x² + y² = 25 and x + y = 7, find x and y",
-                    "Design a REST API for a social media platform",
-                ]
                 gr.Examples(
-                    examples=examples,
-                    inputs=gr.Textbox(label="Click to try", visible=False),
                 )
-            # Right - Chat Interface
             with gr.Column(scale=2):
-                # Chatbot - Gradio 6.0 compatible (removed bubble_full_width)
                 chatbot = gr.Chatbot(
                     label="💬 Chat with DeepSeek-V4 Pro",
-                    height=550,
-                    type="messages"
                 )
-                # Thinking process display
                 with gr.Accordion("🧠 Thinking Process", open=True):
-                    thinking_display = gr.Markdown(
-                        value="*The model's reasoning will appear here...*",
-                        elem_classes="thinking-box"
-                    )
-                # Input area
                 with gr.Row():
                     message_input = gr.Textbox(
-                        label="Your Message",
-                        placeholder="Type your message here... (Shift+Enter for new line)",
                         lines=2,
-                        max_lines=5,
-                        scale=9,
-                        autofocus=True
-                    )
-                    send_btn = gr.Button(
-                        "🚀 Send",
-                        variant="primary",
-                        scale=1,
-                        size="lg"
                     )
-                # Control buttons
                 with gr.Row():
-                    clear_btn = gr.Button("🗑️ Clear Chat", size="sm")
-                    retry_btn = gr.Button("🔄 Retry", size="sm", variant="secondary")
-                # Status bar
                 status_display = gr.Textbox(
                     label="Status",
-                    value="✅ Ready | Using DeepSeek API (deepseek-v4-pro)",
-                    interactive=False,
-                    elem_classes="status-bar"
                 )
         # Footer
-        gr.HTML("""
-            <div style="text-align: center; margin-top: 30px; padding: 20px; color: #666; border-top: 1px solid #e0e0e0;">
-                <p style="margin: 5px 0;">
-                    <a href="https://huggingface.co/deepseek-ai/DeepSeek-V4-Pro" target="_blank">📦 Model Card</a> |
-                    <a href="https://platform.deepseek.com/api_keys" target="_blank">🔑 Get API Key</a> |
-                    <a href="https://platform.deepseek.com/docs" target="_blank">📚 API Docs</a> |
-                    <a href="https://deepseek.ai" target="_blank">🌐 Homepage</a>
-                </p>
-                <p style="margin: 5px 0; font-size: 0.9em;">
-                    ⚡ Powered by DeepSeek API • Streaming Available • MIT License
-                </p>
-                <p style="margin: 5px 0; font-size: 0.8em; opacity: 0.7;">
-                    DeepSeek-AI © 2026 • All benchmarks are for reference only
-                </p>
-            </div>
         """)
         # ==================== Event Handlers ====================
         def process_message(
-            message: str,
-            history: list,
-            thinking_mode: str,
-            show_thinking: bool,
-            system_prompt: str,
-            max_tokens: int,
-            temperature: float,
-            top_p: float,
-            stream_output: bool
         ):
-            """Process message with streaming or non-streaming mode"""
             if not message.strip():
                 yield message, history, "", "Please enter a message."
                 return
-            # Check API key
             if not os.environ.get('DEEPSEEK_API_KEY'):
-                error_msg = "⚠️ **API Key Missing**\n\nPlease set your `DEEPSEEK_API_KEY` environment variable.\nGet one at: https://platform.deepseek.com/api_keys"
-                if history is None:
-                    history = []
-                history.append({"role": "user", "content": message})
-                history.append({"role": "assistant", "content": error_msg})
-                yield "", history, "", "❌ API Key not configured"
                 return
-            if stream_output:
-                # Use streaming
-                # Convert history format for internal use
-                internal_history = []
-                if history:
-                    for i in range(0, len(history), 2):
-                        if i + 1 < len(history):
-                            internal_history.append((history[i]["content"], history[i+1]["content"]))
-                for msg, hist, content, thinking, status in generate_response_stream(
-                    message, internal_history, thinking_mode, max_tokens,
-                    temperature, top_p, system_prompt, show_thinking
-                ):
-                    # Convert hist back to Gradio format
-                    gr_history = []
-                    for user_msg, assistant_msg in hist:
-                        gr_history.append({"role": "user", "content": user_msg})
-                        gr_history.append({"role": "assistant", "content": assistant_msg})
-                    yield msg, gr_history, thinking, status
-            else:
-                # Use non-streaming
-                internal_history = []
-                if history:
-                    for i in range(0, len(history), 2):
-                        if i + 1 < len(history):
-                            internal_history.append((history[i]["content"], history[i+1]["content"]))
-                msg, hist, content, thinking, status = generate_response(
-                    message, internal_history, thinking_mode, max_tokens,
-                    temperature, top_p, system_prompt, show_thinking
-                )
-                gr_history = []
-                for user_msg, assistant_msg in hist:
-                    gr_history.append({"role": "user", "content": user_msg})
-                    gr_history.append({"role": "assistant", "content": assistant_msg})
-                yield msg, gr_history, thinking, status
-        # Wire up send button
         send_btn.click(
             fn=process_message,
-            inputs=[
-                message_input, chatbot, thinking_mode, show_thinking,
-                system_prompt, max_tokens, temperature, top_p, stream_output
-            ],
-            outputs=[message_input, chatbot, thinking_display, status_display],
-            show_progress="hidden"
         )
-        # Wire up Enter key
         message_input.submit(
             fn=process_message,
-            inputs=[
-                message_input, chatbot, thinking_mode, show_thinking,
-                system_prompt, max_tokens, temperature, top_p, stream_output
-            ],
-            outputs=[message_input, chatbot, thinking_display, status_display],
-            show_progress="hidden"
         )
-        # Clear chat
-        def clear_chat():
-            return (
-                [],
-                "*The model's reasoning will appear here...*",
-                "✅ Chat cleared. Ready for new conversation."
-            )
         clear_btn.click(
-            fn=clear_chat,
             outputs=[chatbot, thinking_display, status_display]
         )
-        # Retry last message
-        def retry_last(history):
-            if not history or len(history) < 2:
-                return history, ""
-            # Remove last assistant message, keep last user message
-            last_user_msg = history[-2]["content"] if len(history) >= 2 else ""
-            new_history = history[:-2] if len(history) >= 2 else []
-            return new_history, last_user_msg
-        retry_btn.click(
-            fn=retry_last,
-            inputs=[chatbot],
-            outputs=[chatbot, message_input]
-        )
     return demo
 # ==================== Main ====================
 if __name__ == "__main__":
-    # Try to load .env file
     try:
         from dotenv import load_dotenv
         load_dotenv()
     except ImportError:
         pass
-    # Check environment
     api_key = os.environ.get('DEEPSEEK_API_KEY')
     if not api_key:
-        print("\n" + "=" * 60)
         print("⚠️  DEEPSEEK_API_KEY not found!")
-        print("=" * 60)
-        print("\nTo get started:")
-        print("1. Get your API key: https://platform.deepseek.com/api_keys")
-        print("2. Set the environment variable:")
-        print("   export DEEPSEEK_API_KEY='your-key-here'")
-        print("\nOr create a .env file:")
-        print('   echo DEEPSEEK_API_KEY=your-key-here > .env')
-        print("\n" + "=" * 60 + "\n")
-    # Create demo
     demo = create_demo()
-    # Custom CSS
-    custom_css = """
-    :root {
-        --primary: #667eea;
-        --secondary: #764ba2;
-    }
-    .deepseek-header {
-        text-align: center;
-        margin-bottom: 20px;
-        padding: 30px;
-        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
-        border-radius: 16px;
-        color: white;
-    }
-    .deepseek-header h1 {
-        font-size: 2.8em;
-        font-weight: 800;
-        margin: 0;
-        text-shadow: 2px 2px 4px rgba(0,0,0,0.2);
-    }
-    .deepseek-header p {
-        font-size: 1.2em;
-        opacity: 0.95;
-        margin: 10px 0 0 0;
-    }
-    .model-info {
-        background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
-        padding: 20px;
-        border-radius: 12px;
-        margin-bottom: 20px;
-        border: 1px solid #e0e0e0;
-    }
-    .benchmark-grid {
-        display: grid;
-        grid-template-columns: repeat(auto-fit, minmax(150px, 1fr));
-        gap: 12px;
-        margin: 15px 0;
-    }
-    .benchmark-item {
-        background: white;
-        padding: 12px;
-        border-radius: 8px;
-        text-align: center;
-        box-shadow: 0 2px 4px rgba(0,0,0,0.1);
-        transition: transform 0.2s;
-    }
-    .benchmark-item:hover {
-        transform: translateY(-2px);
-        box-shadow: 0 4px 8px rgba(0,0,0,0.15);
-    }
-    .benchmark-item .value {
-        font-size: 1.5em;
-        font-weight: 700;
-        color: #667eea;
-    }
-    .benchmark-item .label {
-        font-size: 0.85em;
-        color: #666;
-        margin-top: 4px;
-    }
-    .thinking-box {
-        background: #f8f9fa;
-        border-left: 4px solid #667eea;
-        padding: 15px;
-        margin: 10px 0;
-        border-radius: 8px;
-        font-style: italic;
-        color: #555;
-    }
-    .status-bar {
-        padding: 10px;
-        background: #f5f5f5;
-        border-radius: 8px;
-        font-family: monospace;
-        font-size: 0.9em;
-    }
-    """
-    # Launch with Gradio 6.0 compatible parameters
-    demo.queue(
-        max_size=50,
-        default_concurrency_limit=10
-    ).launch(
-        server_name="0.0.0.0",
-        server_port=7860,
-        share=False,
-        debug=False,
-        show_error=True,
-        theme=gr.themes.Soft(),
-        css=custom_css
-    )

 from openai import OpenAI
 import os
 import time
+from typing import List, Tuple
 # ==================== Configuration ====================
 DEFAULT_SYSTEM_PROMPT = "You are DeepSeek-V4, an advanced AI assistant with strong reasoning capabilities. Provide accurate, helpful, and well-reasoned responses."
             "Please set your API key:\n"
             "1. Get your key from: https://platform.deepseek.com/api_keys\n"
             "2. Set environment variable:\n"
+            "   export DEEPSEEK_API_KEY='your-api-key-here'"
         )
     return OpenAI(
     top_p: float = 1.0,
     system_prompt: str = DEFAULT_SYSTEM_PROMPT,
     show_thinking: bool = True
 ):
+    """Generate response using DeepSeek API"""
     if not message.strip():
+        yield "", history, "", "Please enter a message."
         return
     client = get_client()
     # Build messages array
     messages = [{"role": "system", "content": system_prompt}]
     for user_msg, assistant_msg in history:
         messages.append({"role": "user", "content": user_msg})
         if assistant_msg:
             messages.append({"role": "assistant", "content": assistant_msg})
     messages.append({"role": "user", "content": message})
     reasoning_effort = REASONING_EFFORT_MAP.get(thinking_mode, "high")
     try:
         start_time = time.time()
+        # Streaming call
         stream = client.chat.completions.create(
             model="deepseek-v4-pro",
             messages=messages,
             temperature=temperature,
             top_p=top_p,
             reasoning_effort=reasoning_effort,
+            extra_body={"thinking": {"type": thinking_type}}
         )
         content_chunks = []
             if chunk.choices[0].delta.content:
                 content_chunks.append(chunk.choices[0].delta.content)
             if hasattr(chunk.choices[0].delta, 'reasoning_content'):
                 if chunk.choices[0].delta.reasoning_content:
                     thinking_chunks.append(chunk.choices[0].delta.reasoning_content)
             full_response = current_content
             if show_thinking and current_thinking:
+                full_response = f"[Thinking]\n{current_thinking}\n\n[Response]\n{current_content}"
             elapsed = time.time() - start_time
+            status = f"🔄 Streaming... ({elapsed:.1f}s)"
+            yield "", history + [(message, full_response)], current_thinking, status
+        # Final
         end_time = time.time()
         final_content = ''.join(content_chunks)
         final_thinking = ''.join(thinking_chunks)
         full_response = final_content
         if show_thinking and final_thinking:
+            full_response = f"[Thinking]\n{final_thinking}\n\n[Response]\n{final_content}"
+        status = f"✅ Done in {end_time - start_time:.2f}s"
+        yield "", history + [(message, full_response)], final_thinking, status
     except Exception as e:
         error_msg = f"❌ Error: {str(e)}"
+        yield "", history + [(message, error_msg)], "", error_msg
 # ==================== Gradio Interface ====================
 def create_demo():
     """Build and return the Gradio ``Blocks`` app for the chat demo.
     The page has a settings sidebar (reasoning mode, show-thinking toggle,
     system prompt, sampling sliders, example prompts) next to a chat column
     (chatbot, thinking panel, message box, send/clear buttons, status line).
     Example prompts are wired to the message box, so clicking one fills it in.
     Relies on module-level names: ``gr``, ``os``, ``DEFAULT_SYSTEM_PROMPT``
     and the ``generate_response`` generator.
     Returns:
         The constructed ``gr.Blocks`` instance. It is not queued or launched
         here; that is left to the caller.
     """
     with gr.Blocks(title="DeepSeek-V4 Pro Demo") as demo:
         # Header
         gr.Markdown("""
         # 🚀 DeepSeek-V4 Pro
         **Towards Highly Efficient Million-Token Context Intelligence**
         Powered by DeepSeek API • 1.6T Parameters • 49B Activated • 1M Context
         """)
         # The message box is constructed up front but not rendered yet, so the
         # sidebar examples (declared before the chat column) can target it.
         # It is placed into the layout further down via .render().
         message_input = gr.Textbox(
             label="Message",
             placeholder="Type your message...",
             lines=2,
             scale=9,
             render=False,
         )
         with gr.Row():
             # Left sidebar
             with gr.Column(scale=1, min_width=300):
                 gr.Markdown("### ⚙️ Settings")
                 thinking_mode = gr.Radio(
                     choices=["Non-think", "Think High", "Think Max"],
                     value="Think High",
                     label="🧠 Reasoning Mode"
                 )
                 show_thinking = gr.Checkbox(
                     value=True,
                     label="📝 Show Thinking Process"
                 )
                 system_prompt = gr.Textbox(
                     label="📋 System Prompt",
                     value=DEFAULT_SYSTEM_PROMPT,
                     lines=3
                 )
                 with gr.Accordion("🔧 Advanced", open=False):
                     max_tokens = gr.Slider(64, 32768, value=4096, step=64, label="Max Tokens")
                     temperature = gr.Slider(0.0, 2.0, value=0.7, step=0.05, label="Temperature")
                     top_p = gr.Slider(0.0, 1.0, value=1.0, step=0.05, label="Top P")
                 gr.Markdown("### 💡 Examples")
                 # Clicking an example copies its text into the message box.
                 gr.Examples(
                     examples=[
                         "Explain quantum computing simply",
                         "Write a Python Fibonacci function",
                         "What's new in DeepSeek-V4?",
                     ],
                     inputs=message_input,
                 )
             # Right - Chat
             with gr.Column(scale=2):
                 # NOTE(review): history is exchanged as (user, assistant) tuples
                 # below; confirm the installed Gradio Chatbot accepts that format.
                 chatbot = gr.Chatbot(
                     label="💬 Chat with DeepSeek-V4 Pro",
                     height=500
                 )
                 with gr.Accordion("🧠 Thinking Process", open=True):
                     thinking_display = gr.Markdown("*Reasoning will appear here...*")
                 with gr.Row():
                     # Place the pre-built message box next to the send button.
                     message_input.render()
                     send_btn = gr.Button("🚀 Send", variant="primary", scale=1)
                 with gr.Row():
                     clear_btn = gr.Button("🗑️ Clear", size="sm")
                 status_display = gr.Textbox(
                     label="Status",
                     value="✅ Ready",
                     interactive=False
                 )
         # Footer
         gr.Markdown("""
         ---
         [📦 Model Card](https://huggingface.co/deepseek-ai/DeepSeek-V4-Pro) |
         [🔑 Get API Key](https://platform.deepseek.com/api_keys) |
         [📚 Docs](https://platform.deepseek.com/docs)
         """)
         # ==================== Event Handlers ====================
         def process_message(
             message, history, thinking_mode, show_thinking,
             system_prompt, max_tokens, temperature, top_p
         ):
             """Validate the input, then relay updates from ``generate_response``.
             Yields 4-tuples in the order of the bound outputs:
             (message box text, chat history, thinking text, status text).
             """
             if not message.strip():
                 # Nothing to send: leave the box and history as they are.
                 yield message, history, "", "Please enter a message."
                 return
             if not os.environ.get('DEEPSEEK_API_KEY'):
                 error = "⚠️ Please set DEEPSEEK_API_KEY environment variable"
                 # Build a new list rather than appending to the caller's history.
                 yield "", (history or []) + [(message, error)], "", "❌ API Key missing"
                 return
             history = history or []
             for msg, hist, thinking, status in generate_response(
                 message, history, thinking_mode, max_tokens,
                 temperature, top_p, system_prompt, show_thinking
             ):
                 yield msg, hist, thinking, status
         # Events: the send button and Enter in the message box share one binding.
         chat_inputs = [message_input, chatbot, thinking_mode, show_thinking,
                        system_prompt, max_tokens, temperature, top_p]
         chat_outputs = [message_input, chatbot, thinking_display, status_display]
         send_btn.click(
             fn=process_message,
             inputs=chat_inputs,
             outputs=chat_outputs
         )
         message_input.submit(
             fn=process_message,
             inputs=chat_inputs,
             outputs=chat_outputs
         )
         # Reset the chat, the thinking panel and the status line.
         clear_btn.click(
             fn=lambda: ([], "*Reasoning will appear here...*", "✅ Cleared"),
             outputs=[chatbot, thinking_display, status_display]
         )
     return demo
 # ==================== Main ====================
 if __name__ == "__main__":
     # Load variables from a .env file if the `dotenv` module is importable;
     # otherwise carry on with the existing process environment.
     try:
         from dotenv import load_dotenv
         load_dotenv()
     except ImportError:
         pass
     # Warn on the console when no key is configured; startup still proceeds.
     api_key = os.environ.get('DEEPSEEK_API_KEY')
     if not api_key:
         rule = "=" * 50
         print(
             "\n" + rule,
             "⚠️  DEEPSEEK_API_KEY not found!",
             rule,
             "Get one: https://platform.deepseek.com/api_keys",
             "Set it:  export DEEPSEEK_API_KEY='your-key'\n",
             sep="\n",
         )
     demo = create_demo()
     # First attempt: bounded queue, explicit host and port, no theme/css.
     try:
         queued_demo = demo.queue(max_size=50)
         queued_demo.launch(
             server_name="0.0.0.0",
             server_port=7860,
             share=False
         )
     except TypeError:
         # Fallback: retry with default queue and launch settings. Note this
         # drops the explicit host/port. Presumably meant for Gradio versions
         # whose signatures reject the arguments above -- TODO confirm.
         demo.queue().launch()