Spaces:

wangsheng
/

DeepSeekV4Chat

Running

App Files Files Community

wangsheng committed on about 1 month ago

Commit

a151dea

verified ·

1 Parent(s): 058be44

Create app.py

Browse files

Files changed (1) hide show

app.py +423 -0

app.py ADDED Viewed

	@@ -0,0 +1,423 @@

+## 2. Hugging Face Gradio Demo (app.py)
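+# ```python  (Markdown fence commented out: a bare fence line is a SyntaxError in app.py; remove any matching closing fence further down as well)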
+# app.py
+import gradio as gr
+import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM
+import os
+from pathlib import Path
+import json
+import time
+# Configuration
+MODEL_NAME = "deepseek-ai/DeepSeek-V4-Pro"
+MODEL_CACHE_DIR = "./model_cache"
+MAX_CONTEXT_LENGTH = 1000000  # 1M tokens
+DEFAULT_MAX_TOKENS = 2048
+DEFAULT_TEMPERATURE = 0.7
+# Model and tokenizer will be loaded lazily
+model = None
+tokenizer = None
+def load_model():
+    """Load model and tokenizer (lazy loading)"""
+    global model, tokenizer
+    if model is None:
+        print("Loading tokenizer...")
+        tokenizer = AutoTokenizer.from_pretrained(
+            MODEL_NAME,
+            cache_dir=MODEL_CACHE_DIR,
+            trust_remote_code=True
+        )
+        print("Loading model... This may take several minutes...")
+        model = AutoModelForCausalLM.from_pretrained(
+            MODEL_NAME,
+            cache_dir=MODEL_CACHE_DIR,
+            device_map="auto",
+            torch_dtype=torch.bfloat16,
+            trust_remote_code=True,
+            low_cpu_mem_usage=True
+        )
+        print("Model loaded successfully!")
+    return model, tokenizer
+def generate_response(
+    message,
+    history,
+    thinking_mode="Think High",
+    max_tokens=DEFAULT_MAX_TOKENS,
+    temperature=DEFAULT_TEMPERATURE,
+    top_p=1.0,
+    top_k=50,
+    system_prompt=""
+):
+    """Generate response from the model"""
+    # Load model if not loaded
+    model, tokenizer = load_model()
+    # Build conversation history
+    messages = []
+    # Add system prompt if provided
+    if system_prompt:
+        messages.append({"role": "system", "content": system_prompt})
+    # Add chat history
+    for h in history:
+        messages.append({"role": "user", "content": h[0]})
+        if h[1]:
+            messages.append({"role": "assistant", "content": h[1]})
+    # Add current message
+    messages.append({"role": "user", "content": message})
+    # Map thinking mode to model format
+    thinking_mode_map = {
+        "Non-think": "non_thinking",
+        "Think High": "thinking",
+        "Think Max": "thinking_max"
+    }
+    try:
+        # Try to use the custom encoding if available
+        try:
+            from encoding_dsv4 import encode_messages
+            prompt = encode_messages(
+                messages,
+                thinking_mode=thinking_mode_map[thinking_mode]
+            )
+        except ImportError:
+            # Fallback: simple concatenation
+            prompt = ""
+            for msg in messages:
+                if msg["role"] == "system":
+                    prompt += f"System: {msg['content']}\n\n"
+                elif msg["role"] == "user":
+                    prompt += f"User: {msg['content']}\n\n"
+                elif msg["role"] == "assistant":
+                    prompt += f"Assistant: {msg['content']}\n\n"
+            prompt += "Assistant: "
+        # Tokenize input
+        inputs = tokenizer(prompt, return_tensors="pt")
+        # Move to appropriate device
+        if torch.cuda.is_available():
+            inputs = {k: v.cuda() for k, v in inputs.items()}
+        # Check context length
+        input_length = inputs['input_ids'].shape[1]
+        if input_length > MAX_CONTEXT_LENGTH:
+            raise gr.Error(f"Input too long: {input_length} tokens. Maximum: {MAX_CONTEXT_LENGTH}")
+        # Generate with streaming
+        start_time = time.time()
+        generation_config = {
+            "max_new_tokens": max_tokens,
+            "temperature": temperature,
+            "top_p": top_p,
+            "top_k": top_k,
+            "do_sample": True if temperature > 0 else False,
+            "pad_token_id": tokenizer.pad_token_id,
+            "eos_token_id": tokenizer.eos_token_id,
+        }
+        # For Think Max mode, adjust parameters
+        if thinking_mode == "Think Max":
+            generation_config["max_new_tokens"] = min(max_tokens * 2, 32768)
+        # Generate response
+        outputs = model.generate(**inputs, **generation_config)
+        # Decode response
+        full_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
+        response = full_output[len(prompt):]
+        end_time = time.time()
+        generation_time = end_time - start_time
+        # Add generation info
+        response += f"\n\n---\n⚡ Generated in {generation_time:.2f}s | 📊 {len(outputs[0]) - input_length} tokens | 🌡️ Temperature: {temperature}"
+        return response
+    except Exception as e:
+        raise gr.Error(f"Generation failed: {str(e)}")
+def clear_chat():
+    """Clear chat history"""
+    return None, None
+# Create the Gradio interface
+with gr.Blocks(
+    title="DeepSeek-V4 Demo",
+    theme=gr.themes.Soft(),
+    css="""
+        .deepseek-header {
+            text-align: center;
+            margin-bottom: 20px;
+        }
+        .deepseek-header h1 {
+            background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
+            -webkit-background-clip: text;
+            -webkit-text-fill-color: transparent;
+            font-size: 2.5em;
+        }
+        .model-info {
+            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+            color: white;
+            padding: 20px;
+            border-radius: 10px;
+            margin-bottom: 20px;
+        }
+        .benchmark-grid {
+            display: grid;
+            grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
+            gap: 10px;
+            margin: 10px 0;
+        }
+        .benchmark-item {
+            background: rgba(255,255,255,0.1);
+            padding: 10px;
+            border-radius: 5px;
+            text-align: center;
+        }
+    """
+) as demo:
+    gr.HTML("""
+        <div class="deepseek-header">
+            <h1>🚀 DeepSeek-V4</h1>
+            <p>Towards Highly Efficient Million-Token Context Intelligence</p>
+        </div>
+    """)
+    with gr.Row():
+        with gr.Column(scale=1):
+            # Model info panel
+            gr.HTML("""
+                <div class="model-info">
+                    <h3>📊 Model Specifications</h3>
+                    <div class="benchmark-grid">
+                        <div class="benchmark-item">
+                            <b>1.6T</b><br>Total Parameters
+                        </div>
+                        <div class="benchmark-item">
+                            <b>49B</b><br>Activated Parameters
+                        </div>
+                        <div class="benchmark-item">
+                            <b>1M</b><br>Context Length
+                        </div>
+                        <div class="benchmark-item">
+                            <b>32T+</b><br>Training Tokens
+                        </div>
+                    </div>
+                    <h3>🎯 Key Benchmarks</h3>
+                    <div class="benchmark-grid">
+                        <div class="benchmark-item">
+                            <b>93.5</b><br>LiveCodeBench
+                        </div>
+                        <div class="benchmark-item">
+                            <b>3206</b><br>Codeforces Rating
+                        </div>
+                        <div class="benchmark-item">
+                            <b>87.5</b><br>MMLU-Pro
+                        </div>
+                        <div class="benchmark-item">
+                            <b>80.6%</b><br>SWE Verified
+                        </div>
+                    </div>
+                    <h3>💡 Innovation Highlights</h3>
+                    <ul>
+                        <li>Hybrid Attention (CSA + HCA)</li>
+                        <li>Manifold-Constrained Hyper-Connections</li>
+                        <li>Muon Optimizer</li>
+                        <li>Two-stage Post-training</li>
+                        <li>FP4 + FP8 Mixed Precision</li>
+                    </ul>
+                </div>
+            """)
+            # Configuration panel
+            with gr.Group():
+                gr.Markdown("### ⚙️ Configuration")
+                thinking_mode = gr.Radio(
+                    choices=["Non-think", "Think High", "Think Max"],
+                    value="Think High",
+                    label="Reasoning Mode",
+                    info="Non-think: Fast responses | Think High: Careful analysis | Think Max: Maximum reasoning"
+                )
+                system_prompt = gr.Textbox(
+                    label="System Prompt",
+                    placeholder="Enter system instructions...",
+                    lines=3,
+                    value="You are DeepSeek-V4, an advanced AI assistant with strong reasoning capabilities. Provide accurate and helpful responses."
+                )
+                with gr.Accordion("Advanced Parameters", open=False):
+                    max_tokens = gr.Slider(
+                        minimum=64,
+                        maximum=32768,
+                        value=2048,
+                        step=64,
+                        label="Max Tokens",
+                        info="Maximum number of tokens to generate"
+                    )
+                    temperature = gr.Slider(
+                        minimum=0.0,
+                        maximum=2.0,
+                        value=0.7,
+                        step=0.1,
+                        label="Temperature",
+                        info="Higher values = more creative, lower = more focused"
+                    )
+                    top_p = gr.Slider(
+                        minimum=0.0,
+                        maximum=1.0,
+                        value=1.0,
+                        step=0.05,
+                        label="Top P"
+                    )
+                    top_k = gr.Slider(
+                        minimum=1,
+                        maximum=100,
+                        value=50,
+                        step=1,
+                        label="Top K"
+                    )
+            # Quick examples
+            gr.Markdown("### 💬 Example Prompts")
+            examples = gr.Examples(
+                examples=[
+                    ["Explain quantum entanglement like I'm 5 years old"],
+                    ["Write a Python function to find prime numbers using the Sieve of Eratosthenes"],
+                    ["What are the key differences between DeepSeek-V4 and previous versions?"],
+                    ["Solve this math problem: Find the derivative of f(x) = x³sin(x)"],
+                    ["Design a REST API for a todo application"],
+                ],
+                inputs=[message] if 'message' in locals() else None,
+            )
+        with gr.Column(scale=2):
+            # Chat interface
+            chatbot = gr.Chatbot(
+                label="Chat with DeepSeek-V4",
+                height=600,
+                show_copy_button=True,
+                avatar_images=(
+                    "https://huggingface.co/datasets/huggingface/badges/raw/main/open-in-hf-spaces-sm.svg",
+                    "https://huggingface.co/datasets/huggingface/badges/raw/main/open-in-hf-spaces-sm.svg"
+                )
+            )
+            with gr.Row():
+                message = gr.Textbox(
+                    label="Your Message",
+                    placeholder="Type your message here... (Shift+Enter for new line, Enter to send)",
+                    lines=3,
+                    scale=9
+                )
+                send_btn = gr.Button("Send", variant="primary", scale=1)
+            with gr.Row():
+                clear_btn = gr.Button("Clear Chat", size="sm")
+                stop_btn = gr.Button("Stop Generation", size="sm", variant="stop")
+            # Status indicator
+            status = gr.Textbox(
+                label="Status",
+                value="Ready to chat! Select your configuration and start a conversation.",
+                interactive=False
+            )
+    # Event handlers
+    def respond(message, history, thinking_mode, system_prompt, max_tokens, temperature, top_p, top_k):
+        """Main response handler"""
+        if not message.strip():
+            return "", history, "Please enter a message."
+        history = history or []
+        history.append([message, None])
+        yield "", history, "Generating..."
+        try:
+            response = generate_response(
+                message,
+                history[:-1],
+                thinking_mode,
+                max_tokens,
+                temperature,
+                top_p,
+                top_k,
+                system_prompt
+            )
+            history[-1][1] = response
+            yield "", history, "Ready"
+        except Exception as e:
+            history[-1][1] = f"Error: {str(e)}"
+            yield "", history, f"Error: {str(e)}"
+    # Wire up events
+    submit_event = message.submit(
+        respond,
+        inputs=[message, chatbot, thinking_mode, system_prompt, max_tokens, temperature, top_p, top_k],
+        outputs=[message, chatbot, status]
+    )
+    send_btn.click(
+        respond,
+        inputs=[message, chatbot, thinking_mode, system_prompt, max_tokens, temperature, top_p, top_k],
+        outputs=[message, chatbot, status]
+    )
+    clear_btn.click(
+        lambda: ([], "Chat cleared. Ready for new conversation."),
+        outputs=[chatbot, status]
+    )
+    # Stop generation
+    stop_btn.click(
+        lambda: "Generation stopped by user.",
+        outputs=[status]
+    )
+    # Footer
+    gr.HTML("""
+        <div style="text-align: center; margin-top: 20px; padding: 20px; color: #666;">
+            <p>
+                <a href="https://huggingface.co/deepseek-ai/DeepSeek-V4-Pro" target="_blank">📦 Model Card</a> |
+                <a href="https://github.com/deepseek-ai/DeepSeek-V4" target="_blank">📖 Documentation</a> |
+                <a href="https://deepseek.ai" target="_blank">🌐 Homepage</a>
+            </p>
+            <p>⚠️ This is a preview version. Results may vary. For production use, please deploy with proper infrastructure.</p>
+            <p>License: MIT | DeepSeek-AI © 2026</p>
+        </div>
+    """)
+if __name__ == "__main__":
+    # Launch the demo
+    demo.queue(max_size=20).launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        share=False,  # Set to True for temporary public link
+        debug=False,
+        show_error=True
+    )