wangsheng committed on
Commit
2867a45
·
verified ·
1 Parent(s): 67589cb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +658 -359
app.py CHANGED
@@ -1,423 +1,722 @@
1
-
2
- ## 2. Hugging Face Gradio Demo (app.py)
3
-
4
- ```python
5
  # app.py
6
  import gradio as gr
7
- import torch
8
- from transformers import AutoTokenizer, AutoModelForCausalLM
9
  import os
10
- from pathlib import Path
11
- import json
12
  import time
 
13
 
14
- # Configuration
15
- MODEL_NAME = "deepseek-ai/DeepSeek-V4-Pro"
16
- MODEL_CACHE_DIR = "./model_cache"
17
- MAX_CONTEXT_LENGTH = 1000000 # 1M tokens
18
- DEFAULT_MAX_TOKENS = 2048
19
- DEFAULT_TEMPERATURE = 0.7
20
 
21
- # Model and tokenizer will be loaded lazily
22
- model = None
23
- tokenizer = None
 
 
 
24
 
25
- def load_model():
26
- """Load model and tokenizer (lazy loading)"""
27
- global model, tokenizer
28
-
29
- if model is None:
30
- print("Loading tokenizer...")
31
- tokenizer = AutoTokenizer.from_pretrained(
32
- MODEL_NAME,
33
- cache_dir=MODEL_CACHE_DIR,
34
- trust_remote_code=True
35
- )
36
-
37
- print("Loading model... This may take several minutes...")
38
- model = AutoModelForCausalLM.from_pretrained(
39
- MODEL_NAME,
40
- cache_dir=MODEL_CACHE_DIR,
41
- device_map="auto",
42
- torch_dtype=torch.bfloat16,
43
- trust_remote_code=True,
44
- low_cpu_mem_usage=True
45
  )
46
- print("Model loaded successfully!")
47
 
48
- return model, tokenizer
 
 
 
49
 
 
50
  def generate_response(
51
- message,
52
- history,
53
- thinking_mode="Think High",
54
- max_tokens=DEFAULT_MAX_TOKENS,
55
- temperature=DEFAULT_TEMPERATURE,
56
- top_p=1.0,
57
- top_k=50,
58
- system_prompt=""
59
- ):
60
- """Generate response from the model"""
 
61
 
62
- # Load model if not loaded
63
- model, tokenizer = load_model()
 
 
 
64
 
65
- # Build conversation history
66
- messages = []
67
 
68
- # Add system prompt if provided
69
- if system_prompt:
70
- messages.append({"role": "system", "content": system_prompt})
71
 
72
- # Add chat history
73
- for h in history:
74
- messages.append({"role": "user", "content": h[0]})
75
- if h[1]:
76
- messages.append({"role": "assistant", "content": h[1]})
77
 
78
  # Add current message
79
  messages.append({"role": "user", "content": message})
80
 
81
- # Map thinking mode to model format
82
- thinking_mode_map = {
83
- "Non-think": "non_thinking",
84
- "Think High": "thinking",
85
- "Think Max": "thinking_max"
86
- }
87
 
88
  try:
89
- # Try to use the custom encoding if available
90
- try:
91
- from encoding_dsv4 import encode_messages
92
- prompt = encode_messages(
93
- messages,
94
- thinking_mode=thinking_mode_map[thinking_mode]
95
- )
96
- except ImportError:
97
- # Fallback: simple concatenation
98
- prompt = ""
99
- for msg in messages:
100
- if msg["role"] == "system":
101
- prompt += f"System: {msg['content']}\n\n"
102
- elif msg["role"] == "user":
103
- prompt += f"User: {msg['content']}\n\n"
104
- elif msg["role"] == "assistant":
105
- prompt += f"Assistant: {msg['content']}\n\n"
106
- prompt += "Assistant: "
107
 
108
- # Tokenize input
109
- inputs = tokenizer(prompt, return_tensors="pt")
 
 
 
 
 
 
 
 
 
 
 
110
 
111
- # Move to appropriate device
112
- if torch.cuda.is_available():
113
- inputs = {k: v.cuda() for k, v in inputs.items()}
114
 
115
- # Check context length
116
- input_length = inputs['input_ids'].shape[1]
117
- if input_length > MAX_CONTEXT_LENGTH:
118
- raise gr.Error(f"Input too long: {input_length} tokens. Maximum: {MAX_CONTEXT_LENGTH}")
119
 
120
- # Generate with streaming
121
- start_time = time.time()
122
 
123
- generation_config = {
124
- "max_new_tokens": max_tokens,
125
- "temperature": temperature,
126
- "top_p": top_p,
127
- "top_k": top_k,
128
- "do_sample": True if temperature > 0 else False,
129
- "pad_token_id": tokenizer.pad_token_id,
130
- "eos_token_id": tokenizer.eos_token_id,
131
- }
132
 
133
- # For Think Max mode, adjust parameters
134
- if thinking_mode == "Think Max":
135
- generation_config["max_new_tokens"] = min(max_tokens * 2, 32768)
 
136
 
137
- # Generate response
138
- outputs = model.generate(**inputs, **generation_config)
 
 
 
 
139
 
140
- # Decode response
141
- full_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
142
- response = full_output[len(prompt):]
143
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
  end_time = time.time()
145
- generation_time = end_time - start_time
 
146
 
147
- # Add generation info
148
- response += f"\n\n---\n⚡ Generated in {generation_time:.2f}s | 📊 {len(outputs[0]) - input_length} tokens | 🌡️ Temperature: {temperature}"
 
149
 
150
- return response
 
151
 
152
  except Exception as e:
153
- raise gr.Error(f"Generation failed: {str(e)}")
154
-
155
- def clear_chat():
156
- """Clear chat history"""
157
- return None, None
158
 
159
- # Create the Gradio interface
160
- with gr.Blocks(
161
- title="DeepSeek-V4 Demo",
162
- theme=gr.themes.Soft(),
163
- css="""
164
- .deepseek-header {
165
- text-align: center;
166
- margin-bottom: 20px;
167
- }
168
- .deepseek-header h1 {
169
- background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
170
- -webkit-background-clip: text;
171
- -webkit-text-fill-color: transparent;
172
- font-size: 2.5em;
173
- }
174
- .model-info {
175
- background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
176
- color: white;
177
- padding: 20px;
178
- border-radius: 10px;
179
- margin-bottom: 20px;
180
- }
181
- .benchmark-grid {
182
- display: grid;
183
- grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
184
- gap: 10px;
185
- margin: 10px 0;
186
- }
187
- .benchmark-item {
188
- background: rgba(255,255,255,0.1);
189
- padding: 10px;
190
- border-radius: 5px;
191
- text-align: center;
192
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
193
  """
194
- ) as demo:
195
- gr.HTML("""
196
- <div class="deepseek-header">
197
- <h1>🚀 DeepSeek-V4</h1>
198
- <p>Towards Highly Efficient Million-Token Context Intelligence</p>
199
- </div>
200
- """)
201
-
202
- with gr.Row():
203
- with gr.Column(scale=1):
204
- # Model info panel
205
- gr.HTML("""
206
- <div class="model-info">
207
- <h3>📊 Model Specifications</h3>
208
- <div class="benchmark-grid">
209
- <div class="benchmark-item">
210
- <b>1.6T</b><br>Total Parameters
211
- </div>
212
- <div class="benchmark-item">
213
- <b>49B</b><br>Activated Parameters
214
- </div>
215
- <div class="benchmark-item">
216
- <b>1M</b><br>Context Length
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
217
  </div>
218
- <div class="benchmark-item">
219
- <b>32T+</b><br>Training Tokens
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
220
  </div>
 
 
 
 
 
 
 
 
 
221
  </div>
 
 
 
 
 
222
 
223
- <h3>🎯 Key Benchmarks</h3>
224
- <div class="benchmark-grid">
225
- <div class="benchmark-item">
226
- <b>93.5</b><br>LiveCodeBench
227
- </div>
228
- <div class="benchmark-item">
229
- <b>3206</b><br>Codeforces Rating
230
- </div>
231
- <div class="benchmark-item">
232
- <b>87.5</b><br>MMLU-Pro
233
- </div>
234
- <div class="benchmark-item">
235
- <b>80.6%</b><br>SWE Verified
236
- </div>
237
- </div>
238
 
239
- <h3>💡 Innovation Highlights</h3>
240
- <ul>
241
- <li>Hybrid Attention (CSA + HCA)</li>
242
- <li>Manifold-Constrained Hyper-Connections</li>
243
- <li>Muon Optimizer</li>
244
- <li>Two-stage Post-training</li>
245
- <li>FP4 + FP8 Mixed Precision</li>
246
- </ul>
247
- </div>
248
- """)
249
-
250
- # Configuration panel
251
- with gr.Group():
252
- gr.Markdown("### ⚙️ Configuration")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
253
 
254
- thinking_mode = gr.Radio(
255
- choices=["Non-think", "Think High", "Think Max"],
256
- value="Think High",
257
- label="Reasoning Mode",
258
- info="Non-think: Fast responses | Think High: Careful analysis | Think Max: Maximum reasoning"
 
 
 
 
 
 
 
259
  )
260
-
261
- system_prompt = gr.Textbox(
262
- label="System Prompt",
263
- placeholder="Enter system instructions...",
264
- lines=3,
265
- value="You are DeepSeek-V4, an advanced AI assistant with strong reasoning capabilities. Provide accurate and helpful responses."
 
 
 
 
 
 
 
 
266
  )
267
 
268
- with gr.Accordion("Advanced Parameters", open=False):
269
- max_tokens = gr.Slider(
270
- minimum=64,
271
- maximum=32768,
272
- value=2048,
273
- step=64,
274
- label="Max Tokens",
275
- info="Maximum number of tokens to generate"
276
- )
277
-
278
- temperature = gr.Slider(
279
- minimum=0.0,
280
- maximum=2.0,
281
- value=0.7,
282
- step=0.1,
283
- label="Temperature",
284
- info="Higher values = more creative, lower = more focused"
285
  )
286
-
287
- top_p = gr.Slider(
288
- minimum=0.0,
289
- maximum=1.0,
290
- value=1.0,
291
- step=0.05,
292
- label="Top P"
 
 
 
293
  )
294
-
295
- top_k = gr.Slider(
296
- minimum=1,
297
- maximum=100,
298
- value=50,
299
- step=1,
300
- label="Top K"
301
  )
302
-
303
- # Quick examples
304
- gr.Markdown("### 💬 Example Prompts")
305
- examples = gr.Examples(
306
- examples=[
307
- ["Explain quantum entanglement like I'm 5 years old"],
308
- ["Write a Python function to find prime numbers using the Sieve of Eratosthenes"],
309
- ["What are the key differences between DeepSeek-V4 and previous versions?"],
310
- ["Solve this math problem: Find the derivative of f(x) = x³sin(x)"],
311
- ["Design a REST API for a todo application"],
312
- ],
313
- inputs=[message] if 'message' in locals() else None,
314
- )
315
-
316
- with gr.Column(scale=2):
317
- # Chat interface
318
- chatbot = gr.Chatbot(
319
- label="Chat with DeepSeek-V4",
320
- height=600,
321
- show_copy_button=True,
322
- avatar_images=(
323
- "https://huggingface.co/datasets/huggingface/badges/raw/main/open-in-hf-spaces-sm.svg",
324
- "https://huggingface.co/datasets/huggingface/badges/raw/main/open-in-hf-spaces-sm.svg"
325
  )
326
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
327
 
328
- with gr.Row():
329
- message = gr.Textbox(
330
- label="Your Message",
331
- placeholder="Type your message here... (Shift+Enter for new line, Enter to send)",
332
- lines=3,
333
- scale=9
 
334
  )
335
- send_btn = gr.Button("Send", variant="primary", scale=1)
336
-
337
- with gr.Row():
338
- clear_btn = gr.Button("Clear Chat", size="sm")
339
- stop_btn = gr.Button("Stop Generation", size="sm", variant="stop")
340
 
341
- # Status indicator
342
- status = gr.Textbox(
343
- label="Status",
344
- value="Ready to chat! Select your configuration and start a conversation.",
345
- interactive=False
346
- )
347
-
348
- # Event handlers
349
- def respond(message, history, thinking_mode, system_prompt, max_tokens, temperature, top_p, top_k):
350
- """Main response handler"""
351
- if not message.strip():
352
- return "", history, "Please enter a message."
 
 
 
353
 
354
- history = history or []
355
- history.append([message, None])
 
 
 
 
 
 
 
 
356
 
357
- yield "", history, "Generating..."
 
 
 
 
 
 
 
 
 
358
 
359
- try:
360
- response = generate_response(
361
- message,
362
- history[:-1],
363
- thinking_mode,
364
- max_tokens,
365
- temperature,
366
- top_p,
367
- top_k,
368
- system_prompt
369
  )
370
-
371
- history[-1][1] = response
372
- yield "", history, "Ready"
373
-
374
- except Exception as e:
375
- history[-1][1] = f"Error: {str(e)}"
376
- yield "", history, f"Error: {str(e)}"
377
-
378
- # Wire up events
379
- submit_event = message.submit(
380
- respond,
381
- inputs=[message, chatbot, thinking_mode, system_prompt, max_tokens, temperature, top_p, top_k],
382
- outputs=[message, chatbot, status]
383
- )
384
-
385
- send_btn.click(
386
- respond,
387
- inputs=[message, chatbot, thinking_mode, system_prompt, max_tokens, temperature, top_p, top_k],
388
- outputs=[message, chatbot, status]
389
- )
390
-
391
- clear_btn.click(
392
- lambda: ([], "Chat cleared. Ready for new conversation."),
393
- outputs=[chatbot, status]
394
- )
395
-
396
- # Stop generation
397
- stop_btn.click(
398
- lambda: "Generation stopped by user.",
399
- outputs=[status]
400
- )
401
 
402
- # Footer
403
- gr.HTML("""
404
- <div style="text-align: center; margin-top: 20px; padding: 20px; color: #666;">
405
- <p>
406
- <a href="https://huggingface.co/deepseek-ai/DeepSeek-V4-Pro" target="_blank">📦 Model Card</a> |
407
- <a href="https://github.com/deepseek-ai/DeepSeek-V4" target="_blank">📖 Documentation</a> |
408
- <a href="https://deepseek.ai" target="_blank">🌐 Homepage</a>
409
- </p>
410
- <p>⚠️ This is a preview version. Results may vary. For production use, please deploy with proper infrastructure.</p>
411
- <p>License: MIT | DeepSeek-AI © 2026</p>
412
- </div>
413
- """)
414
 
 
415
  if __name__ == "__main__":
416
- # Launch the demo
417
- demo.queue(max_size=20).launch(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
418
  server_name="0.0.0.0",
419
  server_port=7860,
420
- share=False, # Set to True for temporary public link
421
  debug=False,
422
- show_error=True
 
423
  )
 
 
 
 
 
1
  # app.py
2
  import gradio as gr
3
+ from openai import OpenAI
 
4
  import os
 
 
5
  import time
6
+ from typing import List, Tuple, Optional
7
 
8
# ==================== Configuration ====================
# Default persona/instructions injected as the system message for every chat.
DEFAULT_SYSTEM_PROMPT = "You are DeepSeek-V4, an advanced AI assistant with strong reasoning capabilities. Provide accurate, helpful, and well-reasoned responses."

# UI reasoning-mode label -> API `reasoning_effort` value.
REASONING_EFFORT_MAP = {
    "Non-think": "minimal",
    "Think High": "high",
    "Think Max": "maximum",
}

# UI reasoning-mode label -> API thinking toggle ("disabled" skips the
# chain-of-thought phase entirely).
THINKING_TYPE_MAP = {
    "Non-think": "disabled",
    "Think High": "enabled",
    "Think Max": "enabled",
}
24
+
25
# ==================== API Client Setup ====================
def get_client():
    """Build an OpenAI-SDK client pointed at the DeepSeek API endpoint.

    Raises:
        ValueError: if the DEEPSEEK_API_KEY environment variable is unset.
    """
    api_key = os.environ.get('DEEPSEEK_API_KEY')
    if api_key:
        return OpenAI(
            api_key=api_key,
            base_url="https://api.deepseek.com"
        )

    # Fail fast with setup instructions instead of letting the SDK raise a
    # less actionable authentication error on the first request.
    raise ValueError(
        "⚠️ DEEPSEEK_API_KEY not found!\n\n"
        "Please set your API key:\n"
        "1. Get your key from: https://platform.deepseek.com/api_keys\n"
        "2. Set environment variable:\n"
        " export DEEPSEEK_API_KEY='your-api-key-here'\n"
        " or create a .env file with: DEEPSEEK_API_KEY=your-api-key-here"
    )
43
 
44
# ==================== Response Generation ====================
def generate_response(
    message: str,
    history: List[Tuple[str, str]],
    thinking_mode: str = "Think High",
    max_tokens: int = 4096,
    temperature: float = 0.7,
    top_p: float = 1.0,
    system_prompt: str = DEFAULT_SYSTEM_PROMPT,
    show_thinking: bool = True
) -> Tuple[str, List[Tuple[str, str]], str, str, str]:
    """
    Generate a (non-streaming) response using the DeepSeek API.

    Returns:
        Tuple of (empty_message, updated_history, response_text,
        thinking_text, status).

    Note: the return annotation previously declared a 4-tuple while the
    function (and its own docstring) returned five elements; the annotation
    is now the correct 5-tuple.
    """
    # Guard: nothing to do for an empty/whitespace-only message.
    if not message.strip():
        return "", history, "", "", "Please enter a message."

    client = get_client()

    # Build the messages array: system prompt, prior turns, current turn.
    messages = [{"role": "system", "content": system_prompt}]
    for user_msg, assistant_msg in history:
        messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    # Map the UI mode to API parameters (defaults mirror "Think High").
    reasoning_effort = REASONING_EFFORT_MAP.get(thinking_mode, "high")
    thinking_type = THINKING_TYPE_MAP.get(thinking_mode, "enabled")

    try:
        start_time = time.time()

        # NOTE(review): `reasoning_effort` and the `thinking` extra_body field
        # are DeepSeek-specific extensions -- confirm against current API docs.
        response = client.chat.completions.create(
            model="deepseek-v4-pro",
            messages=messages,
            stream=False,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            reasoning_effort=reasoning_effort,
            extra_body={
                "thinking": {"type": thinking_type}
            }
        )

        generation_time = time.time() - start_time

        # Extract response content.
        message_obj = response.choices[0].message
        content = message_obj.content or ""

        # Reasoning/chain-of-thought text, when the API returned any.
        thinking_content = ""
        if hasattr(message_obj, 'reasoning_content') and message_obj.reasoning_content:
            thinking_content = message_obj.reasoning_content

        # What goes into the chat bubble: optionally prepend the reasoning.
        full_response = content
        if show_thinking and thinking_content:
            full_response = f"{thinking_content}\n\n---\n\n{content}"

        # Token-usage summary, if the API reported usage.
        if hasattr(response, 'usage') and response.usage:
            usage = response.usage
            tokens_info = f"📊 Input: {usage.prompt_tokens} tokens | Output: {usage.completion_tokens} tokens | Total: {usage.total_tokens} tokens"
        else:
            tokens_info = ""

        status = f"✅ Generated in {generation_time:.2f}s | 🎯 Mode: {thinking_mode} | {tokens_info}"

        return "", history + [(message, full_response)], content, thinking_content, status

    except Exception as e:
        # Broad catch is deliberate: this is the UI boundary, and any
        # API/SDK failure should surface in the chat rather than crash Gradio.
        error_msg = f"❌ Error: {str(e)}"
        return "", history + [(message, error_msg)], "", "", error_msg
133
+
134
def generate_response_stream(
    message: str,
    history: List[Tuple[str, str]],
    thinking_mode: str = "Think High",
    max_tokens: int = 4096,
    temperature: float = 0.7,
    top_p: float = 1.0,
    system_prompt: str = DEFAULT_SYSTEM_PROMPT,
    show_thinking: bool = True
):
    """
    Stream a response from the DeepSeek API.

    Yields:
        Tuples of (empty_message, updated_history, content_so_far,
        thinking_so_far, status) -- one per received chunk, plus a final
        summary yield when the stream completes.
    """
    if not message.strip():
        yield "", history, "", "", "Please enter a message."
        return

    client = get_client()

    # Build the messages array: system prompt, prior turns, current turn.
    messages = [{"role": "system", "content": system_prompt}]
    for user_msg, assistant_msg in history:
        messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    reasoning_effort = REASONING_EFFORT_MAP.get(thinking_mode, "high")
    thinking_type = THINKING_TYPE_MAP.get(thinking_mode, "enabled")

    try:
        start_time = time.time()

        # NOTE(review): `reasoning_effort` and the `thinking` extra_body field
        # are DeepSeek-specific extensions -- confirm against current API docs.
        stream = client.chat.completions.create(
            model="deepseek-v4-pro",
            messages=messages,
            stream=True,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            reasoning_effort=reasoning_effort,
            extra_body={
                "thinking": {"type": thinking_type}
            }
        )

        content_chunks = []
        thinking_chunks = []

        for chunk in stream:
            # Robustness: some providers emit keep-alive/usage chunks with an
            # empty `choices` list; skip them instead of raising IndexError.
            if not chunk.choices:
                continue

            # Hoist the repeated chunk.choices[0].delta attribute chain.
            delta = chunk.choices[0].delta
            if delta.content:
                content_chunks.append(delta.content)

            # Reasoning tokens arrive on a separate, optional delta field.
            if getattr(delta, 'reasoning_content', None):
                thinking_chunks.append(delta.reasoning_content)

            current_content = ''.join(content_chunks)
            current_thinking = ''.join(thinking_chunks)

            full_response = current_content
            if show_thinking and current_thinking:
                full_response = f"🧠 Thinking:\n{current_thinking}\n\n💬 Response:\n{current_content}"

            elapsed = time.time() - start_time
            status = f"🔄 Streaming... ({elapsed:.1f}s) | Mode: {thinking_mode}"

            yield "", history + [(message, full_response)], current_content, current_thinking, status

        # Final yield with the complete response.
        end_time = time.time()
        final_content = ''.join(content_chunks)
        final_thinking = ''.join(thinking_chunks)

        full_response = final_content
        if show_thinking and final_thinking:
            full_response = f"🧠 Thinking:\n{final_thinking}\n\n💬 Response:\n{final_content}"

        status = f"✅ Done in {end_time - start_time:.2f}s | Mode: {thinking_mode}"
        yield "", history + [(message, full_response)], final_content, final_thinking, status

    except Exception as e:
        # Broad catch is deliberate: surface any API/SDK failure in the UI.
        # BUG FIX: the prefix was " Error:" (missing the ❌ marker used by the
        # non-streaming path); made consistent.
        error_msg = f"❌ Error: {str(e)}"
        yield "", history + [(message, error_msg)], "", "", error_msg
 
 
 
223
 
224
+ # ==================== Gradio Interface ====================
225
+ def create_demo():
226
+ """Create the Gradio interface"""
227
+
228
+ # Custom CSS
229
+ custom_css = """
230
+ :root {
231
+ --primary: #667eea;
232
+ --secondary: #764ba2;
233
+ }
234
+
235
+ .deepseek-header {
236
+ text-align: center;
237
+ margin-bottom: 20px;
238
+ padding: 30px;
239
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
240
+ border-radius: 16px;
241
+ color: white;
242
+ }
243
+
244
+ .deepseek-header h1 {
245
+ font-size: 2.8em;
246
+ font-weight: 800;
247
+ margin: 0;
248
+ text-shadow: 2px 2px 4px rgba(0,0,0,0.2);
249
+ }
250
+
251
+ .deepseek-header p {
252
+ font-size: 1.2em;
253
+ opacity: 0.95;
254
+ margin: 10px 0 0 0;
255
+ }
256
+
257
+ .model-info {
258
+ background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
259
+ padding: 20px;
260
+ border-radius: 12px;
261
+ margin-bottom: 20px;
262
+ border: 1px solid #e0e0e0;
263
+ }
264
+
265
+ .benchmark-grid {
266
+ display: grid;
267
+ grid-template-columns: repeat(auto-fit, minmax(150px, 1fr));
268
+ gap: 12px;
269
+ margin: 15px 0;
270
+ }
271
+
272
+ .benchmark-item {
273
+ background: white;
274
+ padding: 12px;
275
+ border-radius: 8px;
276
+ text-align: center;
277
+ box-shadow: 0 2px 4px rgba(0,0,0,0.1);
278
+ transition: transform 0.2s;
279
+ }
280
+
281
+ .benchmark-item:hover {
282
+ transform: translateY(-2px);
283
+ box-shadow: 0 4px 8px rgba(0,0,0,0.15);
284
+ }
285
+
286
+ .benchmark-item .value {
287
+ font-size: 1.5em;
288
+ font-weight: 700;
289
+ color: #667eea;
290
+ }
291
+
292
+ .benchmark-item .label {
293
+ font-size: 0.85em;
294
+ color: #666;
295
+ margin-top: 4px;
296
+ }
297
+
298
+ .chat-container {
299
+ border: 1px solid #e0e0e0;
300
+ border-radius: 12px;
301
+ overflow: hidden;
302
+ }
303
+
304
+ .thinking-box {
305
+ background: #f8f9fa;
306
+ border-left: 4px solid #667eea;
307
+ padding: 15px;
308
+ margin: 10px 0;
309
+ border-radius: 8px;
310
+ font-style: italic;
311
+ color: #555;
312
+ }
313
+
314
+ .thinking-box::before {
315
+ content: "🧠 Thinking Process";
316
+ display: block;
317
+ font-weight: 600;
318
+ color: #667eea;
319
+ margin-bottom: 8px;
320
+ }
321
+
322
+ .response-box {
323
+ background: white;
324
+ padding: 15px;
325
+ border-radius: 8px;
326
+ line-height: 1.6;
327
+ }
328
+
329
+ .status-bar {
330
+ padding: 10px;
331
+ background: #f5f5f5;
332
+ border-radius: 8px;
333
+ font-family: monospace;
334
+ font-size: 0.9em;
335
+ }
336
+
337
+ .mode-indicator {
338
+ display: inline-block;
339
+ padding: 4px 12px;
340
+ border-radius: 20px;
341
+ font-size: 0.85em;
342
+ font-weight: 600;
343
+ margin-right: 8px;
344
+ }
345
+
346
+ .mode-non-think {
347
+ background: #e3f2fd;
348
+ color: #1976d2;
349
+ }
350
+
351
+ .mode-think-high {
352
+ background: #f3e5f5;
353
+ color: #7b1fa2;
354
+ }
355
+
356
+ .mode-think-max {
357
+ background: #fce4ec;
358
+ color: #c62828;
359
+ }
360
+
361
+ .api-key-warning {
362
+ background: #fff3cd;
363
+ border: 1px solid #ffc107;
364
+ color: #856404;
365
+ padding: 15px;
366
+ border-radius: 8px;
367
+ margin: 10px 0;
368
+ }
369
  """
370
+
371
+ with gr.Blocks(
372
+ title="DeepSeek-V4 Pro - API Demo",
373
+ theme=gr.themes.Soft(),
374
+ css=custom_css,
375
+ analytics_enabled=False
376
+ ) as demo:
377
+
378
+ # Header
379
+ gr.HTML("""
380
+ <div class="deepseek-header">
381
+ <h1>🚀 DeepSeek-V4 Pro</h1>
382
+ <p>Towards Highly Efficient Million-Token Context Intelligence</p>
383
+ <p style="font-size: 0.9em; opacity: 0.8;">Powered by DeepSeek API • 1.6T Parameters • 49B Activated</p>
384
+ </div>
385
+ """)
386
+
387
+ # Main layout
388
+ with gr.Row():
389
+ # Left sidebar - Configuration
390
+ with gr.Column(scale=1, min_width=350):
391
+ # Model Info Card
392
+ gr.HTML("""
393
+ <div class="model-info">
394
+ <h3 style="margin-top: 0;">📊 Model Specifications</h3>
395
+ <div class="benchmark-grid">
396
+ <div class="benchmark-item">
397
+ <div class="value">1.6T</div>
398
+ <div class="label">Total Parameters</div>
399
+ </div>
400
+ <div class="benchmark-item">
401
+ <div class="value">49B</div>
402
+ <div class="label">Activated Parameters</div>
403
+ </div>
404
+ <div class="benchmark-item">
405
+ <div class="value">1M</div>
406
+ <div class="label">Context Length</div>
407
+ </div>
408
+ <div class="benchmark-item">
409
+ <div class="value">32T+</div>
410
+ <div class="label">Training Tokens</div>
411
+ </div>
412
  </div>
413
+
414
+ <h3>🎯 Key Benchmarks</h3>
415
+ <div class="benchmark-grid">
416
+ <div class="benchmark-item">
417
+ <div class="value">93.5</div>
418
+ <div class="label">LiveCodeBench</div>
419
+ </div>
420
+ <div class="benchmark-item">
421
+ <div class="value">3206</div>
422
+ <div class="label">Codeforces Rating</div>
423
+ </div>
424
+ <div class="benchmark-item">
425
+ <div class="value">87.5</div>
426
+ <div class="label">MMLU-Pro</div>
427
+ </div>
428
+ <div class="benchmark-item">
429
+ <div class="value">80.6%</div>
430
+ <div class="label">SWE Verified</div>
431
+ </div>
432
  </div>
433
+
434
+ <h3>💡 Key Innovations</h3>
435
+ <ul style="padding-left: 20px;">
436
+ <li>Hybrid Attention (CSA + HCA)</li>
437
+ <li>Manifold-Constrained Hyper-Connections</li>
438
+ <li>Muon Optimizer</li>
439
+ <li>Two-stage Post-training</li>
440
+ <li>FP4 + FP8 Mixed Precision</li>
441
+ </ul>
442
  </div>
443
+ """)
444
+
445
+ # Configuration Panel
446
+ with gr.Group():
447
+ gr.Markdown("### ⚙️ Generation Settings")
448
 
449
+ thinking_mode = gr.Radio(
450
+ choices=["Non-think", "Think High", "Think Max"],
451
+ value="Think High",
452
+ label="🧠 Reasoning Mode",
453
+ info="""
454
+ Non-think: Fast, intuitive responses for daily tasks
455
+ Think High: Deliberate reasoning for complex problems
456
+ • Think Max: Maximum effort for hardest challenges
457
+ """
458
+ )
 
 
 
 
 
459
 
460
+ show_thinking = gr.Checkbox(
461
+ value=True,
462
+ label="📝 Show Thinking Process",
463
+ info="Display the model's reasoning steps"
464
+ )
465
+
466
+ system_prompt = gr.Textbox(
467
+ label="📋 System Prompt",
468
+ value=DEFAULT_SYSTEM_PROMPT,
469
+ lines=3,
470
+ max_lines=5
471
+ )
472
+
473
+ with gr.Accordion("🔧 Advanced Parameters", open=False):
474
+ max_tokens = gr.Slider(
475
+ minimum=64,
476
+ maximum=32768,
477
+ value=4096,
478
+ step=64,
479
+ label="Max Tokens"
480
+ )
481
+
482
+ temperature = gr.Slider(
483
+ minimum=0.0,
484
+ maximum=2.0,
485
+ value=0.7,
486
+ step=0.05,
487
+ label="Temperature",
488
+ info="0 = deterministic, 1+ = creative"
489
+ )
490
+
491
+ top_p = gr.Slider(
492
+ minimum=0.0,
493
+ maximum=1.0,
494
+ value=1.0,
495
+ step=0.05,
496
+ label="Top P"
497
+ )
498
+
499
+ stream_output = gr.Checkbox(
500
+ value=True,
501
+ label="📡 Stream Output",
502
+ info="Show response as it's generated"
503
+ )
504
 
505
+ # Quick examples
506
+ gr.Markdown("### 💡 Quick Examples")
507
+ examples = [
508
+ "Explain quantum computing to a 10-year-old",
509
+ "Write a Python function for Fibonacci with memoization",
510
+ "What are the key features of DeepSeek-V4?",
511
+ "Solve: If x² + y² = 25 and x + y = 7, find x and y",
512
+ "Design a REST API for a social media platform",
513
+ ]
514
+ gr.Examples(
515
+ examples=examples,
516
+ inputs=gr.Textbox(label="Click to try", visible=False),
517
  )
518
+
519
+ # Right - Chat Interface
520
+ with gr.Column(scale=2):
521
+ # Chatbot
522
+ chatbot = gr.Chatbot(
523
+ label="💬 Chat with DeepSeek-V4 Pro",
524
+ height=550,
525
+ show_copy_button=True,
526
+ bubble_full_width=False,
527
+ avatar_images=(
528
+ "https://api.dicebear.com/7.x/bottts/svg?seed=user&backgroundColor=667eea",
529
+ "https://api.dicebear.com/7.x/bottts/svg?seed=assistant&backgroundColor=764ba2"
530
+ ),
531
+ layout="panel"
532
  )
533
 
534
+ # Thinking process display
535
+ with gr.Accordion("🧠 Thinking Process", open=True, visible=True):
536
+ thinking_display = gr.Markdown(
537
+ value="*The model's reasoning will appear here...*",
538
+ elem_classes="thinking-box"
 
 
 
 
 
 
 
 
 
 
 
 
539
  )
540
+
541
+ # Input area
542
+ with gr.Row():
543
+ message_input = gr.Textbox(
544
+ label="Your Message",
545
+ placeholder="Type your message here... (Shift+Enter for new line)",
546
+ lines=2,
547
+ max_lines=5,
548
+ scale=9,
549
+ autofocus=True
550
  )
551
+ send_btn = gr.Button(
552
+ "🚀 Send",
553
+ variant="primary",
554
+ scale=1,
555
+ size="lg"
 
 
556
  )
557
+
558
+ # Control buttons
559
+ with gr.Row():
560
+ clear_btn = gr.Button("🗑️ Clear Chat", size="sm")
561
+ stop_btn = gr.Button("⏹️ Stop", size="sm", variant="stop", visible=False)
562
+ retry_btn = gr.Button("🔄 Retry", size="sm", variant="secondary")
563
+
564
+ # Status bar
565
+ status_display = gr.Textbox(
566
+ label="Status",
567
+ value="✅ Ready | Using DeepSeek API (deepseek-v4-pro)",
568
+ interactive=False,
569
+ elem_classes="status-bar"
 
 
 
 
 
 
 
 
 
 
570
  )
571
+
572
+ # Footer
573
+ gr.HTML("""
574
+ <div style="text-align: center; margin-top: 30px; padding: 20px; color: #666; border-top: 1px solid #e0e0e0;">
575
+ <p style="margin: 5px 0;">
576
+ <a href="https://huggingface.co/deepseek-ai/DeepSeek-V4-Pro" target="_blank">📦 Model Card</a> |
577
+ <a href="https://platform.deepseek.com/api_keys" target="_blank">🔑 Get API Key</a> |
578
+ <a href="https://platform.deepseek.com/docs" target="_blank">📚 API Docs</a> |
579
+ <a href="https://deepseek.ai" target="_blank">🌐 Homepage</a>
580
+ </p>
581
+ <p style="margin: 5px 0; font-size: 0.9em;">
582
+ ⚡ Powered by DeepSeek API • Streaming Available • MIT License
583
+ </p>
584
+ <p style="margin: 5px 0; font-size: 0.8em; opacity: 0.7;">
585
+ DeepSeek-AI © 2026 • All benchmarks are for reference only
586
+ </p>
587
+ </div>
588
+ """)
589
+
590
+ # ==================== Event Handlers ====================
591
+
592
def process_message(
    message: str,
    history: List[Tuple[str, str]],
    thinking_mode: str,
    show_thinking: bool,
    system_prompt: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
    stream_output: bool
):
    """Dispatch a chat turn to the streaming or non-streaming generator.

    Always a generator: Gradio consumes yielded 4-tuples of
    (message_box_value, chat_history, thinking_text, status_text).

    BUG FIX: the original used `return <tuple>` in the empty-message and
    missing-API-key branches. Because this function contains `yield`, it is
    a generator, and `return value` inside a generator silently ends
    iteration without emitting the value -- so those branches updated
    nothing in the UI. They now `yield` the outputs, then return.
    """
    if not message.strip():
        yield message, history, "", "Please enter a message."
        return

    # Check the API key up front so the user gets an actionable message in-chat.
    if not os.environ.get('DEEPSEEK_API_KEY'):
        yield (
            message,
            history + [(message, "⚠️ **API Key Missing**\n\nPlease set your `DEEPSEEK_API_KEY` environment variable.\nGet one at: https://platform.deepseek.com/api_keys")],
            "",
            "❌ API Key not configured"
        )
        return

    if stream_output:
        # Streaming mode: forward each partial update as it arrives.
        for msg, hist, content, thinking, status in generate_response_stream(
            message, history, thinking_mode, max_tokens,
            temperature, top_p, system_prompt, show_thinking
        ):
            yield msg, hist, thinking, status
    else:
        # Non-streaming: single result, yielded once to fit the generator API.
        msg, hist, content, thinking, status = generate_response(
            message, history, thinking_mode, max_tokens,
            temperature, top_p, system_prompt, show_thinking
        )
        yield msg, hist, thinking, status
631
 
632
+ # Wire up send button
633
+ send_event = send_btn.click(
634
+ fn=process_message,
635
+ inputs=[
636
+ message_input, chatbot, thinking_mode, show_thinking,
637
+ system_prompt, max_tokens, temperature, top_p, stream_output
638
+ ],
639
+ outputs=[message_input, chatbot, thinking_display, status_display],
640
+ show_progress="hidden"
641
+ )
642
 
643
+ # Wire up Enter key
644
+ enter_event = message_input.submit(
645
+ fn=process_message,
646
+ inputs=[
647
+ message_input, chatbot, thinking_mode, show_thinking,
648
+ system_prompt, max_tokens, temperature, top_p, stream_output
649
+ ],
650
+ outputs=[message_input, chatbot, thinking_display, status_display],
651
+ show_progress="hidden"
652
+ )
653
 
654
+ # Clear chat
655
def clear_chat():
    """Reset the chat UI: empty history, placeholder thinking text, ready status."""
    fresh_history = []
    thinking_placeholder = "*The model's reasoning will appear here...*"
    ready_status = "✅ Chat cleared. Ready for new conversation."
    return fresh_history, thinking_placeholder, ready_status
661
+
662
+ clear_btn.click(
663
+ fn=clear_chat,
664
+ outputs=[chatbot, thinking_display, status_display]
665
+ )
666
+
667
+ # Retry last message
668
def retry_last(history):
    """Drop the most recent chat exchange and hand its user message back
    so it can be edited/resent from the input box."""
    if history:
        *earlier, newest = history
        return earlier, newest[0]
    return history, "No message to retry."
674
+
675
+ retry_btn.click(
676
+ fn=retry_last,
677
+ inputs=[chatbot],
678
+ outputs=[chatbot, message_input]
679
+ )
680
+
681
+ # Mode change indicator
682
def update_mode_indicator(mode):
    """Return an HTML badge (<span>) styled for the selected reasoning mode."""
    badge_css = {
        "Non-think": "mode-non-think",
        "Think High": "mode-think-high",
        "Think Max": "mode-think-max",
    }.get(mode, "")
    return f'<span class="mode-indicator {badge_css}">{mode}</span>'
 
 
690
 
691
+ return demo
 
 
 
 
 
 
 
 
 
 
 
692
 
693
# ==================== Main ====================
if __name__ == "__main__":
    # Warn (but still launch) when the API key is missing, so the console
    # shows actionable setup steps instead of a later opaque failure.
    api_key = os.environ.get('DEEPSEEK_API_KEY')
    if not api_key:
        print("\n" + "=" * 60)
        print("⚠️ DEEPSEEK_API_KEY not found!")
        print("=" * 60)
        print("\nTo get started:")
        print("1. Get your API key: https://platform.deepseek.com/api_keys")
        print("2. Set the environment variable:")
        print(" export DEEPSEEK_API_KEY='your-key-here'")
        print("\nOr create a .env file:")
        print(' echo DEEPSEEK_API_KEY=your-key-here > .env')
        print("\n" + "=" * 60 + "\n")

    # Create and launch the demo.
    demo = create_demo()
    # BUG FIX: `concurrency_count` was removed in Gradio 4.x; passing it
    # together with the Gradio-4 `default_concurrency_limit` (already used
    # here) raises a TypeError at startup. Only the supported parameter is kept.
    demo.queue(
        max_size=50,
        default_concurrency_limit=10
    ).launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        debug=False,
        show_error=True,
        favicon_path=None
    )