wangsheng committed on
Commit
b920ac7
·
verified ·
1 Parent(s): 7bd0413

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +103 -499
app.py CHANGED
@@ -3,7 +3,7 @@ import gradio as gr
3
  from openai import OpenAI
4
  import os
5
  import time
6
- from typing import List, Tuple, Optional
7
 
8
  # ==================== Configuration ====================
9
  DEFAULT_SYSTEM_PROMPT = "You are DeepSeek-V4, an advanced AI assistant with strong reasoning capabilities. Provide accurate, helpful, and well-reasoned responses."
@@ -32,8 +32,7 @@ def get_client():
32
  "Please set your API key:\n"
33
  "1. Get your key from: https://platform.deepseek.com/api_keys\n"
34
  "2. Set environment variable:\n"
35
- " export DEEPSEEK_API_KEY='your-api-key-here'\n"
36
- " or create a .env file with: DEEPSEEK_API_KEY=your-api-key-here"
37
  )
38
 
39
  return OpenAI(
@@ -51,114 +50,22 @@ def generate_response(
51
  top_p: float = 1.0,
52
  system_prompt: str = DEFAULT_SYSTEM_PROMPT,
53
  show_thinking: bool = True
54
- ) -> Tuple[str, List[Tuple[str, str]], str, str]:
55
- """
56
- Generate response using DeepSeek API
57
-
58
- Returns:
59
- Tuple of (empty_message, updated_history, response_text, thinking_text, status)
60
- """
61
- if not message.strip():
62
- return "", history, "", "", "Please enter a message."
63
-
64
- client = get_client()
65
-
66
- # Build messages array
67
- messages = [{"role": "system", "content": system_prompt}]
68
-
69
- # Add conversation history
70
- for user_msg, assistant_msg in history:
71
- messages.append({"role": "user", "content": user_msg})
72
- if assistant_msg:
73
- messages.append({"role": "assistant", "content": assistant_msg})
74
-
75
- # Add current message
76
- messages.append({"role": "user", "content": message})
77
-
78
- # Prepare API parameters
79
- reasoning_effort = REASONING_EFFORT_MAP.get(thinking_mode, "high")
80
- thinking_type = THINKING_TYPE_MAP.get(thinking_mode, "enabled")
81
-
82
- try:
83
- start_time = time.time()
84
-
85
- # Call DeepSeek API
86
- response = client.chat.completions.create(
87
- model="deepseek-v4-pro",
88
- messages=messages,
89
- stream=False,
90
- max_tokens=max_tokens,
91
- temperature=temperature,
92
- top_p=top_p,
93
- reasoning_effort=reasoning_effort,
94
- extra_body={
95
- "thinking": {"type": thinking_type}
96
- }
97
- )
98
-
99
- end_time = time.time()
100
- generation_time = end_time - start_time
101
-
102
- # Extract response content
103
- choice = response.choices[0]
104
- message_obj = choice.message
105
-
106
- # Get main content
107
- content = message_obj.content or ""
108
-
109
- # Get reasoning/thinking content if available
110
- thinking_content = ""
111
- if hasattr(message_obj, 'reasoning_content') and message_obj.reasoning_content:
112
- thinking_content = message_obj.reasoning_content
113
-
114
- # Update history
115
- full_response = content
116
- if show_thinking and thinking_content:
117
- full_response = f"{thinking_content}\n\n---\n\n{content}"
118
-
119
- # Add usage info if available
120
- if hasattr(response, 'usage') and response.usage:
121
- usage = response.usage
122
- tokens_info = f"📊 Input: {usage.prompt_tokens} tokens | Output: {usage.completion_tokens} tokens | Total: {usage.total_tokens} tokens"
123
- else:
124
- tokens_info = ""
125
-
126
- status = f"✅ Generated in {generation_time:.2f}s | 🎯 Mode: {thinking_mode} | {tokens_info}"
127
-
128
- return "", history + [(message, full_response)], content, thinking_content, status
129
-
130
- except Exception as e:
131
- error_msg = f"❌ Error: {str(e)}"
132
- return "", history + [(message, error_msg)], "", "", error_msg
133
-
134
- def generate_response_stream(
135
- message: str,
136
- history: List[Tuple[str, str]],
137
- thinking_mode: str = "Think High",
138
- max_tokens: int = 4096,
139
- temperature: float = 0.7,
140
- top_p: float = 1.0,
141
- system_prompt: str = DEFAULT_SYSTEM_PROMPT,
142
- show_thinking: bool = True
143
  ):
144
- """
145
- Stream response from DeepSeek API
146
-
147
- Yields:
148
- Tuple of (empty_message, updated_history, content_so_far, thinking_so_far, status)
149
- """
150
  if not message.strip():
151
- yield "", history, "", "", "Please enter a message."
152
  return
153
 
154
  client = get_client()
155
 
156
  # Build messages array
157
  messages = [{"role": "system", "content": system_prompt}]
 
158
  for user_msg, assistant_msg in history:
159
  messages.append({"role": "user", "content": user_msg})
160
  if assistant_msg:
161
  messages.append({"role": "assistant", "content": assistant_msg})
 
162
  messages.append({"role": "user", "content": message})
163
 
164
  reasoning_effort = REASONING_EFFORT_MAP.get(thinking_mode, "high")
@@ -167,7 +74,7 @@ def generate_response_stream(
167
  try:
168
  start_time = time.time()
169
 
170
- # Stream response
171
  stream = client.chat.completions.create(
172
  model="deepseek-v4-pro",
173
  messages=messages,
@@ -176,9 +83,7 @@ def generate_response_stream(
176
  temperature=temperature,
177
  top_p=top_p,
178
  reasoning_effort=reasoning_effort,
179
- extra_body={
180
- "thinking": {"type": thinking_type}
181
- }
182
  )
183
 
184
  content_chunks = []
@@ -188,7 +93,6 @@ def generate_response_stream(
188
  if chunk.choices[0].delta.content:
189
  content_chunks.append(chunk.choices[0].delta.content)
190
 
191
- # Check for reasoning content in stream
192
  if hasattr(chunk.choices[0].delta, 'reasoning_content'):
193
  if chunk.choices[0].delta.reasoning_content:
194
  thinking_chunks.append(chunk.choices[0].delta.reasoning_content)
@@ -198,488 +102,188 @@ def generate_response_stream(
198
 
199
  full_response = current_content
200
  if show_thinking and current_thinking:
201
- full_response = f"🧠 Thinking:\n{current_thinking}\n\n💬 Response:\n{current_content}"
202
 
203
  elapsed = time.time() - start_time
204
- status = f"🔄 Streaming... ({elapsed:.1f}s) | Mode: {thinking_mode}"
205
 
206
- yield "", history + [(message, full_response)], current_content, current_thinking, status
207
 
208
- # Final yield with complete response
209
  end_time = time.time()
210
  final_content = ''.join(content_chunks)
211
  final_thinking = ''.join(thinking_chunks)
212
 
213
  full_response = final_content
214
  if show_thinking and final_thinking:
215
- full_response = f"🧠 Thinking:\n{final_thinking}\n\n💬 Response:\n{final_content}"
216
 
217
- status = f"✅ Done in {end_time - start_time:.2f}s | Mode: {thinking_mode}"
218
- yield "", history + [(message, full_response)], final_content, final_thinking, status
219
 
220
  except Exception as e:
221
  error_msg = f"❌ Error: {str(e)}"
222
- yield "", history + [(message, error_msg)], "", "", error_msg
223
 
224
  # ==================== Gradio Interface ====================
225
  def create_demo():
226
  """Create the Gradio interface"""
227
 
228
- # Use gr.Blocks without theme and css (moved to launch)
229
- with gr.Blocks(
230
- title="DeepSeek-V4 Pro - API Demo",
231
- analytics_enabled=False
232
- ) as demo:
233
 
234
  # Header
235
- gr.HTML("""
236
- <div class="deepseek-header">
237
- <h1>🚀 DeepSeek-V4 Pro</h1>
238
- <p>Towards Highly Efficient Million-Token Context Intelligence</p>
239
- <p style="font-size: 0.9em; opacity: 0.8;">Powered by DeepSeek API • 1.6T Parameters • 49B Activated</p>
240
- </div>
241
  """)
242
 
243
- # Main layout
244
  with gr.Row():
245
- # Left sidebar - Configuration
246
- with gr.Column(scale=1, min_width=350):
247
- # Model Info Card
248
- gr.HTML("""
249
- <div class="model-info">
250
- <h3 style="margin-top: 0;">📊 Model Specifications</h3>
251
- <div class="benchmark-grid">
252
- <div class="benchmark-item">
253
- <div class="value">1.6T</div>
254
- <div class="label">Total Parameters</div>
255
- </div>
256
- <div class="benchmark-item">
257
- <div class="value">49B</div>
258
- <div class="label">Activated Parameters</div>
259
- </div>
260
- <div class="benchmark-item">
261
- <div class="value">1M</div>
262
- <div class="label">Context Length</div>
263
- </div>
264
- <div class="benchmark-item">
265
- <div class="value">32T+</div>
266
- <div class="label">Training Tokens</div>
267
- </div>
268
- </div>
269
-
270
- <h3>🎯 Key Benchmarks</h3>
271
- <div class="benchmark-grid">
272
- <div class="benchmark-item">
273
- <div class="value">93.5</div>
274
- <div class="label">LiveCodeBench</div>
275
- </div>
276
- <div class="benchmark-item">
277
- <div class="value">3206</div>
278
- <div class="label">Codeforces Rating</div>
279
- </div>
280
- <div class="benchmark-item">
281
- <div class="value">87.5</div>
282
- <div class="label">MMLU-Pro</div>
283
- </div>
284
- <div class="benchmark-item">
285
- <div class="value">80.6%</div>
286
- <div class="label">SWE Verified</div>
287
- </div>
288
- </div>
289
-
290
- <h3>💡 Key Innovations</h3>
291
- <ul style="padding-left: 20px;">
292
- <li>Hybrid Attention (CSA + HCA)</li>
293
- <li>Manifold-Constrained Hyper-Connections</li>
294
- <li>Muon Optimizer</li>
295
- <li>Two-stage Post-training</li>
296
- <li>FP4 + FP8 Mixed Precision</li>
297
- </ul>
298
- </div>
299
- """)
300
 
301
- # Configuration Panel
302
- with gr.Group():
303
- gr.Markdown("### ⚙️ Generation Settings")
304
-
305
- thinking_mode = gr.Radio(
306
- choices=["Non-think", "Think High", "Think Max"],
307
- value="Think High",
308
- label="🧠 Reasoning Mode",
309
- info="""
310
- • Non-think: Fast, intuitive responses for daily tasks
311
- • Think High: Deliberate reasoning for complex problems
312
- Think Max: Maximum effort for hardest challenges
313
- """
314
- )
315
-
316
- show_thinking = gr.Checkbox(
317
- value=True,
318
- label="📝 Show Thinking Process",
319
- info="Display the model's reasoning steps"
320
- )
321
-
322
- system_prompt = gr.Textbox(
323
- label="📋 System Prompt",
324
- value=DEFAULT_SYSTEM_PROMPT,
325
- lines=3,
326
- max_lines=5
327
- )
328
-
329
- with gr.Accordion("🔧 Advanced Parameters", open=False):
330
- max_tokens = gr.Slider(
331
- minimum=64,
332
- maximum=32768,
333
- value=4096,
334
- step=64,
335
- label="Max Tokens"
336
- )
337
-
338
- temperature = gr.Slider(
339
- minimum=0.0,
340
- maximum=2.0,
341
- value=0.7,
342
- step=0.05,
343
- label="Temperature",
344
- info="0 = deterministic, 1+ = creative"
345
- )
346
-
347
- top_p = gr.Slider(
348
- minimum=0.0,
349
- maximum=1.0,
350
- value=1.0,
351
- step=0.05,
352
- label="Top P"
353
- )
354
-
355
- stream_output = gr.Checkbox(
356
- value=True,
357
- label="📡 Stream Output",
358
- info="Show response as it's generated"
359
- )
360
 
361
- # Quick examples
362
- gr.Markdown("### 💡 Quick Examples")
363
- examples = [
364
- "Explain quantum computing to a 10-year-old",
365
- "Write a Python function for Fibonacci with memoization",
366
- "What are the key features of DeepSeek-V4?",
367
- "Solve: If x² + y² = 25 and x + y = 7, find x and y",
368
- "Design a REST API for a social media platform",
369
- ]
370
  gr.Examples(
371
- examples=examples,
372
- inputs=gr.Textbox(label="Click to try", visible=False),
 
 
 
 
373
  )
374
 
375
- # Right - Chat Interface
376
  with gr.Column(scale=2):
377
- # Chatbot - Gradio 6.0 compatible (removed bubble_full_width)
378
  chatbot = gr.Chatbot(
379
  label="💬 Chat with DeepSeek-V4 Pro",
380
- height=550,
381
- type="messages"
382
  )
383
 
384
- # Thinking process display
385
  with gr.Accordion("🧠 Thinking Process", open=True):
386
- thinking_display = gr.Markdown(
387
- value="*The model's reasoning will appear here...*",
388
- elem_classes="thinking-box"
389
- )
390
 
391
- # Input area
392
  with gr.Row():
393
  message_input = gr.Textbox(
394
- label="Your Message",
395
- placeholder="Type your message here... (Shift+Enter for new line)",
396
  lines=2,
397
- max_lines=5,
398
- scale=9,
399
- autofocus=True
400
- )
401
- send_btn = gr.Button(
402
- "🚀 Send",
403
- variant="primary",
404
- scale=1,
405
- size="lg"
406
  )
 
407
 
408
- # Control buttons
409
  with gr.Row():
410
- clear_btn = gr.Button("🗑️ Clear Chat", size="sm")
411
- retry_btn = gr.Button("🔄 Retry", size="sm", variant="secondary")
412
 
413
- # Status bar
414
  status_display = gr.Textbox(
415
  label="Status",
416
- value="✅ Ready | Using DeepSeek API (deepseek-v4-pro)",
417
- interactive=False,
418
- elem_classes="status-bar"
419
  )
420
 
421
  # Footer
422
- gr.HTML("""
423
- <div style="text-align: center; margin-top: 30px; padding: 20px; color: #666; border-top: 1px solid #e0e0e0;">
424
- <p style="margin: 5px 0;">
425
- <a href="https://huggingface.co/deepseek-ai/DeepSeek-V4-Pro" target="_blank">📦 Model Card</a> |
426
- <a href="https://platform.deepseek.com/api_keys" target="_blank">🔑 Get API Key</a> |
427
- <a href="https://platform.deepseek.com/docs" target="_blank">📚 API Docs</a> |
428
- <a href="https://deepseek.ai" target="_blank">🌐 Homepage</a>
429
- </p>
430
- <p style="margin: 5px 0; font-size: 0.9em;">
431
- ⚡ Powered by DeepSeek API • Streaming Available • MIT License
432
- </p>
433
- <p style="margin: 5px 0; font-size: 0.8em; opacity: 0.7;">
434
- DeepSeek-AI © 2026 • All benchmarks are for reference only
435
- </p>
436
- </div>
437
  """)
438
 
439
  # ==================== Event Handlers ====================
440
 
441
  def process_message(
442
- message: str,
443
- history: list,
444
- thinking_mode: str,
445
- show_thinking: bool,
446
- system_prompt: str,
447
- max_tokens: int,
448
- temperature: float,
449
- top_p: float,
450
- stream_output: bool
451
  ):
452
- """Process message with streaming or non-streaming mode"""
453
  if not message.strip():
454
  yield message, history, "", "Please enter a message."
455
  return
456
 
457
- # Check API key
458
  if not os.environ.get('DEEPSEEK_API_KEY'):
459
- error_msg = "⚠️ **API Key Missing**\n\nPlease set your `DEEPSEEK_API_KEY` environment variable.\nGet one at: https://platform.deepseek.com/api_keys"
460
- if history is None:
461
- history = []
462
- history.append({"role": "user", "content": message})
463
- history.append({"role": "assistant", "content": error_msg})
464
- yield "", history, "", "❌ API Key not configured"
465
  return
466
 
467
- if stream_output:
468
- # Use streaming
469
- # Convert history format for internal use
470
- internal_history = []
471
- if history:
472
- for i in range(0, len(history), 2):
473
- if i + 1 < len(history):
474
- internal_history.append((history[i]["content"], history[i+1]["content"]))
475
-
476
- for msg, hist, content, thinking, status in generate_response_stream(
477
- message, internal_history, thinking_mode, max_tokens,
478
- temperature, top_p, system_prompt, show_thinking
479
- ):
480
- # Convert hist back to Gradio format
481
- gr_history = []
482
- for user_msg, assistant_msg in hist:
483
- gr_history.append({"role": "user", "content": user_msg})
484
- gr_history.append({"role": "assistant", "content": assistant_msg})
485
- yield msg, gr_history, thinking, status
486
- else:
487
- # Use non-streaming
488
- internal_history = []
489
- if history:
490
- for i in range(0, len(history), 2):
491
- if i + 1 < len(history):
492
- internal_history.append((history[i]["content"], history[i+1]["content"]))
493
-
494
- msg, hist, content, thinking, status = generate_response(
495
- message, internal_history, thinking_mode, max_tokens,
496
- temperature, top_p, system_prompt, show_thinking
497
- )
498
-
499
- gr_history = []
500
- for user_msg, assistant_msg in hist:
501
- gr_history.append({"role": "user", "content": user_msg})
502
- gr_history.append({"role": "assistant", "content": assistant_msg})
503
-
504
- yield msg, gr_history, thinking, status
505
 
506
- # Wire up send button
507
  send_btn.click(
508
  fn=process_message,
509
- inputs=[
510
- message_input, chatbot, thinking_mode, show_thinking,
511
- system_prompt, max_tokens, temperature, top_p, stream_output
512
- ],
513
- outputs=[message_input, chatbot, thinking_display, status_display],
514
- show_progress="hidden"
515
  )
516
 
517
- # Wire up Enter key
518
  message_input.submit(
519
  fn=process_message,
520
- inputs=[
521
- message_input, chatbot, thinking_mode, show_thinking,
522
- system_prompt, max_tokens, temperature, top_p, stream_output
523
- ],
524
- outputs=[message_input, chatbot, thinking_display, status_display],
525
- show_progress="hidden"
526
  )
527
 
528
- # Clear chat
529
- def clear_chat():
530
- return (
531
- [],
532
- "*The model's reasoning will appear here...*",
533
- "✅ Chat cleared. Ready for new conversation."
534
- )
535
-
536
  clear_btn.click(
537
- fn=clear_chat,
538
  outputs=[chatbot, thinking_display, status_display]
539
  )
540
-
541
- # Retry last message
542
- def retry_last(history):
543
- if not history or len(history) < 2:
544
- return history, ""
545
- # Remove last assistant message, keep last user message
546
- last_user_msg = history[-2]["content"] if len(history) >= 2 else ""
547
- new_history = history[:-2] if len(history) >= 2 else []
548
- return new_history, last_user_msg
549
-
550
- retry_btn.click(
551
- fn=retry_last,
552
- inputs=[chatbot],
553
- outputs=[chatbot, message_input]
554
- )
555
 
556
  return demo
557
 
558
  # ==================== Main ====================
559
  if __name__ == "__main__":
560
- # Try to load .env file
561
  try:
562
  from dotenv import load_dotenv
563
  load_dotenv()
564
  except ImportError:
565
  pass
566
 
567
- # Check environment
568
  api_key = os.environ.get('DEEPSEEK_API_KEY')
569
  if not api_key:
570
- print("\n" + "=" * 60)
571
  print("⚠️ DEEPSEEK_API_KEY not found!")
572
- print("=" * 60)
573
- print("\nTo get started:")
574
- print("1. Get your API key: https://platform.deepseek.com/api_keys")
575
- print("2. Set the environment variable:")
576
- print(" export DEEPSEEK_API_KEY='your-key-here'")
577
- print("\nOr create a .env file:")
578
- print(' echo DEEPSEEK_API_KEY=your-key-here > .env')
579
- print("\n" + "=" * 60 + "\n")
580
 
581
- # Create demo
582
  demo = create_demo()
583
 
584
- # Custom CSS
585
- custom_css = """
586
- :root {
587
- --primary: #667eea;
588
- --secondary: #764ba2;
589
- }
590
-
591
- .deepseek-header {
592
- text-align: center;
593
- margin-bottom: 20px;
594
- padding: 30px;
595
- background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
596
- border-radius: 16px;
597
- color: white;
598
- }
599
-
600
- .deepseek-header h1 {
601
- font-size: 2.8em;
602
- font-weight: 800;
603
- margin: 0;
604
- text-shadow: 2px 2px 4px rgba(0,0,0,0.2);
605
- }
606
-
607
- .deepseek-header p {
608
- font-size: 1.2em;
609
- opacity: 0.95;
610
- margin: 10px 0 0 0;
611
- }
612
-
613
- .model-info {
614
- background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
615
- padding: 20px;
616
- border-radius: 12px;
617
- margin-bottom: 20px;
618
- border: 1px solid #e0e0e0;
619
- }
620
-
621
- .benchmark-grid {
622
- display: grid;
623
- grid-template-columns: repeat(auto-fit, minmax(150px, 1fr));
624
- gap: 12px;
625
- margin: 15px 0;
626
- }
627
-
628
- .benchmark-item {
629
- background: white;
630
- padding: 12px;
631
- border-radius: 8px;
632
- text-align: center;
633
- box-shadow: 0 2px 4px rgba(0,0,0,0.1);
634
- transition: transform 0.2s;
635
- }
636
-
637
- .benchmark-item:hover {
638
- transform: translateY(-2px);
639
- box-shadow: 0 4px 8px rgba(0,0,0,0.15);
640
- }
641
-
642
- .benchmark-item .value {
643
- font-size: 1.5em;
644
- font-weight: 700;
645
- color: #667eea;
646
- }
647
-
648
- .benchmark-item .label {
649
- font-size: 0.85em;
650
- color: #666;
651
- margin-top: 4px;
652
- }
653
-
654
- .thinking-box {
655
- background: #f8f9fa;
656
- border-left: 4px solid #667eea;
657
- padding: 15px;
658
- margin: 10px 0;
659
- border-radius: 8px;
660
- font-style: italic;
661
- color: #555;
662
- }
663
-
664
- .status-bar {
665
- padding: 10px;
666
- background: #f5f5f5;
667
- border-radius: 8px;
668
- font-family: monospace;
669
- font-size: 0.9em;
670
- }
671
- """
672
-
673
- # Launch with Gradio 6.0 compatible parameters
674
- demo.queue(
675
- max_size=50,
676
- default_concurrency_limit=10
677
- ).launch(
678
- server_name="0.0.0.0",
679
- server_port=7860,
680
- share=False,
681
- debug=False,
682
- show_error=True,
683
- theme=gr.themes.Soft(),
684
- css=custom_css
685
- )
 
3
  from openai import OpenAI
4
  import os
5
  import time
6
+ from typing import List, Tuple
7
 
8
  # ==================== Configuration ====================
9
  DEFAULT_SYSTEM_PROMPT = "You are DeepSeek-V4, an advanced AI assistant with strong reasoning capabilities. Provide accurate, helpful, and well-reasoned responses."
 
32
  "Please set your API key:\n"
33
  "1. Get your key from: https://platform.deepseek.com/api_keys\n"
34
  "2. Set environment variable:\n"
35
+ " export DEEPSEEK_API_KEY='your-api-key-here'"
 
36
  )
37
 
38
  return OpenAI(
 
50
  top_p: float = 1.0,
51
  system_prompt: str = DEFAULT_SYSTEM_PROMPT,
52
  show_thinking: bool = True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  ):
54
+ """Generate response using DeepSeek API"""
 
 
 
 
 
55
  if not message.strip():
56
+ yield "", history, "", "Please enter a message."
57
  return
58
 
59
  client = get_client()
60
 
61
  # Build messages array
62
  messages = [{"role": "system", "content": system_prompt}]
63
+
64
  for user_msg, assistant_msg in history:
65
  messages.append({"role": "user", "content": user_msg})
66
  if assistant_msg:
67
  messages.append({"role": "assistant", "content": assistant_msg})
68
+
69
  messages.append({"role": "user", "content": message})
70
 
71
  reasoning_effort = REASONING_EFFORT_MAP.get(thinking_mode, "high")
 
74
  try:
75
  start_time = time.time()
76
 
77
+ # Streaming call
78
  stream = client.chat.completions.create(
79
  model="deepseek-v4-pro",
80
  messages=messages,
 
83
  temperature=temperature,
84
  top_p=top_p,
85
  reasoning_effort=reasoning_effort,
86
+ extra_body={"thinking": {"type": thinking_type}}
 
 
87
  )
88
 
89
  content_chunks = []
 
93
  if chunk.choices[0].delta.content:
94
  content_chunks.append(chunk.choices[0].delta.content)
95
 
 
96
  if hasattr(chunk.choices[0].delta, 'reasoning_content'):
97
  if chunk.choices[0].delta.reasoning_content:
98
  thinking_chunks.append(chunk.choices[0].delta.reasoning_content)
 
102
 
103
  full_response = current_content
104
  if show_thinking and current_thinking:
105
+ full_response = f"[Thinking]\n{current_thinking}\n\n[Response]\n{current_content}"
106
 
107
  elapsed = time.time() - start_time
108
+ status = f"🔄 Streaming... ({elapsed:.1f}s)"
109
 
110
+ yield "", history + [(message, full_response)], current_thinking, status
111
 
112
+ # Final
113
  end_time = time.time()
114
  final_content = ''.join(content_chunks)
115
  final_thinking = ''.join(thinking_chunks)
116
 
117
  full_response = final_content
118
  if show_thinking and final_thinking:
119
+ full_response = f"[Thinking]\n{final_thinking}\n\n[Response]\n{final_content}"
120
 
121
+ status = f"✅ Done in {end_time - start_time:.2f}s"
122
+ yield "", history + [(message, full_response)], final_thinking, status
123
 
124
  except Exception as e:
125
  error_msg = f"❌ Error: {str(e)}"
126
+ yield "", history + [(message, error_msg)], "", error_msg
127
 
128
  # ==================== Gradio Interface ====================
129
  def create_demo():
130
  """Create the Gradio interface"""
131
 
132
+ with gr.Blocks(title="DeepSeek-V4 Pro Demo") as demo:
 
 
 
 
133
 
134
  # Header
135
+ gr.Markdown("""
136
+ # 🚀 DeepSeek-V4 Pro
137
+ **Towards Highly Efficient Million-Token Context Intelligence**
138
+
139
+ Powered by DeepSeek API • 1.6T Parameters • 49B Activated • 1M Context
 
140
  """)
141
 
 
142
  with gr.Row():
143
+ # Left sidebar
144
+ with gr.Column(scale=1, min_width=300):
145
+ gr.Markdown("### ⚙️ Settings")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
146
 
147
+ thinking_mode = gr.Radio(
148
+ choices=["Non-think", "Think High", "Think Max"],
149
+ value="Think High",
150
+ label="🧠 Reasoning Mode"
151
+ )
152
+
153
+ show_thinking = gr.Checkbox(
154
+ value=True,
155
+ label="📝 Show Thinking Process"
156
+ )
157
+
158
+ system_prompt = gr.Textbox(
159
+ label="📋 System Prompt",
160
+ value=DEFAULT_SYSTEM_PROMPT,
161
+ lines=3
162
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
163
 
164
+ with gr.Accordion("🔧 Advanced", open=False):
165
+ max_tokens = gr.Slider(64, 32768, value=4096, step=64, label="Max Tokens")
166
+ temperature = gr.Slider(0.0, 2.0, value=0.7, step=0.05, label="Temperature")
167
+ top_p = gr.Slider(0.0, 1.0, value=1.0, step=0.05, label="Top P")
168
+
169
+ gr.Markdown("### 💡 Examples")
 
 
 
170
  gr.Examples(
171
+ examples=[
172
+ "Explain quantum computing simply",
173
+ "Write a Python Fibonacci function",
174
+ "What's new in DeepSeek-V4?",
175
+ ],
176
+ inputs=gr.Textbox(label="Try an example", visible=False),
177
  )
178
 
179
+ # Right - Chat
180
  with gr.Column(scale=2):
 
181
  chatbot = gr.Chatbot(
182
  label="💬 Chat with DeepSeek-V4 Pro",
183
+ height=500
 
184
  )
185
 
 
186
  with gr.Accordion("🧠 Thinking Process", open=True):
187
+ thinking_display = gr.Markdown("*Reasoning will appear here...*")
 
 
 
188
 
 
189
  with gr.Row():
190
  message_input = gr.Textbox(
191
+ label="Message",
192
+ placeholder="Type your message...",
193
  lines=2,
194
+ scale=9
 
 
 
 
 
 
 
 
195
  )
196
+ send_btn = gr.Button("🚀 Send", variant="primary", scale=1)
197
 
 
198
  with gr.Row():
199
+ clear_btn = gr.Button("🗑️ Clear", size="sm")
 
200
 
 
201
  status_display = gr.Textbox(
202
  label="Status",
203
+ value="✅ Ready",
204
+ interactive=False
 
205
  )
206
 
207
  # Footer
208
+ gr.Markdown("""
209
+ ---
210
+ [📦 Model Card](https://huggingface.co/deepseek-ai/DeepSeek-V4-Pro) |
211
+ [🔑 Get API Key](https://platform.deepseek.com/api_keys) |
212
+ [📚 Docs](https://platform.deepseek.com/docs)
 
 
 
 
 
 
 
 
 
 
213
  """)
214
 
215
  # ==================== Event Handlers ====================
216
 
217
  def process_message(
218
+ message, history, thinking_mode, show_thinking,
219
+ system_prompt, max_tokens, temperature, top_p
 
 
 
 
 
 
 
220
  ):
 
221
  if not message.strip():
222
  yield message, history, "", "Please enter a message."
223
  return
224
 
 
225
  if not os.environ.get('DEEPSEEK_API_KEY'):
226
+ error = "⚠️ Please set DEEPSEEK_API_KEY environment variable"
227
+ history = history or []
228
+ history.append((message, error))
229
+ yield "", history, "", " API Key missing"
 
 
230
  return
231
 
232
+ history = history or []
233
+
234
+ for msg, hist, thinking, status in generate_response(
235
+ message, history, thinking_mode, max_tokens,
236
+ temperature, top_p, system_prompt, show_thinking
237
+ ):
238
+ yield msg, hist, thinking, status
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
239
 
240
+ # Events
241
  send_btn.click(
242
  fn=process_message,
243
+ inputs=[message_input, chatbot, thinking_mode, show_thinking,
244
+ system_prompt, max_tokens, temperature, top_p],
245
+ outputs=[message_input, chatbot, thinking_display, status_display]
 
 
 
246
  )
247
 
 
248
  message_input.submit(
249
  fn=process_message,
250
+ inputs=[message_input, chatbot, thinking_mode, show_thinking,
251
+ system_prompt, max_tokens, temperature, top_p],
252
+ outputs=[message_input, chatbot, thinking_display, status_display]
 
 
 
253
  )
254
 
 
 
 
 
 
 
 
 
255
  clear_btn.click(
256
+ fn=lambda: ([], "*Reasoning will appear here...*", "✅ Cleared"),
257
  outputs=[chatbot, thinking_display, status_display]
258
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
259
 
260
  return demo
261
 
262
  # ==================== Main ====================
263
  if __name__ == "__main__":
 
264
  try:
265
  from dotenv import load_dotenv
266
  load_dotenv()
267
  except ImportError:
268
  pass
269
 
 
270
  api_key = os.environ.get('DEEPSEEK_API_KEY')
271
  if not api_key:
272
+ print("\n" + "=" * 50)
273
  print("⚠️ DEEPSEEK_API_KEY not found!")
274
+ print("=" * 50)
275
+ print("Get one: https://platform.deepseek.com/api_keys")
276
+ print("Set it: export DEEPSEEK_API_KEY='your-key'\n")
 
 
 
 
 
277
 
 
278
  demo = create_demo()
279
 
280
+ # Try launch without theme/css first (most compatible)
281
+ try:
282
+ demo.queue(max_size=50).launch(
283
+ server_name="0.0.0.0",
284
+ server_port=7860,
285
+ share=False
286
+ )
287
+ except TypeError:
288
+ # Fallback: simplest launch
289
+ demo.queue().launch()