Eeppa committed on
Commit
3912358
·
verified ·
1 Parent(s): 76f7d2f

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +64 -0
app.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
3
+ import torch
4
+ from threading import Thread
5
+
6
# Hugging Face Hub repository of the checkpoint this app serves.
model_id = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"

# The tokenizer and model are created once, at import time, and are then
# read as module-level globals by generate_response().
tokenizer = AutoTokenizer.from_pretrained(model_id)

# dtype and device placement are delegated to the library ("auto");
# low_cpu_mem_usage is requested to stay within the Space's memory limits.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype="auto",
    device_map="auto",
    low_cpu_mem_usage=True,
)
18
+
19
def _message_text(content):
    """Return the plain text carried by one history ``content`` value.

    Strings are returned unchanged. A list/tuple of dict parts is reduced to
    the concatenation of their string ``"text"`` fields (assumes typed content
    parts such as ``{"type": "text", "text": ...}`` -- TODO confirm against
    the installed Gradio version). Anything else, including ``None``, yields
    ``None`` so the caller can skip it.
    """
    if isinstance(content, str):
        return content
    if isinstance(content, (list, tuple)):
        parts = [
            part["text"]
            for part in content
            if isinstance(part, dict) and isinstance(part.get("text"), str)
        ]
        return "".join(parts) if parts else None
    return None


def _build_messages(message, history, system_prompt):
    """Build the role/content message list fed to the chat template.

    ``history`` may be a sequence of ``(user, assistant)`` pairs or a sequence
    of ``{"role": ..., "content": ...}`` dicts; which one Gradio delivers
    depends on its version and the ChatInterface ``type`` setting. Turns with
    no usable text (e.g. a ``None`` assistant slot) are dropped instead of
    being passed to the template.
    """
    messages = [{"role": "system", "content": system_prompt}]
    for entry in history or []:
        if isinstance(entry, dict):
            turns = [(entry.get("role"), entry.get("content"))]
        else:
            user_msg, assistant_msg = entry
            turns = [("user", user_msg), ("assistant", assistant_msg)]
        for role, content in turns:
            text = _message_text(content)
            if role in ("user", "assistant") and text is not None:
                messages.append({"role": role, "content": text})
    messages.append({"role": "user", "content": message})
    return messages


def generate_response(message, history):
    """Stream the model's reply to ``message`` given the prior ``history``.

    Args:
        message: The newest user message.
        history: Earlier turns, as ``(user, assistant)`` pairs or as
            role/content dicts (both layouts are accepted).

    Yields:
        The reply accumulated so far, once per decoded chunk, so the caller
        can display the answer as it grows.

    Raises:
        Exception: Whatever ``model.generate`` raised in the worker thread,
            re-raised here after the stream has been closed.
    """
    # DeepSeek works best with a clear instruction.
    # NOTE(review): the DeepSeek-R1 model card reportedly advises against a
    # system prompt; kept unchanged to preserve behaviour -- confirm.
    system_prompt = "You are DeepSeek-R1, a helpful assistant. Keep your answers direct and avoid hallucinating outside contexts."

    messages = _build_messages(message, history, system_prompt)

    # Render the prompt with the tokenizer's own chat template.
    input_text = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    # The template is responsible for any special tokens, so do not let the
    # tokenizer add them a second time while encoding the rendered text.
    inputs = tokenizer(
        [input_text], return_tensors="pt", add_special_tokens=False
    ).to(model.device)

    streamer = TextIteratorStreamer(
        tokenizer, skip_prompt=True, skip_special_tokens=True
    )

    generation_kwargs = dict(
        inputs,
        streamer=streamer,
        max_new_tokens=512,
        do_sample=True,  # temperature only applies when sampling is enabled
        temperature=0.6,  # DeepSeek R1 recommends 0.5-0.7
        repetition_penalty=1.1,  # Prevents the "yapping" loop
    )

    failures = []

    def _run_generation():
        # If generate() fails, the streamer never receives its end-of-stream
        # marker and the consumer loop below would block forever. Close the
        # stream explicitly and hand the error back to the caller.
        try:
            model.generate(**generation_kwargs)
        except Exception as exc:
            failures.append(exc)
            streamer.end()

    # Run in a thread so the UI doesn't freeze while tokens are produced.
    thread = Thread(target=_run_generation)
    thread.start()

    partial_text = ""
    for new_text in streamer:
        partial_text += new_text
        # DeepSeek-R1 often uses <think> tags. They are left in so the
        # 'reasoning' stays visible.
        yield partial_text

    thread.join()
    if failures:
        raise failures[0]
54
+
55
# Chat UI wired to generate_response, which is a generator and therefore
# delivers its reply incrementally.
demo = gr.ChatInterface(
    fn=generate_response,
    title="DeepSeek-R1 (1.5B) - The Smarter Slow AI",
    description="Now streaming! Watch it 'think' before it speaks. Still slow, but way smarter.",
    theme="soft",
)

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()