Spaces:

Eeppa
/

VerySlowAISmartedition

Running

App Files Community

Eeppa committed 12 days ago

Commit

3912358

verified ·

1 Parent(s): 76f7d2f

Create app.py

Browse files

Files changed (1) hide show

app.py +64 -0

app.py ADDED Viewed

	@@ -0,0 +1,64 @@

+import gradio as gr
+from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
+import torch
+from threading import Thread
+# Hub identifier of the checkpoint served by this app
+# (DeepSeek-R1-Distill-Qwen-1.5B).
+model_id = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
+# Load the tokenizer and model once, at import time. Both are module-level
+# globals that generate_response() reads on every request.
+# low_cpu_mem_usage is passed to stay within Space limits.
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+model = AutoModelForCausalLM.from_pretrained(
+    model_id,
+    torch_dtype="auto",
+    device_map="auto",
+    low_cpu_mem_usage=True
+)
+def generate_response(message, history):
+    # DeepSeek works best with a clear instruction
+    system_prompt = "You are DeepSeek-R1, a helpful assistant. Keep your answers direct and avoid hallucinating outside contexts."
+    # Build conversation with history
+    messages = [{"role": "system", "content": system_prompt}]
+    for user_msg, assistant_msg in history:
+        messages.append({"role": "user", "content": user_msg})
+        messages.append({"role": "assistant", "content": assistant_msg})
+    messages.append({"role": "user", "content": message})
+    # Prepare the input using the official template
+    input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+    inputs = tokenizer([input_text], return_tensors="pt").to(model.device)
+    # Setup streaming
+    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
+    generation_kwargs = dict(
+        inputs,
+        streamer=streamer,
+        max_new_tokens=512,
+        temperature=0.6,      # DeepSeek R1 recommends 0.5-0.7
+        repetition_penalty=1.1 # Prevents the "yapping" loop
+    )
+    # Run in a thread so the UI doesn't freeze
+    thread = Thread(target=model.generate, kwargs=generation_kwargs)
+    thread.start()
+    partial_text = ""
+    for new_text in streamer:
+        partial_text += new_text
+        # DeepSeek-R1 often uses <think> tags. We'll leave them in so you see the 'reasoning'.
+        yield partial_text
+# Create the chat interface. generate_response is a generator that yields the
+# reply accumulated so far, so each yield carries a longer version of it.
+demo = gr.ChatInterface(
+    fn=generate_response,
+    title="DeepSeek-R1 (1.5B) - The Smarter Slow AI",
+    description="Now streaming! Watch it 'think' before it speaks. Still slow, but way smarter.",
+    theme="soft"
+)
+# Start the app only when this file is run as a script, not when imported.
+if __name__ == "__main__":
+    demo.launch()