Spaces:
Running
Running
| import gradio as gr | |
| from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer | |
| import torch | |
| from threading import Thread | |
# Hugging Face Hub identifier of the checkpoint this app serves
# (DeepSeek-R1-Distill-Qwen-1.5B).
model_id = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"

# The tokenizer and the weights are loaded once, when the module is imported,
# and are then shared by every chat request.
tokenizer = AutoTokenizer.from_pretrained(model_id)

# bfloat16 stores each weight in two bytes instead of float32's four, which
# roughly halves the memory needed for the weights and helps avoid
# out-of-memory crashes.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    low_cpu_mem_usage=True,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
def _content_to_text(content):
    """Reduce one history ``content`` value to plain text ("" if it has none)."""
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        # NOTE(review): newer Gradio releases appear to deliver content as a
        # list of typed blocks such as {"type": "text", "text": ...}; confirm
        # against the installed Gradio version. Non-text blocks are ignored.
        pieces = []
        for block in content:
            if isinstance(block, str):
                pieces.append(block)
            elif isinstance(block, dict) and isinstance(block.get("text"), str):
                pieces.append(block["text"])
        return "".join(pieces)
    # None (a turn that has no reply yet), file tuples, components, etc.
    return ""


def _history_to_messages(history):
    """Convert Gradio chat history into chat-template message dicts.

    Two layouts are accepted: role/content dicts ("messages" style) and the
    legacy ``[user_msg, assistant_msg]`` pairs. Turns without usable text are
    skipped so the chat template never receives ``None`` content.
    """
    messages = []
    for entry in history or []:
        if isinstance(entry, dict):
            turns = [(entry.get("role"), entry.get("content"))]
        else:
            user_msg, assistant_msg = entry
            turns = [("user", user_msg), ("assistant", assistant_msg)]
        for role, content in turns:
            text = _content_to_text(content)
            if role in ("user", "assistant") and text:
                messages.append({"role": role, "content": text})
    return messages


def generate_response(message, history):
    """Stream the model's reply to ``message`` for a Gradio chat callback.

    Args:
        message: The user's newest message.
        history: Earlier turns of the conversation, either as role/content
            dicts or as legacy ``[user_msg, assistant_msg]`` pairs.

    Yields:
        The reply accumulated so far (each yield is the full text to date,
        not just the newest fragment).

    Raises:
        Exception: Whatever ``model.generate`` raised in the worker thread is
            re-raised here once the stream has ended.
    """
    # System prompt to keep the model focused
    system_prompt = "You are DeepSeek-R1, a helpful assistant. Use the <think> tags to show your reasoning."

    # Build conversation: system prompt, prior turns, then the new message.
    messages = [{"role": "system", "content": system_prompt}]
    messages.extend(_history_to_messages(history))
    messages.append({"role": "user", "content": message})

    # Prepare the input. The rendered template is expected to carry its own
    # special tokens already, so the tokenizer must not add them a second
    # time (that would duplicate BOS).
    input_text = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    inputs = tokenizer(
        [input_text], return_tensors="pt", add_special_tokens=False
    ).to(model.device)

    # Setup streaming
    streamer = TextIteratorStreamer(
        tokenizer, skip_prompt=True, skip_special_tokens=True
    )
    generation_kwargs = dict(
        inputs,
        streamer=streamer,
        max_new_tokens=512,
        temperature=0.6,
        repetition_penalty=1.1,
        do_sample=True,
    )

    failures = []

    def _run_generation():
        try:
            model.generate(**generation_kwargs)
        except Exception as exc:  # re-raised in the consumer below
            failures.append(exc)
            # Without this the consumer loop would wait on the streamer
            # forever, because a failed generate() never signals the end.
            streamer.end()

    # Run in a thread so the UI doesn't freeze
    thread = Thread(target=_run_generation)
    thread.start()

    partial_text = ""
    for new_text in streamer:
        partial_text += new_text
        yield partial_text

    thread.join()
    if failures:
        raise failures[0]
# Chat UI backed by the streaming generator above. The 'theme' argument is
# deliberately left out to avoid Gradio 6 errors.
demo = gr.ChatInterface(
    generate_response,
    description="Streaming enabled. Watch it think!",
    title="DeepSeek-R1 (1.5B) - Smart Slow AI",
)

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()