rufatronics committed on
Commit
732bdc1
·
verified ·
1 Parent(s): 5a5eeed

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -0
app.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import AutoModelForCausalLM, AutoTokenizer
3
+ import torch
4
+
5
# Model setup: everything below runs once at import time.
checkpoint = "HuggingFaceTB/SmolLM2-135M-Instruct"
device = "cpu"  # the free tier only provides a CPU

# Tokenizer and weights both come from the same checkpoint identifier.
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(checkpoint)
model = model.to(device)
11
+
12
def _history_to_messages(history):
    """Convert Gradio chat history into a list of role/content dicts.

    Two history shapes are accepted:

    * pairs ``(user_text, assistant_text)`` -- each truthy side becomes one
      message, user first;
    * dicts carrying ``"role"`` and ``"content"`` keys -- kept when the role
      is ``"user"`` or ``"assistant"`` and the content is a non-empty string.

    Args:
        history: Iterable of prior turns in either shape, or ``None``.

    Returns:
        A new list of ``{"role": ..., "content": ...}`` dicts in
        conversation order.
    """
    messages = []
    for turn in history or []:
        if isinstance(turn, dict):
            role = turn.get("role")
            content = turn.get("content")
            # Non-string content cannot be rendered into a text prompt,
            # so such entries are skipped rather than passed through.
            if role in ("user", "assistant") and isinstance(content, str) and content:
                messages.append({"role": role, "content": content})
            continue

        user_text, assistant_text = turn[0], turn[1]
        if user_text:
            messages.append({"role": "user", "content": user_text})
        if assistant_text:
            messages.append({"role": "assistant", "content": assistant_text})
    return messages


def chat(message, history):
    """Generate the assistant's reply to ``message`` given prior turns.

    Args:
        message: The latest user message.
        history: Earlier conversation turns, either as
            ``(user, assistant)`` pairs or as role/content dicts.

    Returns:
        The decoded text of the newly generated tokens only.
    """
    messages = [{"role": "system", "content": "You are a helpful assistant."}]
    messages.extend(_history_to_messages(history))
    messages.append({"role": "user", "content": message})

    # Render the conversation with the tokenizer's chat template, ending with
    # the generation prompt so the model continues as the assistant.
    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    # The rendered prompt already comes from the chat template, so the
    # tokenizer must not add its own special tokens a second time.
    inputs = tokenizer(prompt, return_tensors="pt", add_special_tokens=False).to(device)

    outputs = model.generate(
        **inputs,
        max_new_tokens=500,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
    )

    # Skip the prompt tokens so only the generated continuation is decoded.
    prompt_length = inputs["input_ids"].shape[1]
    return tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
31
+
32
# Chat UI: Gradio calls `chat` to produce each reply.
demo = gr.ChatInterface(
    title="SmolLM2-135M Personal Assistant",
    fn=chat,
)

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
37
+