Eeppa committed on
Commit
783756c
·
verified ·
1 Parent(s): f67d6dc

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -0
app.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import pipeline
3
+
4
# Build the text-generation pipeline once, at import time, around the small
# SmolLM2 checkpoint. The "Instruct" variant is used because it is better at
# following prompts.
generator = pipeline(
    task="text-generation",
    model="HuggingFaceTB/SmolLM2-135M-Instruct",
)
7
+
8
def _content_to_text(content):
    """Return the plain text carried by one chat-history entry ("" if none)."""
    if isinstance(content, str):
        return content
    if isinstance(content, dict):
        # NOTE(review): assumes structured parts look like
        # {"type": "text", "text": "..."} -- confirm against the Gradio
        # version installed on the Space.
        text = content.get("text")
        return text if isinstance(text, str) else ""
    if isinstance(content, list):
        return "".join(_content_to_text(part) for part in content)
    # Anything else (None, file tuples, components) has no text for the model.
    return ""


def _history_to_messages(history):
    """Convert the chat history into a list of role/content dicts.

    Two layouts are accepted: a list of ``{"role": ..., "content": ...}``
    dicts, and a list of ``[user_text, assistant_text]`` pairs. Entries that
    carry no text are skipped.
    """
    messages = []
    for turn in history or []:
        if isinstance(turn, dict):
            role = turn.get("role")
            text = _content_to_text(turn.get("content"))
            if role in ("user", "assistant") and text:
                messages.append({"role": role, "content": text})
        elif isinstance(turn, (list, tuple)) and len(turn) == 2:
            for role, part in zip(("user", "assistant"), turn):
                text = _content_to_text(part)
                if text:
                    messages.append({"role": role, "content": text})
    return messages


def generate_response(message, history):
    """Generate the assistant's reply to ``message``.

    Args:
        message: The newest user message.
        history: Earlier turns of the conversation (may be empty or None).
            They are included in the prompt so the model sees the whole
            conversation instead of only the last message.

    Returns:
        The text generated by the model for the assistant turn.
    """
    messages = _history_to_messages(history)
    messages.append({"role": "user", "content": message})

    # Let the tokenizer's own chat template format the conversation rather
    # than hand-writing the <|im_start|>/<|im_end|> markers, so the prompt
    # stays in step with the template shipped with the model.
    prompt = generator.tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )

    # max_new_tokens: 512 is plenty for a small model
    # temperature: 0.7 keeps it creative but focused
    # return_full_text=False makes the pipeline return only the newly
    # generated text, so the prompt does not have to be cut off by splitting
    # on a marker string.
    output = generator(
        prompt,
        max_new_tokens=512,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
        return_full_text=False,
    )

    return output[0]["generated_text"]
29
+
30
# Wrap generate_response in a Gradio chat UI.
demo = gr.ChatInterface(
    generate_response,
    description="Running SmolLM2 135M on a basic CPU Space.",
    title="Very Slow Ai (but actually fast)",
)

# Launch the app only when this file is executed directly as a script.
if __name__ == "__main__":
    demo.launch()