Eeppa committed on
Commit
56bf91e
·
verified ·
1 Parent(s): 783756c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -20
app.py CHANGED
@@ -1,37 +1,39 @@
1
  import gradio as gr
2
  from transformers import pipeline
3
 
4
- # Initialize the pipeline with the small model
5
- # We use the 'instruct' version because it's better at following prompts
6
  generator = pipeline("text-generation", model="HuggingFaceTB/SmolLM2-135M-Instruct")
7
 
8
  def generate_response(message, history):
9
- # Format the conversation for the model
10
- # SmolLM2 uses a specific chat template
11
- formatted_prompt = f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
12
 
13
- # Generate response
14
- # max_new_tokens: 512 is plenty for a small model
15
- # temperature: 0.7 keeps it creative but focused
 
 
 
 
 
 
16
  output = generator(
17
- formatted_prompt,
18
- max_new_tokens=512,
19
- temperature=0.7,
20
- top_p=0.9,
21
- do_sample=True
22
  )
23
 
24
- # Extract the text from the output
25
  response = output[0]['generated_text']
26
-
27
- # Clean up the response to only show the assistant's part
28
- return response.split("<|im_start|>assistant\n")[-1]
29
 
30
- # Create a simple Chat Interface
31
  demo = gr.ChatInterface(
32
  fn=generate_response,
33
- title="Very Slow Ai (but actually fast)",
34
- description="Running SmolLM2 135M on a basic CPU Space."
35
  )
36
 
37
  if __name__ == "__main__":
 
1
  import gradio as gr
2
  from transformers import pipeline
3
 
4
+ # Initialize the pipeline
 
5
  generator = pipeline("text-generation", model="HuggingFaceTB/SmolLM2-135M-Instruct")
6
 
7
  def generate_response(message, history):
8
+ # This 'system_prompt' anchors the AI
9
+ system_prompt = "<|im_start|>system\nYou are a concise and helpful assistant. No yapping.<|im_end|>\n"
 
10
 
11
+ # Build the conversation history so it has a memory
12
+ full_prompt = system_prompt
13
+ for user_msg, assistant_msg in history:
14
+ full_prompt += f"<|im_start|>user\n{user_msg}<|im_end|>\n<|im_start|>assistant\n{assistant_msg}<|im_end|>\n"
15
+
16
+ # Add the current message
17
+ full_prompt += f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
18
+
19
+ # Generate with settings that prevent rambling
20
  output = generator(
21
+ full_prompt,
22
+ max_new_tokens=256,
23
+ temperature=0.4,
24
+ do_sample=True,
25
+ repetition_penalty=1.2
26
  )
27
 
28
+ # Clean the output to only show the assistant's new text
29
  response = output[0]['generated_text']
30
+ return response.split("<|im_start|>assistant\n")[-1].replace("<|im_end|>", "").strip()
 
 
31
 
32
+ # Create the Chat Interface
33
  demo = gr.ChatInterface(
34
  fn=generate_response,
35
+ title="Not So Slow AI",
36
+ description="SmolLM2 135M: Now with 100% less accidental festival advice."
37
  )
38
 
39
  if __name__ == "__main__":