Spaces:

Eeppa
/

Very_Slow.Ai

Sleeping

Eeppa committed 11 days ago

Commit

56bf91e

verified ·

1 Parent(s): 783756c

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,37 +1,39 @@
 import gradio as gr
 from transformers import pipeline
-# Initialize the pipeline with the small model
-# We use the 'instruct' version because it's better at following prompts
 generator = pipeline("text-generation", model="HuggingFaceTB/SmolLM2-135M-Instruct")
 def generate_response(message, history):
-    # Format the conversation for the model
-    # SmolLM2 uses a specific chat template
-    formatted_prompt = f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
-    # Generate response
-    # max_new_tokens: 512 is plenty for a small model
-    # temperature: 0.7 keeps it creative but focused
     output = generator(
-        formatted_prompt,
-        max_new_tokens=512,
-        temperature=0.7,
-        top_p=0.9,
-        do_sample=True
     )
-    # Extract the text from the output
     response = output[0]['generated_text']
-    # Clean up the response to only show the assistant's part
-    return response.split("<|im_start|>assistant\n")[-1]
-# Create a simple Chat Interface
 demo = gr.ChatInterface(
     fn=generate_response,
-    title="Very Slow Ai (but actually fast)",
-    description="Running SmolLM2 135M on a basic CPU Space."
 )
 if __name__ == "__main__":

 import gradio as gr
 from transformers import pipeline
+# Initialize the pipeline
 # Built at module level, so the text-generation pipeline is created once at import time
 # and the same object is reused by every call to generate_response.
 generator = pipeline("text-generation", model="HuggingFaceTB/SmolLM2-135M-Instruct")
 def _content_text(content):
     """Return the plain text carried by one history entry's content."""
     if isinstance(content, str):
         return content
     if isinstance(content, (list, tuple)):
         # NOTE(review): assumes multi-part content is a list of dicts that keep
         # their text under "text" -- confirm against the installed Gradio version.
         return "".join(
             part["text"]
             for part in content
             if isinstance(part, dict) and isinstance(part.get("text"), str)
         )
     # None (e.g. a reply that was never produced) or an unsupported type.
     return ""


 def _history_turns(history):
     """Yield (role, text) for each past turn, for either supported history shape."""
     for entry in history or ():
         if isinstance(entry, dict):
             # Role/content dict, one per turn. Tuple-unpacking such a dict would
             # yield its key names instead of the messages, so read the fields.
             yield entry.get("role"), _content_text(entry.get("content"))
         else:
             # (user_msg, assistant_msg) pair, one per exchange.
             user_msg, assistant_msg = entry
             yield "user", _content_text(user_msg)
             yield "assistant", _content_text(assistant_msg)


 def generate_response(message, history):
     """Generate the assistant's reply to ``message`` given the earlier chat.

     Args:
         message: The user's newest message.
         history: Earlier turns, either as ``(user_msg, assistant_msg)`` pairs or
             as ``{"role": ..., "content": ...}`` dicts. May be empty or ``None``.

     Returns:
         The newly generated assistant text with ``<|im_end|>`` markers and
         surrounding whitespace removed.
     """
     # The system turn anchors the model's tone; it always comes first.
     prompt_parts = [
         "<|im_start|>system\nYou are a concise and helpful assistant. No yapping.<|im_end|>\n"
     ]
     # Replay earlier turns so the model has a memory of the conversation.
     # Empty turns and roles other than user/assistant are left out.
     for role, text in _history_turns(history):
         if role in ("user", "assistant") and text:
             prompt_parts.append(f"<|im_start|>{role}\n{text}<|im_end|>\n")
     # The current message, followed by an open assistant turn for the model to fill.
     prompt_parts.append(f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n")
     full_prompt = "".join(prompt_parts)
     # Sampling settings chosen to prevent rambling.
     output = generator(
         full_prompt,
         max_new_tokens=256,
         temperature=0.4,
         do_sample=True,
         repetition_penalty=1.2,
     )
     response = output[0]['generated_text']
     # Keep only what follows the last assistant marker, i.e. the new reply.
     return response.split("<|im_start|>assistant\n")[-1].replace("<|im_end|>", "").strip()
+# Create the Chat Interface
 # Chat UI whose replies are produced by generate_response.
 demo = gr.ChatInterface(
     generate_response,
     title="Not So Slow AI",
     description="SmolLM2 135M: Now with 100% less accidental festival advice.",
 )
 if __name__ == "__main__":