Spaces:

Eeppa
/

Very_Slow.Ai

Sleeping

Eeppa committed 11 days ago

Commit

783756c

verified ·

1 Parent(s): f67d6dc

Create app.py

Files changed (1) hide show

app.py ADDED Viewed

+import gradio as gr
+from transformers import pipeline
+# Initialize the pipeline with the small model
+# We use the 'instruct' version because it's better at following prompts
+generator = pipeline("text-generation", model="HuggingFaceTB/SmolLM2-135M-Instruct")
+def generate_response(message, history):
+    # Format the conversation for the model
+    # SmolLM2 uses a specific chat template
+    formatted_prompt = f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
+    # Generate response
+    # max_new_tokens: 512 is plenty for a small model
+    # temperature: 0.7 keeps it creative but focused
+    output = generator(
+        formatted_prompt,
+        max_new_tokens=512,
+        temperature=0.7,
+        top_p=0.9,
+        do_sample=True
+    )
+    # Extract the text from the output
+    response = output[0]['generated_text']
+    # Clean up the response to only show the assistant's part
+    return response.split("<|im_start|>assistant\n")[-1]
+# Create a simple Chat Interface
+demo = gr.ChatInterface(
+    fn=generate_response,
+    title="Very Slow Ai (but actually fast)",
+    description="Running SmolLM2 135M on a basic CPU Space."
+)
+if __name__ == "__main__":
+    demo.launch()