Spaces:
Sleeping
Sleeping
import gradio as gr
from transformers import pipeline

# Load the model on the CPU. This runs once at import time, so every call to
# the chat handler reuses the same pipeline object.
pipe = pipeline(
    "text-generation",
    model="Qwen/Qwen2.5-0.5B-Instruct",
    device_map="cpu",
)
def predict(message, history):
    """Generate the assistant's reply to ``message`` for the chat UI.

    Earlier turns from ``history`` are included in the prompt so the model
    sees the whole conversation rather than only the latest message.

    Args:
        message: The latest user message (text).
        history: Earlier turns as supplied by the chat UI. Two layouts are
            accepted: a list of ``{"role": ..., "content": ...}`` dicts, or a
            list of ``(user_text, assistant_text)`` pairs. ``None`` or an
            empty list means there is no prior context.

    Returns:
        The ``content`` field of the last message in the first result's
        ``generated_text`` (presumably the newly generated assistant turn).
    """
    messages = []
    for turn in history or []:
        if isinstance(turn, dict):
            role = turn.get("role")
            content = turn.get("content")
            # Keep only plain-text turns and drop any extra keys, so the
            # pipeline receives bare role/content messages.
            if role in ("system", "user", "assistant") and isinstance(content, str):
                messages.append({"role": role, "content": content})
        elif isinstance(turn, (list, tuple)) and len(turn) == 2:
            # Pair layout: (user_text, assistant_text); either side may be
            # missing or non-text, in which case it is skipped.
            for role, content in zip(("user", "assistant"), turn):
                if isinstance(content, str):
                    messages.append({"role": role, "content": content})
    messages.append({"role": "user", "content": message})

    # Generate the response.
    results = pipe(messages, max_new_tokens=512)

    # Return only the text of the reply.
    return results[0]["generated_text"][-1]["content"]
# Build the chat UI around `predict` and enable request queuing.
# NOTE(review): the original comment said the endpoint name is defined as
# "chat" here, but no endpoint name is set explicitly in this code; it
# presumably relies on gr.ChatInterface's default -- confirm.
demo = gr.ChatInterface(fn=predict)
demo = demo.queue()

if __name__ == "__main__":
    # Start the app only when this file is run as a script.
    demo.launch()