"""Minimal Gradio chat UI backed by a local Qwen2.5 instruct model on CPU."""

import gradio as gr
from transformers import pipeline

# Load the model so it runs on the CPU of your Space.
# The 0.5B model is used because it fits in the 16 GB of RAM of the free tier.
model_id = "Qwen/Qwen2.5-0.5B-Instruct"
pipe = pipeline("text-generation", model=model_id, device_map="cpu")

# Roles that are forwarded to the model when replaying the conversation.
_CHAT_ROLES = ("user", "assistant", "system")


def _content_to_text(content):
    """Return the plain text carried by one history entry, or None.

    Accepts a plain string, or a list of parts where each part is either a
    string or a dict with a "text" key. Anything else (e.g. file payloads)
    yields None so the caller can skip it.
    """
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        parts = []
        for part in content:
            if isinstance(part, str):
                parts.append(part)
            elif isinstance(part, dict) and isinstance(part.get("text"), str):
                parts.append(part["text"])
        return "\n".join(parts) if parts else None
    return None


def _history_to_messages(history):
    """Convert the chat history into a list of role/content message dicts.

    NOTE(review): the shape of `history` depends on the installed Gradio
    version and on the ChatInterface configuration -- confirm against the
    version pinned for this Space. Both shapes below are handled:
      * dict entries with "role" and "content" keys;
      * two-item (user, assistant) pairs.
    Entries without usable text are skipped.
    """
    messages = []
    for entry in history or []:
        if isinstance(entry, dict):
            role = entry.get("role")
            text = _content_to_text(entry.get("content"))
            if role in _CHAT_ROLES and text:
                messages.append({"role": role, "content": text})
        elif isinstance(entry, (list, tuple)) and len(entry) == 2:
            for role, content in zip(("user", "assistant"), entry):
                text = _content_to_text(content)
                if text:
                    messages.append({"role": role, "content": text})
    return messages


def chat_function(message, history):
    """Generate the assistant reply for `message`.

    Args:
        message: The latest user message.
        history: Previous turns of the conversation as supplied by the chat
            UI; may be empty or None.

    Returns:
        The text content of the last message in the generated conversation.
    """
    # Replay earlier turns so the model sees the conversation context, then
    # append the new user message.
    messages = _history_to_messages(history)
    messages.append({"role": "user", "content": message})

    # Generate the reply.
    results = pipe(messages, max_new_tokens=512, truncation=True)

    # The pipeline is given a list of messages; the reply is read from the
    # last message of the returned conversation.
    return results[0]["generated_text"][-1]["content"]


# Build the chat interface.
demo = gr.ChatInterface(fn=chat_function, title="Meu Chat Qwen Local")

if __name__ == "__main__":
    demo.launch()