"""Minimal Gradio chat UI backed by a local GGUF model via llama-cpp-python."""

import gradio as gr
from llama_cpp import Llama

# Upper bound on how many earlier messages are replayed to the model on each
# request. Kept even so a trimmed, strictly alternating history still starts
# with a user turn. This is a rough guard against overflowing the model's
# context window, not a token-accurate limit.
MAX_HISTORY_MESSAGES = 8

# Loaded once at import time so every chat request reuses the same model.
# NOTE(review): confirm this repo/filename pair exists on the Hugging Face Hub
# and that the GGUF file ships a chat template suitable for chat completion.
llm = Llama.from_pretrained(
    repo_id="TheBloke/TinyLlama-1.1B-GGUF",
    filename="tinyllama-1.1b.Q4_K_M.gguf",
)


def _as_text(content):
    """Return the plain-text part of a history entry's content, or None.

    Accepts a plain string, or a list of ``{"type": "text", "text": ...}``
    blocks (presumably what newer Gradio releases pass -- verify against the
    installed version). Anything else (files, components, ``None``) yields
    ``None`` so the caller can skip it.
    """
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        parts = [
            part["text"]
            for part in content
            if isinstance(part, dict)
            and part.get("type") == "text"
            and isinstance(part.get("text"), str)
        ]
        if parts:
            return "\n".join(parts)
    return None


def _history_to_messages(history):
    """Convert Gradio chat history into OpenAI-style role/content messages.

    Handles both history shapes: a list of ``{"role", "content"}`` dicts and a
    list of ``[user, assistant]`` pairs. Entries without usable text are
    skipped rather than raising.
    """
    messages = []
    for entry in history or []:
        if isinstance(entry, dict):
            role = entry.get("role")
            text = _as_text(entry.get("content"))
            if role in ("system", "user", "assistant") and text is not None:
                messages.append({"role": role, "content": text})
        elif isinstance(entry, (list, tuple)) and len(entry) == 2:
            for role, content in zip(("user", "assistant"), entry):
                text = _as_text(content)
                if text is not None:
                    messages.append({"role": role, "content": text})
    return messages


def chat(message, history):
    """Generate the assistant's reply to ``message`` given the prior turns.

    Args:
        message: The latest user message.
        history: Earlier turns as supplied by ``gr.ChatInterface``; may be
            empty or ``None``.

    Returns:
        The text content of the first completion choice.
    """
    current_turn = {"role": "user", "content": message}
    earlier = _history_to_messages(history)[-MAX_HISTORY_MESSAGES:]

    try:
        response = llm.create_chat_completion(messages=[*earlier, current_turn])
    except ValueError:
        # llama-cpp-python raises ValueError when the prompt does not fit in
        # the context window. Retry with only the current message (the
        # behaviour before history was replayed); if that also fails, the
        # error propagates to the caller.
        if not earlier:
            raise
        response = llm.create_chat_completion(messages=[current_turn])

    return response["choices"][0]["message"]["content"]


demo = gr.ChatInterface(fn=chat)
# Launched at module level (not behind a __main__ guard) to preserve the
# script's existing behaviour of starting the UI as soon as it is executed.
demo.launch()