Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| from llama_cpp import Llama | |
# Location of the Q2-quantised GGUF model file shipped inside the Space.
MODEL_PATH = "finance-chat.Q2_K.gguf"

# Instantiate the llama.cpp binding once at import time; generate() below
# reuses this single module-level instance for every call.
llm = Llama(
    verbose=False,
    n_ctx=4096,
    n_threads=2,  # free Hugging Face CPU tier offers roughly 2 threads
    model_path=MODEL_PATH,
)
def generate(prompt):
    """Generate a completion for ``prompt`` using the module-level ``llm``.

    Args:
        prompt: Text submitted by the user.

    Returns:
        The text of the first returned choice with surrounding whitespace
        stripped, or an empty string when ``prompt`` is empty, ``None`` or
        whitespace-only.
    """
    # Nothing to condition on: answer immediately instead of running
    # an inference call on a blank prompt.
    if not prompt or not prompt.strip():
        return ""

    output = llm(
        prompt,
        max_tokens=256,
        temperature=0.7,
        top_p=0.9,
    )
    # The completion result is indexed as choices -> first entry -> "text".
    return output["choices"][0]["text"].strip()
# Minimal text-in / text-out Gradio UI wired to generate().
demo = gr.Interface(
    title="Finance Chat LLM (GGUF Q2_K - Free Space)",
    fn=generate,
    inputs="text",
    outputs="text",
)

# Start serving the interface.
demo.launch()