# FinanceAgent / app.py
# (Hugging Face Space page header pasted into the file; kept as a comment
# so the module remains valid Python.)
# MuhammadHamza33's picture
# test
# d25852b
# raw
# history blame contribute delete
# 615 Bytes
import gradio as gr
from llama_cpp import Llama
# Location of the quantized (Q2_K) GGUF model bundled with the Space.
MODEL_PATH = "finance-chat.Q2_K.gguf"

# Runtime settings for llama.cpp, sized for the free CPU-only Space tier
# (roughly two usable threads, modest context window, quiet logs).
_LLM_SETTINGS = {
    "n_threads": 2,
    "n_ctx": 4096,
    "verbose": False,
}

# Instantiate the llama.cpp backend once at import time so every request
# reuses the same loaded model.
llm = Llama(model_path=MODEL_PATH, **_LLM_SETTINGS)
def generate(prompt):
    """Run one completion for *prompt* and return the stripped output text.

    Decoding parameters (max tokens, temperature, top-p) are fixed here;
    adjust them in this call to change generation behavior.
    """
    result = llm(prompt, max_tokens=256, temperature=0.7, top_p=0.9)
    first_choice = result["choices"][0]
    return first_choice["text"].strip()
# Minimal text-in / text-out Gradio UI wired to the generator above.
_ui_config = {
    "fn": generate,
    "inputs": "text",
    "outputs": "text",
    "title": "Finance Chat LLM (GGUF Q2_K - Free Space)",
}
demo = gr.Interface(**_ui_config)

# Start the Space's web server.
demo.launch()