Demo-bonsai / app.py
rufatronics's picture
Update app.py
39f50df verified
import gradio as gr
import subprocess
import os
from huggingface_hub import hf_hub_download
# MODEL REGISTRY
# Each row is (display label, Hugging Face repo id, GGUF filename); the label
# becomes the dictionary key and the other two become the "repo"/"file" entry.
MODELS = {
    label: {"repo": repo_id, "file": gguf_name}
    for label, repo_id, gguf_name in (
        ("Bonsai 1.7B (248MB)", "prism-ml/Bonsai-1.7B-gguf", "Bonsai-1.7B-v1.0.gguf"),
        ("Bonsai 4B (572MB)", "prism-ml/Bonsai-4B-gguf", "Bonsai-4B-v1.0.gguf"),
        ("Bonsai 8B (1.15GB)", "prism-ml/Bonsai-8B-gguf", "Bonsai-8B-v1.0.gguf"),
    )
}
def _history_lines(history):
    """Yield one prompt fragment per past conversation turn.

    Handles both history layouts a Gradio chat function may receive:
    ``(user, assistant)`` pairs and ``{"role": ..., "content": ...}`` dicts.
    Dict entries whose role is neither "user" nor "assistant" are skipped.
    """
    speakers = {"user": "User", "assistant": "Assistant"}
    for turn in history or ():
        if isinstance(turn, dict):
            speaker = speakers.get(turn.get("role"))
            if speaker is not None:
                yield f"{speaker}: {turn.get('content')}\n"
        else:
            human, assistant = turn
            yield f"User: {human}\nAssistant: {assistant}\n"


def chat(message, history, system_prompt, model_choice, temp):
    """Stream a model reply for ``message`` by running the llama-cli binary.

    Args:
        message: The new user message.
        history: Previous turns, either ``(user, assistant)`` pairs or
            role/content dicts.
        system_prompt: Text placed on the leading "System:" line of the prompt.
        model_choice: A key of ``MODELS`` selecting the repo and file to load.
        temp: Sampling temperature, passed to llama-cli via ``--temp``.

    Yields:
        The reply accumulated so far, once per line of llama-cli output. If
        the process cannot be started or exits with a non-zero status, a
        final string containing "Inference Error:" is yielded.

    Raises:
        KeyError: If ``model_choice`` is not a key of ``MODELS``.
    """
    # 1. Download/Path Setup
    config = MODELS[model_choice]
    model_path = hf_hub_download(repo_id=config["repo"], filename=config["file"])

    # 2. Build the Prompt (Standard Format)
    prompt = "".join(
        [
            f"System: {system_prompt}\n",
            *_history_lines(history),
            f"User: {message}\nAssistant:",
        ]
    )

    # 3. Subprocess Call (The Old Way)
    # Using the binary we moved in the Dockerfile
    cmd = [
        "./llama-cli", "-m", model_path,
        "-p", prompt,
        "-n", "512",
        "--threads", "4",
        "--temp", str(temp),
        "--repeat_penalty", "1.1",
        "--no-display-prompt",
    ]

    try:
        # The context manager closes the pipe and waits for the child on exit,
        # so finished requests do not leave zombie processes behind.
        with subprocess.Popen(cmd, stdout=subprocess.PIPE, text=True, bufsize=1) as process:
            response = ""
            try:
                for line in process.stdout:
                    response += line
                    yield response
            finally:
                # Reached early when the consumer abandons the generator;
                # stop the child instead of letting it keep generating.
                if process.poll() is None:
                    process.terminate()
        if process.returncode:
            # Surface a failed run instead of showing a silent empty reply.
            yield f"{response}\nInference Error: llama-cli exited with code {process.returncode}"
    except Exception as e:
        yield f"Inference Error: {str(e)}"
# GRADIO UI
# Left column: model/system-prompt/temperature controls and two benchmark
# buttons. Right column: the chat interface, which forwards the controls to
# chat() as additional inputs.
with gr.Blocks(theme=gr.themes.Default()) as demo:
    gr.Markdown("# 🌿 Bonsai 1-Bit AI Sandbox")
    with gr.Row():
        with gr.Column(scale=1):
            model_select = gr.Dropdown(list(MODELS.keys()), value="Bonsai 1.7B (248MB)", label="Model Selector")
            sys_input = gr.Textbox(
                value="You are a helpful AI assistant. Be concise and prioritize logic.",
                label="System Prompt", lines=4
            )
            temp_slider = gr.Slider(0.1, 1.0, value=0.7, label="Temperature")
            gr.Markdown("### Standard Benchmarks")
            btn_math = gr.Button("Logic: Math Problem")
            btn_code = gr.Button("Code: C Implementation")
        with gr.Column(scale=3):
            chatbot = gr.ChatInterface(
                fn=chat,
                additional_inputs=[sys_input, model_select, temp_slider]
            )
    # Simple Test Triggers
    # Each button returns a canned prompt; route it into the chat input box so
    # the user can review and submit it (with outputs=None the returned text
    # had nowhere to go).
    btn_math.click(fn=lambda: "Explain why 1+1=2 logically.", outputs=chatbot.textbox)
    btn_code.click(fn=lambda: "Write a C function to reverse a string in-place.", outputs=chatbot.textbox)
demo.queue().launch(server_name="0.0.0.0", server_port=7860)