import gradio as gr
import subprocess
from huggingface_hub import hf_hub_download

# MODEL REGISTRY
MODELS = {
    "Bonsai 1.7B (248MB)": {"repo": "prism-ml/Bonsai-1.7B-gguf", "file": "Bonsai-1.7B-v1.0.gguf"},
    "Bonsai 4B (572MB)": {"repo": "prism-ml/Bonsai-4B-gguf", "file": "Bonsai-4B-v1.0.gguf"},
    "Bonsai 8B (1.15GB)": {"repo": "prism-ml/Bonsai-8B-gguf", "file": "Bonsai-8B-v1.0.gguf"},
}
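# NOTE: the model dropdown below is built from MODELS.keys(), so supporting a
# new GGUF build only needs another entry here (label -> Hub repo + filename).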

def chat(message, history, system_prompt, model_choice, temp):
    # 1. Download/Path Setup
    config = MODELS[model_choice]
    model_path = hf_hub_download(repo_id=config["repo"], filename=config["file"])
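    # hf_hub_download caches under the local HF cache, so each model file is
    # fetched from the Hub only once; later calls resolve to the cached path.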

    # 2. Build the Prompt (Standard Format)
    prompt = f"System: {system_prompt}\n"
    for human, assistant in history:
        prompt += f"User: {human}\nAssistant: {assistant}\n"
    prompt += f"User: {message}\nAssistant:"

    # 3. Subprocess Call (The Old Way)
    # Runs the llama-cli binary copied into the image by the Dockerfile.
    cmd = [
        "./llama-cli", "-m", model_path,
        "-p", prompt,
        "-n", "512",
        "--threads", "4",
        "--temp", str(temp),
        "--repeat-penalty", "1.1",
        "--no-display-prompt",
    ]
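    # Flag sketch (llama.cpp's llama-cli; recent builds also accept the
    # underscore spellings of these flags):
    #   -n 512                cap on the number of generated tokens
    #   --temp                sampling temperature from the UI slider
    #   --repeat-penalty 1.1  mild penalty against verbatim repetition
    #   --no-display-prompt   keep the echoed prompt out of stdout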

    try:
        # Stream stdout line by line so the UI updates as text arrives;
        # llama-cli sends its own logging to stderr, so stdout is just the reply.
        process = subprocess.Popen(cmd, stdout=subprocess.PIPE, text=True, bufsize=1)
        response = ""
        for line in process.stdout:
            response += line
            yield response
        process.wait()
    except Exception as e:
        yield f"Inference Error: {e}"

# GRADIO UI
with gr.Blocks(theme=gr.themes.Default()) as demo:
    gr.Markdown("# 🌿 Bonsai 1-Bit AI Sandbox")
    with gr.Row():
        with gr.Column(scale=1):
            model_select = gr.Dropdown(list(MODELS.keys()), value="Bonsai 1.7B (248MB)", label="Model Selector")
            sys_input = gr.Textbox(
                value="You are a helpful AI assistant. Be concise and prioritize logic.",
                label="System Prompt", lines=4,
            )
            temp_slider = gr.Slider(0.1, 1.0, value=0.7, label="Temperature")
            gr.Markdown("### Standard Benchmarks")
            btn_math = gr.Button("Logic: Math Problem")
            btn_code = gr.Button("Code: C Implementation")
        with gr.Column(scale=3):
            chatbot = gr.ChatInterface(
                fn=chat,
                additional_inputs=[sys_input, model_select, temp_slider],
            )
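            # Gradio appends additional_inputs, in this order, after the
            # (message, history) arguments, matching chat()'s signature.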

    # Simple Test Triggers: prefill the chat input with a benchmark prompt.
    # gr.ChatInterface exposes its input box as .textbox, so we target it here.
    btn_math.click(fn=lambda: "Explain why 1+1=2 logically.", outputs=chatbot.textbox)
    btn_code.click(fn=lambda: "Write a C function to reverse a string in-place.", outputs=chatbot.textbox)

demo.queue().launch(server_name="0.0.0.0", server_port=7860)
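
# Runs outside Spaces as well: `python app.py`, then open http://localhost:7860.
# queue() is required for the generator-based streaming in chat().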