"""Gradio chat sandbox that runs Bonsai GGUF models through a local ``llama-cli`` binary."""

import gradio as gr
import subprocess
import os
from typing import Iterator

from huggingface_hub import hf_hub_download

# MODEL REGISTRY
# Maps the label shown in the dropdown to the Hugging Face repo and GGUF file
# that `chat` downloads for it.
MODELS = {
    "Bonsai 1.7B (248MB)": {
        "repo": "prism-ml/Bonsai-1.7B-gguf",
        "file": "Bonsai-1.7B-v1.0.gguf",
    },
    "Bonsai 4B (572MB)": {
        "repo": "prism-ml/Bonsai-4B-gguf",
        "file": "Bonsai-4B-v1.0.gguf",
    },
    "Bonsai 8B (1.15GB)": {
        "repo": "prism-ml/Bonsai-8B-gguf",
        "file": "Bonsai-8B-v1.0.gguf",
    },
}

# Initial contents of the "System Prompt" textbox.
DEFAULT_SYSTEM_PROMPT = (
    "You are a helpful AI assistant. "
    "Be concise and prioritize logic."
)


def _build_prompt(message, history, system_prompt) -> str:
    """Flatten the conversation into a plain "System/User/Assistant" transcript.

    Args:
        message: The new user message.
        history: Previous turns, either as ``(user, assistant)`` pairs or as
            ``{"role": ..., "content": ...}`` dicts. ``None`` is treated as
            an empty history.
        system_prompt: Text placed on the leading ``System:`` line.

    Returns:
        The transcript, ending with an open ``Assistant:`` line for the model
        to complete.
    """
    lines = [f"System: {system_prompt}"]
    for turn in history or []:
        if isinstance(turn, dict):
            # Message-dict history: one entry per speaker.
            speaker = "User" if turn.get("role") == "user" else "Assistant"
            lines.append(f"{speaker}: {turn.get('content')}")
        else:
            # Pair history: one entry per exchange.
            human, assistant = turn
            lines.append(f"User: {human}")
            lines.append(f"Assistant: {assistant}")
    lines.append(f"User: {message}")
    lines.append("Assistant:")
    return "\n".join(lines)


def chat(message, history, system_prompt, model_choice, temp) -> Iterator[str]:
    """Stream a reply to ``message`` from the selected model.

    Args:
        message: The new user message.
        history: Previous turns of the conversation (see ``_build_prompt``).
        system_prompt: System instruction prepended to the prompt.
        model_choice: A key of ``MODELS``.
        temp: Sampling temperature, passed to ``llama-cli`` via ``--temp``.

    Yields:
        The response accumulated so far, once per line of ``llama-cli``
        output. On failure a single ``"Inference Error: ..."`` string is
        yielded instead of raising, so the message shows up in the chat.
    """
    response = ""
    # Top-level UI boundary: every failure (unknown model, download error,
    # missing binary, ...) is reported as a chat message.
    try:
        # 1. Download/Path Setup
        config = MODELS[model_choice]
        model_path = hf_hub_download(
            repo_id=config["repo"], filename=config["file"]
        )

        # 2. Build the Prompt (Standard Format)
        prompt = _build_prompt(message, history, system_prompt)

        # 3. Subprocess Call
        # Original note: the binary is the one moved into place by the
        # Dockerfile, hence the path relative to the working directory.
        cmd = [
            "./llama-cli",
            "-m", model_path,
            "-p", prompt,
            "-n", "512",
            "--threads", "4",
            "--temp", str(temp),
            "--repeat_penalty", "1.1",
            "--no-display-prompt",
        ]

        with subprocess.Popen(
            cmd,
            # Do not let the child inherit (and possibly block on) our stdin.
            stdin=subprocess.DEVNULL,
            stdout=subprocess.PIPE,
            text=True,
            # Decode explicitly so output handling does not depend on the
            # container's locale; undecodable bytes are replaced, not fatal.
            encoding="utf-8",
            errors="replace",
            bufsize=1,
        ) as process:
            try:
                for line in process.stdout:
                    response += line
                    yield response
            except BaseException:
                # Includes GeneratorExit (the consumer stopped reading):
                # stop the child instead of leaving it running.
                process.kill()
                raise
            return_code = process.wait()

        if return_code != 0:
            failure = f"Inference Error: llama-cli exited with code {return_code}"
            yield f"{response}\n{failure}" if response else failure
    except Exception as e:
        yield f"Inference Error: {str(e)}"


# GRADIO UI
with gr.Blocks(theme=gr.themes.Default()) as demo:
    gr.Markdown("# 🌿 Bonsai 1-Bit AI Sandbox")

    with gr.Row():
        with gr.Column(scale=1):
            model_select = gr.Dropdown(
                list(MODELS.keys()),
                value="Bonsai 1.7B (248MB)",
                label="Model Selector",
            )
            sys_input = gr.Textbox(
                value=DEFAULT_SYSTEM_PROMPT,
                label="System Prompt",
                lines=4,
            )
            temp_slider = gr.Slider(0.1, 1.0, value=0.7, label="Temperature")

            gr.Markdown("### Standard Benchmarks")
            btn_math = gr.Button("Logic: Math Problem")
            btn_code = gr.Button("Code: C Implementation")

        with gr.Column(scale=3):
            chatbot = gr.ChatInterface(
                fn=chat,
                additional_inputs=[sys_input, model_select, temp_slider],
            )

    # Simple Test Triggers
    # Each button places its benchmark prompt into the chat input box; the
    # user then submits it like any other message.
    btn_math.click(
        fn=lambda: "Explain why 1+1=2 logically.",
        outputs=chatbot.textbox,
    )
    btn_code.click(
        fn=lambda: "Write a C function to reverse a string in-place.",
        outputs=chatbot.textbox,
    )

demo.queue().launch(server_name="0.0.0.0", server_port=7860)