# NOTE(review): the original paste began with "Spaces: / Running / Running" —
# Hugging Face Space page status chrome captured by the scrape, not program code.
import os
import subprocess

import gradio as gr
from huggingface_hub import hf_hub_download
# MODEL REGISTRY
# Maps the UI display label to the Hugging Face repo id and GGUF filename
# that hf_hub_download needs in order to fetch the weights.
MODELS = {
    "Bonsai 1.7B (248MB)": {
        "repo": "prism-ml/Bonsai-1.7B-gguf",
        "file": "Bonsai-1.7B-v1.0.gguf",
    },
    "Bonsai 4B (572MB)": {
        "repo": "prism-ml/Bonsai-4B-gguf",
        "file": "Bonsai-4B-v1.0.gguf",
    },
    "Bonsai 8B (1.15GB)": {
        "repo": "prism-ml/Bonsai-8B-gguf",
        "file": "Bonsai-8B-v1.0.gguf",
    },
}
def chat(message, history, system_prompt, model_choice, temp):
    """Stream a completion for *message* from the selected Bonsai GGUF model.

    Args:
        message: Latest user message from the ChatInterface.
        history: Prior (user, assistant) turn pairs supplied by Gradio.
            NOTE(review): assumes tuple-style history — confirm the installed
            Gradio version is not using the "messages" dict format.
        system_prompt: System instruction prepended to the transcript.
        model_choice: Key into MODELS selecting repo/file to run.
        temp: Sampling temperature forwarded to llama-cli.

    Yields:
        The accumulated assistant response so far, once per stdout line,
        so the UI renders a growing reply. On failure, yields a single
        "Inference Error: ..." string instead of raising.
    """
    # 1. Resolve the GGUF weights (hf_hub_download caches, so repeat calls
    #    after the first download are just a local path lookup).
    config = MODELS[model_choice]
    model_path = hf_hub_download(repo_id=config["repo"], filename=config["file"])

    # 2. Build the prompt as a plain "System/User/Assistant" transcript.
    prompt = f"System: {system_prompt}\n"
    for human, assistant in history:
        prompt += f"User: {human}\nAssistant: {assistant}\n"
    prompt += f"User: {message}\nAssistant:"

    # 3. Invoke the llama-cli binary (placed in the working dir by the
    #    Dockerfile) and stream its stdout back line by line.
    cmd = [
        "./llama-cli", "-m", model_path,
        "-p", prompt,
        "-n", "512",
        "--threads", "4",
        "--temp", str(temp),
        "--repeat_penalty", "1.1",
        "--no-display-prompt",
    ]
    try:
        # `with` closes the pipe and reaps the child even if the generator is
        # abandoned mid-stream; the original Popen was never waited on and
        # leaked a zombie process per request.
        with subprocess.Popen(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,  # llama-cli logs to stderr; keep it out of the reply
            text=True,
            bufsize=1,  # line-buffered so tokens stream promptly
        ) as process:
            response = ""
            for line in process.stdout:
                response += line
                yield response
    except Exception as e:
        # Surface failures (missing binary, bad model path, ...) in the chat UI.
        yield f"Inference Error: {str(e)}"
# GRADIO UI
with gr.Blocks(theme=gr.themes.Default()) as demo:
    gr.Markdown("# 🌿 Bonsai 1-Bit AI Sandbox")
    with gr.Row():
        # Left column: model/config controls plus canned benchmark prompts.
        with gr.Column(scale=1):
            model_select = gr.Dropdown(
                list(MODELS.keys()),
                value="Bonsai 1.7B (248MB)",
                label="Model Selector",
            )
            sys_input = gr.Textbox(
                value="You are a helpful AI assistant. Be concise and prioritize logic.",
                label="System Prompt",
                lines=4,
            )
            temp_slider = gr.Slider(0.1, 1.0, value=0.7, label="Temperature")
            gr.Markdown("### Standard Benchmarks")
            btn_math = gr.Button("Logic: Math Problem")
            btn_code = gr.Button("Code: C Implementation")
        # Right column: the chat surface; extra inputs are forwarded to chat().
        with gr.Column(scale=3):
            chatbot = gr.ChatInterface(
                fn=chat,
                additional_inputs=[sys_input, model_select, temp_slider],
            )

    # Benchmark buttons fill the chat input with a canned prompt.
    # Fix: the original passed outputs=None, so the lambda's return value was
    # discarded and clicking the buttons did nothing visible.
    btn_math.click(
        fn=lambda: "Explain why 1+1=2 logically.",
        outputs=chatbot.textbox,
    )
    btn_code.click(
        fn=lambda: "Write a C function to reverse a string in-place.",
        outputs=chatbot.textbox,
    )

# Queue enables generator streaming; bind to all interfaces on the Space's port.
demo.queue().launch(server_name="0.0.0.0", server_port=7860)