import gradio as gr
import subprocess
import os
from huggingface_hub import hf_hub_download

# MODEL REGISTRY
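# GGUF builds pulled from the Hugging Face Hub; labels show the on-disk file size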
MODELS = {
    "Bonsai 1.7B (248MB)": {"repo": "prism-ml/Bonsai-1.7B-gguf", "file": "Bonsai-1.7B-v1.0.gguf"},
    "Bonsai 4B (572MB)": {"repo": "prism-ml/Bonsai-4B-gguf", "file": "Bonsai-4B-v1.0.gguf"},
    "Bonsai 8B (1.15GB)": {"repo": "prism-ml/Bonsai-8B-gguf", "file": "Bonsai-8B-v1.0.gguf"}
}

def chat(message, history, system_prompt, model_choice, temp):
    # 1. Download/Path Setup
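    # hf_hub_download caches to the local HF cache, so re-selecting a model
    # returns the already-downloaded path instead of fetching it again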
    config = MODELS[model_choice]
    model_path = hf_hub_download(repo_id=config["repo"], filename=config["file"])
    
    # 2. Build the Prompt (Standard Format)
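    # NOTE: this plain "User:/Assistant:" transcript is a generic fallback, not a
    # model-specific chat template; swap it out if the GGUF defines its own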
    prompt = f"System: {system_prompt}\n"
    for human, assistant in history:
        prompt += f"User: {human}\nAssistant: {assistant}\n"
    prompt += f"User: {message}\nAssistant:"

    # 3. Subprocess Call (The Old Way)
    # Using the binary we moved in the Dockerfile
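    # (assumes the Docker build copied llama-cli into the app's working directory)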
    cmd = [
        "./llama-cli", "-m", model_path,
        "-p", prompt,
        "-n", "512",                            # cap generation at 512 tokens
        "--threads", str(os.cpu_count() or 4),  # use all available cores
        "--temp", str(temp),
        "--repeat-penalty", "1.1",
        "--no-display-prompt"                   # don't echo the prompt back on stdout
    ]

    try:
        # bufsize=1 with text=True line-buffers stdout, so output streams to the
        # UI as llama-cli emits it rather than arriving in one block at the end
        process = subprocess.Popen(cmd, stdout=subprocess.PIPE, text=True, bufsize=1)
        response = ""
        for line in process.stdout:
            response += line
            yield response
        process.wait()
        if process.returncode != 0:
            yield response + f"\n[llama-cli exited with code {process.returncode}]"
    except Exception as e:
        yield f"Inference Error: {e}"

# GRADIO UI
with gr.Blocks(theme=gr.themes.Default()) as demo:
    gr.Markdown("# 🌿 Bonsai 1-Bit AI Sandbox")
    
    with gr.Row():
        with gr.Column(scale=1):
            model_select = gr.Dropdown(list(MODELS.keys()), value="Bonsai 1.7B (248MB)", label="Model Selector")
            sys_input = gr.Textbox(
                value="You are a helpful AI assistant. Be concise and prioritize logic.",
                label="System Prompt", lines=4
            )
            temp_slider = gr.Slider(0.1, 1.0, value=0.7, label="Temperature")
            
            gr.Markdown("### Standard Benchmarks")
            btn_math = gr.Button("Logic: Math Problem")
            btn_code = gr.Button("Code: C Implementation")

        with gr.Column(scale=3):
            chatbot = gr.ChatInterface(
                fn=chat,
                additional_inputs=[sys_input, model_select, temp_slider]
            )

    # Simple Test Triggers: drop a benchmark prompt into the chat input box
    btn_math.click(fn=lambda: "Explain why 1+1=2 logically.", outputs=chatbot.textbox)
    btn_code.click(fn=lambda: "Write a C function to reverse a string in-place.", outputs=chatbot.textbox)

demo.queue().launch(server_name="0.0.0.0", server_port=7860)