# NOTE(review): the original paste began with "Spaces: / Running / Running" —
# Hugging Face Space page status chrome captured by the scrape, not program code.
import os
import subprocess

import gradio as gr
from huggingface_hub import hf_hub_download
# MODEL REGISTRY
# Maps the UI display label to the Hugging Face repo id and GGUF filename
# that hf_hub_download needs in order to fetch the weights.
MODELS = {
    "Bonsai 1.7B (248MB)": {
        "repo": "prism-ml/Bonsai-1.7B-gguf",
        "file": "Bonsai-1.7B-v1.0.gguf",
    },
    "Bonsai 4B (572MB)": {
        "repo": "prism-ml/Bonsai-4B-gguf",
        "file": "Bonsai-4B-v1.0.gguf",
    },
    "Bonsai 8B (1.15GB)": {
        "repo": "prism-ml/Bonsai-8B-gguf",
        "file": "Bonsai-8B-v1.0.gguf",
    },
}
def chat(message, history, system_prompt, model_choice, temp):
    """Stream a completion for *message* from the selected Bonsai GGUF model.

    Args:
        message: Latest user message from the ChatInterface.
        history: Prior (user, assistant) turn pairs supplied by Gradio.
            NOTE(review): assumes tuple-style history — confirm the installed
            Gradio version is not using the "messages" dict format.
        system_prompt: System instruction prepended to the transcript.
        model_choice: Key into MODELS selecting repo/file to run.
        temp: Sampling temperature forwarded to llama-cli.

    Yields:
        The accumulated assistant response so far, once per stdout line,
        so the UI renders a growing reply. On failure, yields a single
        "Inference Error: ..." string instead of raising.
    """
    # 1. Resolve the GGUF weights (hf_hub_download caches, so repeat calls
    #    after the first download are just a local path lookup).
    config = MODELS[model_choice]
    model_path = hf_hub_download(repo_id=config["repo"], filename=config["file"])

    # 2. Build the prompt as a plain "System/User/Assistant" transcript.
    prompt = f"System: {system_prompt}\n"
    for human, assistant in history:
        prompt += f"User: {human}\nAssistant: {assistant}\n"
    prompt += f"User: {message}\nAssistant:"

    # 3. Invoke the llama-cli binary (placed in the working dir by the
    #    Dockerfile) and stream its stdout back line by line.
    cmd = [
        "./llama-cli", "-m", model_path,
        "-p", prompt,
        "-n", "512",
        "--threads", "4",
        "--temp", str(temp),
        "--repeat_penalty", "1.1",
        "--no-display-prompt",
    ]
    try:
        # `with` closes the pipe and reaps the child even if the generator is
        # abandoned mid-stream; the original Popen was never waited on and
        # leaked a zombie process per request.
        with subprocess.Popen(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,  # llama-cli logs to stderr; keep it out of the reply
            text=True,
            bufsize=1,  # line-buffered so tokens stream promptly
        ) as process:
            response = ""
            for line in process.stdout:
                response += line
                yield response
    except Exception as e:
        # Surface failures (missing binary, bad model path, ...) in the chat UI.
        yield f"Inference Error: {str(e)}"
# GRADIO UI
with gr.Blocks(theme=gr.themes.Default()) as demo:
    gr.Markdown("# 🌿 Bonsai 1-Bit AI Sandbox")
    with gr.Row():
        # Left column: model/config controls plus canned benchmark prompts.
        with gr.Column(scale=1):
            model_select = gr.Dropdown(
                list(MODELS.keys()),
                value="Bonsai 1.7B (248MB)",
                label="Model Selector",
            )
            sys_input = gr.Textbox(
                value="You are a helpful AI assistant. Be concise and prioritize logic.",
                label="System Prompt",
                lines=4,
            )
            temp_slider = gr.Slider(0.1, 1.0, value=0.7, label="Temperature")
            gr.Markdown("### Standard Benchmarks")
            btn_math = gr.Button("Logic: Math Problem")
            btn_code = gr.Button("Code: C Implementation")
        # Right column: the chat surface; extra inputs are forwarded to chat().
        with gr.Column(scale=3):
            chatbot = gr.ChatInterface(
                fn=chat,
                additional_inputs=[sys_input, model_select, temp_slider],
            )

    # Benchmark buttons fill the chat input with a canned prompt.
    # Fix: the original passed outputs=None, so the lambda's return value was
    # discarded and clicking the buttons did nothing visible.
    btn_math.click(
        fn=lambda: "Explain why 1+1=2 logically.",
        outputs=chatbot.textbox,
    )
    btn_code.click(
        fn=lambda: "Write a C function to reverse a string in-place.",
        outputs=chatbot.textbox,
    )

# Queue enables generator streaming; bind to all interfaces on the Space's port.
demo.queue().launch(server_name="0.0.0.0", server_port=7860)