rufatronics committed
Commit 39f50df · verified · 1 Parent(s): 606fa1b

Update app.py

Files changed (1)
  1. app.py +47 -82
app.py CHANGED
@@ -1,107 +1,72 @@
  import gradio as gr
- from llama_cpp import Llama
- from huggingface_hub import hf_hub_download
  import os

- # 1. MODEL CONFIGURATIONS (Bonsai 1-bit Family)
  MODELS = {
-     "Bonsai 1.7B (Ultra-Light)": {"repo": "prism-ml/Bonsai-1.7B-gguf", "file": "Bonsai-1.7B-v1.0.gguf"},
-     "Bonsai 4B (Balanced)": {"repo": "prism-ml/Bonsai-4B-gguf", "file": "Bonsai-4B-v1.0.gguf"},
-     "Bonsai 8B (High Intelligence)": {"repo": "prism-ml/Bonsai-8B-gguf", "file": "Bonsai-8B-v1.0.gguf"}
  }

- # Persistent variables for the loaded model
- current_model = None
- loaded_model_name = ""
-
- def load_model(name):
-     global current_model, loaded_model_name
-     if loaded_model_name == name:
-         return current_model
-
-     print(f"--- Loading {name} ---")
-     model_config = MODELS[name]
-     model_path = hf_hub_download(repo_id=model_config["repo"], filename=model_config["file"])
-
-     # Initialize the 1-bit engine
-     current_model = Llama(
-         model_path=model_path,
-         n_ctx=2048,   # Standard context window
-         n_threads=4,  # Good for public CPU spaces
-         verbose=False
-     )
-     loaded_model_name = name
-     return current_model
-
- def chat_interface(message, history, system_prompt, model_name, temp, top_p):
-     # Ensure the selected model is loaded
-     llm = load_model(model_name)

-     # Build chat history
      prompt = f"System: {system_prompt}\n"
      for human, assistant in history:
          prompt += f"User: {human}\nAssistant: {assistant}\n"
      prompt += f"User: {message}\nAssistant:"

-     # Generate stream
-     output = llm(
-         prompt,
-         max_tokens=512,
-         stop=["User:", "System:"],
-         echo=False,
-         temperature=temp,
-         top_p=top_p,
-         stream=True
-     )

-     response = ""
-     for chunk in output:
-         delta = chunk['choices'][0]['text']
-         response += delta
-         yield response

- # 3. UI DESIGN (Public Standard)
- with gr.Blocks(theme=gr.themes.Default(primary_hue="green")) as demo:
-     gr.Markdown("# 🌿 Bonsai 1-Bit AI: Multi-Model Demo")
-     gr.Markdown("Explore the 1-bit 'Bonsai' family by PrismML. These models run on standard CPUs using minimal RAM.")

      with gr.Row():
          with gr.Column(scale=1):
-             model_selector = gr.Dropdown(
-                 choices=list(MODELS.keys()),
-                 value="Bonsai 1.7B (Ultra-Light)",
-                 label="Select Model Size"
-             )
              sys_input = gr.Textbox(
-                 value="You are a helpful, concise AI assistant powered by 1-bit technology.",
-                 label="System Prompt",
-                 lines=3
              )
-             with gr.Accordion("Advanced Parameters", open=False):
-                 temp_slider = gr.Slider(0.1, 1.0, value=0.7, label="Temperature")
-                 p_slider = gr.Slider(0.1, 1.0, value=0.9, label="Top-P")

-             gr.Markdown("### Standard Tests")
-             test1 = gr.Button("Logic: Explain 1+1")
-             test2 = gr.Button("Creative: Write a Haiku")
-             test3 = gr.Button("Coding: Python Reverse String")

          with gr.Column(scale=3):
-             chat = gr.ChatInterface(
-                 fn=chat_interface,
-                 additional_inputs=[sys_input, model_selector, temp_slider, p_slider],
              )

-     # Test button handlers
-     def run_logic(): return "Explain why 1+1=2 from a logical perspective."
-     def run_creative(): return "Write a short poem about the future of technology."
-     def run_coding(): return "Write a clean Python function to reverse a string."
-
-     # Note: using placeholders to trigger input in the chat box
-     test1.click(fn=lambda: "Explain why 1+1=2 logically.", outputs=None)
-     test2.click(fn=lambda: "Write a short poem about technology.", outputs=None)
-     test3.click(fn=lambda: "Write a Python script to reverse a string.", outputs=None)

- if __name__ == "__main__":
-     demo.queue().launch()
-
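For reference, the removed version streamed tokens in-process through llama-cpp-python's generator API. A minimal standalone sketch of that pattern, assuming llama-cpp-python is installed (the model path and prompt here are illustrative, not from the commit):

    from llama_cpp import Llama

    llm = Llama(model_path="Bonsai-1.7B-v1.0.gguf", n_ctx=2048, n_threads=4, verbose=False)
    # With stream=True the call yields chunks; each carries one text delta
    # under choices[0]["text"], mirroring the loop in the removed chat_interface.
    for chunk in llm("User: Say hello.\nAssistant:", max_tokens=64, stream=True):
        print(chunk["choices"][0]["text"], end="", flush=True)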
 
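Both the removed and the rewritten handlers build the same plain-text chat template. As a worked example, with the default system prompt, a one-turn history [("Hi", "Hello!")], and the new message "What is 1+1?", the assembled prompt is:

    System: You are a helpful AI assistant. Be concise and prioritize logic.
    User: Hi
    Assistant: Hello!
    User: What is 1+1?
    Assistant: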
  import gradio as gr
+ import subprocess
  import os
+ from huggingface_hub import hf_hub_download

+ # MODEL REGISTRY
  MODELS = {
+     "Bonsai 1.7B (248MB)": {"repo": "prism-ml/Bonsai-1.7B-gguf", "file": "Bonsai-1.7B-v1.0.gguf"},
+     "Bonsai 4B (572MB)": {"repo": "prism-ml/Bonsai-4B-gguf", "file": "Bonsai-4B-v1.0.gguf"},
+     "Bonsai 8B (1.15GB)": {"repo": "prism-ml/Bonsai-8B-gguf", "file": "Bonsai-8B-v1.0.gguf"}
  }

+ def chat(message, history, system_prompt, model_choice, temp):
+     # 1. Download/path setup
+     config = MODELS[model_choice]
+     model_path = hf_hub_download(repo_id=config["repo"], filename=config["file"])

+     # 2. Build the prompt (standard format)
      prompt = f"System: {system_prompt}\n"
      for human, assistant in history:
          prompt += f"User: {human}\nAssistant: {assistant}\n"
      prompt += f"User: {message}\nAssistant:"

+     # 3. Subprocess call (the old way)
+     # Using the binary we moved in the Dockerfile
+     cmd = [
+         "./llama-cli", "-m", model_path,
+         "-p", prompt,
+         "-n", "512",
+         "--threads", "4",
+         "--temp", str(temp),
+         "--repeat_penalty", "1.1",
+         "--no-display-prompt"
+     ]

+     try:
+         process = subprocess.Popen(cmd, stdout=subprocess.PIPE, text=True, bufsize=1)
+         response = ""
+         for line in process.stdout:
+             response += line
+             yield response
+     except Exception as e:
+         yield f"Inference Error: {str(e)}"

+ # GRADIO UI
+ with gr.Blocks(theme=gr.themes.Default()) as demo:
+     gr.Markdown("# 🌿 Bonsai 1-Bit AI Sandbox")

      with gr.Row():
          with gr.Column(scale=1):
+             model_select = gr.Dropdown(list(MODELS.keys()), value="Bonsai 1.7B (248MB)", label="Model Selector")
              sys_input = gr.Textbox(
+                 value="You are a helpful AI assistant. Be concise and prioritize logic.",
+                 label="System Prompt", lines=4
              )
+             temp_slider = gr.Slider(0.1, 1.0, value=0.7, label="Temperature")

+             gr.Markdown("### Standard Benchmarks")
+             btn_math = gr.Button("Logic: Math Problem")
+             btn_code = gr.Button("Code: C Implementation")

          with gr.Column(scale=3):
+             chatbot = gr.ChatInterface(
+                 fn=chat,
+                 additional_inputs=[sys_input, model_select, temp_slider]
              )

+     # Simple test triggers
+     btn_math.click(fn=lambda: "Explain why 1+1=2 logically.", outputs=None)
+     btn_code.click(fn=lambda: "Write a C function to reverse a string in-place.", outputs=None)

+ demo.queue().launch(server_name="0.0.0.0", server_port=7860)
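The rewritten app shells out to llama-cli and streams whatever the binary prints. One caveat: iterating process.stdout yields complete lines, so the chat window updates per line rather than per token; reading fixed-size chunks (e.g. process.stdout.read(1)) would give finer-grained streaming. A minimal sketch for smoke-testing the binary outside Gradio, assuming ./llama-cli sits in the working directory and a GGUF file has already been downloaded (the filename below is illustrative):

    import subprocess

    cmd = [
        "./llama-cli", "-m", "Bonsai-1.7B-v1.0.gguf",
        "-p", "User: Say hello.\nAssistant:",
        "-n", "32", "--temp", "0.7", "--no-display-prompt",
    ]
    # Line-buffered text pipe: each completed output line arrives as it is printed
    with subprocess.Popen(cmd, stdout=subprocess.PIPE, text=True, bufsize=1) as proc:
        for line in proc.stdout:
            print(line, end="", flush=True)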