rufatronics committed
Commit 39f50df · verified · 1 Parent(s): 606fa1b

Update app.py

Files changed (1)
  1. app.py +47 -82
app.py CHANGED
@@ -1,107 +1,72 @@
  import gradio as gr
- from llama_cpp import Llama
- from huggingface_hub import hf_hub_download
  import os

- # 1. MODEL CONFIGURATIONS (Bonsai 1-bit Family)
  MODELS = {
-     "Bonsai 1.7B (Ultra-Light)": {"repo": "prism-ml/Bonsai-1.7B-gguf", "file": "Bonsai-1.7B-v1.0.gguf"},
-     "Bonsai 4B (Balanced)": {"repo": "prism-ml/Bonsai-4B-gguf", "file": "Bonsai-4B-v1.0.gguf"},
-     "Bonsai 8B (High Intelligence)": {"repo": "prism-ml/Bonsai-8B-gguf", "file": "Bonsai-8B-v1.0.gguf"}
  }

- # Persistent variables for the loaded model
- current_model = None
- loaded_model_name = ""
-
- def load_model(name):
-     global current_model, loaded_model_name
-     if loaded_model_name == name:
-         return current_model
-
-     print(f"--- Loading {name} ---")
-     model_config = MODELS[name]
-     model_path = hf_hub_download(repo_id=model_config["repo"], filename=model_config["file"])
-
-     # Initialize the 1-bit engine
-     current_model = Llama(
-         model_path=model_path,
-         n_ctx=2048,   # Standard context window
-         n_threads=4,  # Good for public CPU spaces
-         verbose=False
-     )
-     loaded_model_name = name
-     return current_model
-
- def chat_interface(message, history, system_prompt, model_name, temp, top_p):
-     # Ensure the selected model is loaded
-     llm = load_model(model_name)

-     # Build chat history
      prompt = f"System: {system_prompt}\n"
      for human, assistant in history:
          prompt += f"User: {human}\nAssistant: {assistant}\n"
      prompt += f"User: {message}\nAssistant:"

-     # Generate stream
-     output = llm(
-         prompt,
-         max_tokens=512,
-         stop=["User:", "System:"],
-         echo=False,
-         temperature=temp,
-         top_p=top_p,
-         stream=True
-     )

-     response = ""
-     for chunk in output:
-         delta = chunk['choices'][0]['text']
-         response += delta
-         yield response

- # 3. UI DESIGN (Public Standard)
- with gr.Blocks(theme=gr.themes.Default(primary_hue="green")) as demo:
-     gr.Markdown("# 🌿 Bonsai 1-Bit AI: Multi-Model Demo")
-     gr.Markdown("Explore the 1-bit 'Bonsai' family by PrismML. These models run on standard CPUs using minimal RAM.")

      with gr.Row():
          with gr.Column(scale=1):
-             model_selector = gr.Dropdown(
-                 choices=list(MODELS.keys()),
-                 value="Bonsai 1.7B (Ultra-Light)",
-                 label="Select Model Size"
-             )
              sys_input = gr.Textbox(
-                 value="You are a helpful, concise AI assistant powered by 1-bit technology.",
-                 label="System Prompt",
-                 lines=3
              )
-             with gr.Accordion("Advanced Parameters", open=False):
-                 temp_slider = gr.Slider(0.1, 1.0, value=0.7, label="Temperature")
-                 p_slider = gr.Slider(0.1, 1.0, value=0.9, label="Top-P")

-             gr.Markdown("### Standard Tests")
-             test1 = gr.Button("Logic: Explain 1+1")
-             test2 = gr.Button("Creative: Write a Haiku")
-             test3 = gr.Button("Coding: Python Reverse String")

          with gr.Column(scale=3):
-             chat = gr.ChatInterface(
-                 fn=chat_interface,
-                 additional_inputs=[sys_input, model_selector, temp_slider, p_slider],
              )

-     # Test button handlers
-     def run_logic(): return "Explain why 1+1=2 from a logical perspective."
-     def run_creative(): return "Write a short poem about the future of technology."
-     def run_coding(): return "Write a clean Python function to reverse a string."
-
-     # Note: using placeholders to trigger input in the chat box
-     test1.click(fn=lambda: "Explain why 1+1=2 logically.", outputs=None)
-     test2.click(fn=lambda: "Write a short poem about technology.", outputs=None)
-     test3.click(fn=lambda: "Write a Python script to reverse a string.", outputs=None)

- if __name__ == "__main__":
-     demo.queue().launch()
-
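For reference, the removed version streamed tokens in-process through llama-cpp-python's generator API. A minimal standalone sketch of that pattern, assuming llama-cpp-python is installed (the model path and prompt here are illustrative, not from the commit):

    from llama_cpp import Llama

    llm = Llama(model_path="Bonsai-1.7B-v1.0.gguf", n_ctx=2048, n_threads=4, verbose=False)
    # With stream=True the call yields chunks; each carries one text delta
    # under choices[0]["text"], mirroring the loop in the removed chat_interface.
    for chunk in llm("User: Say hello.\nAssistant:", max_tokens=64, stream=True):
        print(chunk["choices"][0]["text"], end="", flush=True)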
 
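Both the removed and the rewritten handlers build the same plain-text chat template. As a worked example, with the default system prompt, a one-turn history [("Hi", "Hello!")], and the new message "What is 1+1?", the assembled prompt is:

    System: You are a helpful AI assistant. Be concise and prioritize logic.
    User: Hi
    Assistant: Hello!
    User: What is 1+1?
    Assistant: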
  import gradio as gr
+ import subprocess
  import os
+ from huggingface_hub import hf_hub_download

+ # MODEL REGISTRY
  MODELS = {
+     "Bonsai 1.7B (248MB)": {"repo": "prism-ml/Bonsai-1.7B-gguf", "file": "Bonsai-1.7B-v1.0.gguf"},
+     "Bonsai 4B (572MB)": {"repo": "prism-ml/Bonsai-4B-gguf", "file": "Bonsai-4B-v1.0.gguf"},
+     "Bonsai 8B (1.15GB)": {"repo": "prism-ml/Bonsai-8B-gguf", "file": "Bonsai-8B-v1.0.gguf"}
  }

+ def chat(message, history, system_prompt, model_choice, temp):
+     # 1. Download/path setup
+     config = MODELS[model_choice]
+     model_path = hf_hub_download(repo_id=config["repo"], filename=config["file"])

+     # 2. Build the prompt (standard format)
      prompt = f"System: {system_prompt}\n"
      for human, assistant in history:
          prompt += f"User: {human}\nAssistant: {assistant}\n"
      prompt += f"User: {message}\nAssistant:"

+     # 3. Subprocess call (the old way)
+     # Using the binary we moved in the Dockerfile
+     cmd = [
+         "./llama-cli", "-m", model_path,
+         "-p", prompt,
+         "-n", "512",
+         "--threads", "4",
+         "--temp", str(temp),
+         "--repeat_penalty", "1.1",
+         "--no-display-prompt"
+     ]

+     try:
+         process = subprocess.Popen(cmd, stdout=subprocess.PIPE, text=True, bufsize=1)
+         response = ""
+         for line in process.stdout:
+             response += line
+             yield response
+     except Exception as e:
+         yield f"Inference Error: {str(e)}"

+ # GRADIO UI
+ with gr.Blocks(theme=gr.themes.Default()) as demo:
+     gr.Markdown("# 🌿 Bonsai 1-Bit AI Sandbox")

      with gr.Row():
          with gr.Column(scale=1):
+             model_select = gr.Dropdown(list(MODELS.keys()), value="Bonsai 1.7B (248MB)", label="Model Selector")
              sys_input = gr.Textbox(
+                 value="You are a helpful AI assistant. Be concise and prioritize logic.",
+                 label="System Prompt", lines=4
              )
+             temp_slider = gr.Slider(0.1, 1.0, value=0.7, label="Temperature")

+             gr.Markdown("### Standard Benchmarks")
+             btn_math = gr.Button("Logic: Math Problem")
+             btn_code = gr.Button("Code: C Implementation")

          with gr.Column(scale=3):
+             chatbot = gr.ChatInterface(
+                 fn=chat,
+                 additional_inputs=[sys_input, model_select, temp_slider]
              )

+     # Simple test triggers
+     btn_math.click(fn=lambda: "Explain why 1+1=2 logically.", outputs=None)
+     btn_code.click(fn=lambda: "Write a C function to reverse a string in-place.", outputs=None)

+ demo.queue().launch(server_name="0.0.0.0", server_port=7860)
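The rewritten app shells out to llama-cli and streams whatever the binary prints. One caveat: iterating process.stdout yields complete lines, so the chat window updates per line rather than per token; reading fixed-size chunks (e.g. process.stdout.read(1)) would give finer-grained streaming. A minimal sketch for smoke-testing the binary outside Gradio, assuming ./llama-cli sits in the working directory and a GGUF file has already been downloaded (the filename below is illustrative):

    import subprocess

    cmd = [
        "./llama-cli", "-m", "Bonsai-1.7B-v1.0.gguf",
        "-p", "User: Say hello.\nAssistant:",
        "-n", "32", "--temp", "0.7", "--no-display-prompt",
    ]
    # Line-buffered text pipe: each completed output line arrives as it is printed
    with subprocess.Popen(cmd, stdout=subprocess.PIPE, text=True, bufsize=1) as proc:
        for line in proc.stdout:
            print(line, end="", flush=True)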