rufatronics committed on
Commit
f721e12
·
verified ·
1 Parent(s): 5c03266

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -0
app.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from huggingface_hub import hf_hub_download
3
+ from llama_cpp import Llama
4
+ import os
5
+
6
+ # Download the model (this will happen automatically when the space starts)
7
+ model_path = hf_hub_download(
8
+ repo_id="bartowski/Gemma-2-2B-Instruct-GGUF",
9
+ filename="Gemma-2-2B-Instruct-Q4_K_M.gguf"
10
+ )
11
+
12
+ # Load the model
13
+ llm = Llama(
14
+ model_path=model_path,
15
+ n_ctx=2048, # Context length
16
+ n_threads=2, # Use both CPUs
17
+ n_gpu_layers=0, # CPU only
18
+ verbose=False
19
+ )
20
+
21
+ def chat_response(message, history):
22
+ # Format the prompt with conversation history
23
+ prompt = f"""<start_of_turn>user
24
+ {message}<end_of_turn>
25
+ <start_of_turn>model"""
26
+
27
+ # Generate response
28
+ output = llm(
29
+ prompt,
30
+ max_tokens=256,
31
+ stop=["<end_of_turn>", "<start_of_turn>"],
32
+ temperature=0.7,
33
+ top_p=0.9
34
+ )
35
+
36
+ return output['choices'][0]['text']
37
+
38
+ # Create the chat interface
39
+ demo = gr.ChatInterface(
40
+ fn=chat_response,
41
+ title="Hausa AI Assistant",
42
+ description="A simple AI assistant powered by Gemma 2"
43
+ )
44
+
45
+ # Launch the application
46
+ if __name__ == "__main__":
47
+ demo.launch()