HedronCreeper committed on
Commit
ed3d1bc
·
verified ·
1 Parent(s): c692c9c

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -0
app.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from transformers import AutoTokenizer, AutoModelForCausalLM
4
+
5
# Hub repository ID of the fine-tuned model to serve.
model_id = "HedronCreeper/gemma-2b-security-bot"

# 1. Load the tokenizer that matches the fine-tuned checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_id)

# 2. Load the model.
# device_map="auto" lets the runtime place weights across available devices,
# float16 halves the weight footprint, and low_cpu_mem_usage avoids a full
# fp32 copy in host RAM during loading.
_load_kwargs = dict(
    device_map="auto",
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
)
model = AutoModelForCausalLM.from_pretrained(model_id, **_load_kwargs)
20
+
21
def chat_func(message):
    """Generate a single-turn reply from the fine-tuned Gemma model.

    Args:
        message: The user's input text.

    Returns:
        The model's generated reply as a plain string (special tokens
        stripped, surrounding whitespace trimmed).
    """
    # Reproduce the Gemma chat template used during fine-tuning.
    prompt = f"<start_of_turn>user\n{message}<end_of_turn>\n<start_of_turn>model\n"

    # Tokenize and move input tensors to the model's device.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_len = inputs["input_ids"].shape[-1]

    # Sampled generation; no_grad avoids building an autograd graph.
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=128,
            do_sample=True,
            temperature=0.7,
            top_k=50,
        )

    # BUG FIX: the previous version decoded the whole sequence and split on
    # the literal substring "model", which corrupted the reply whenever the
    # user's message (or the reply itself) contained the word "model".
    # Decoding only the tokens generated AFTER the prompt is exact and
    # needs no string surgery.
    generated_tokens = outputs[0][prompt_len:]
    response = tokenizer.decode(generated_tokens, skip_special_tokens=True)
    return response.strip()
48
+
49
+ # 3. Simple Interface
50
# 3. Simple Interface: one textbox in, one textbox out.
_message_box = gr.Textbox(label="Message the Security Bot")
_reply_box = gr.Textbox(label="Response")

demo = gr.Interface(
    fn=chat_func,
    inputs=_message_box,
    outputs=_reply_box,
    title="Gemma Security Bot (Raw Test)",
)

# Launch the web UI only when run as a script (not on import).
if __name__ == "__main__":
    demo.launch()