Optitransfer committed on
Commit
69fc7ff
·
verified ·
1 Parent(s): 4526530

Add Gradio chat interface

Browse files
Files changed (1) hide show
  1. app.py +89 -0
app.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import AutoModelForCausalLM, AutoTokenizer
3
+ import torch
4
+
5
# Hub repository id of the checkpoint this app serves.
MODEL_ID = "Optitransfer/Qwen2.5-7B-Instruct-borg-merge-v1"

# The tokenizer and the model are created once, at import time; `respond`
# reuses these module-level objects on every call.
# NOTE(review): trust_remote_code=True permits custom code from the model
# repository to run during loading -- confirm it is actually required.
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_ID,
    trust_remote_code=True,
)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    trust_remote_code=True,
    device_map="auto",
    torch_dtype=torch.float16,  # weights are loaded as float16
)
15
+
16
# System message placed at the start of every conversation sent to the model.
# Written one sentence per element; the sentences are joined by single spaces.
SYSTEM_PROMPT = " ".join(
    (
        "You are a helpful, harmless, and honest assistant created by Optitransfer.",
        "You are a merged model combining knowledge from 9 source models"
        " spanning 4 architecture families.",
        "Answer questions clearly and step by step when reasoning is needed.",
    )
)
22
+
23
def _content_to_text(content):
    """Return the plain-text portion of one history message's ``content``."""
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        # NOTE(review): assumes list-style content is made of parts shaped
        # like {"type": "text", "text": "..."}; non-text parts are dropped.
        # Confirm against the Gradio version the Space is pinned to.
        return "".join(
            part["text"]
            for part in content
            if isinstance(part, dict) and isinstance(part.get("text"), str)
        )
    return ""


def respond(message, history):
    """Generate the assistant's reply to ``message`` given the chat ``history``.

    Args:
        message: The latest user message.
        history: Earlier turns of the conversation. Two layouts are accepted:
            a list of ``(user_msg, bot_msg)`` pairs, or a list of
            ``{"role": ..., "content": ...}`` dicts. ``None`` or an empty
            list means there are no earlier turns.

    Returns:
        The decoded text of the newly generated tokens (prompt excluded).
    """
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    for turn in history or []:
        if isinstance(turn, dict):
            # Dict ("messages") layout: unpacking a dict as a pair would
            # yield its keys, not the conversation text, so read the fields.
            role = turn.get("role")
            text = _content_to_text(turn.get("content"))
            if role in ("user", "assistant") and text:
                messages.append({"role": role, "content": text})
        else:
            # Pair layout: either side may be empty/None and is then skipped.
            user_msg, bot_msg = turn
            if user_msg:
                messages.append({"role": "user", "content": user_msg})
            if bot_msg:
                messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})

    # Render the conversation as text first, then tokenize it for the model.
    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    with torch.no_grad():
        output_ids = model.generate(
            **inputs,
            max_new_tokens=1024,
            temperature=0.7,
            top_p=0.9,
            repetition_penalty=1.1,
            do_sample=True,
        )

    # Skip the first `prompt_len` positions so only the tokens that follow
    # the prompt are decoded.
    prompt_len = inputs["input_ids"].shape[1]
    new_tokens = output_ids[0][prompt_len:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)
50
+
51
+
52
# Markdown passed as the chat interface's description. The text begins and
# ends with a newline (hence the empty first and last entries).
DESCRIPTION = "\n".join(
    [
        "",
        "# 🤖 Qwen2.5-7B-Instruct — Borg Merge v1",
        "",
        "**A single checkpoint created by merging 9 models from 4 architecture families.**",
        "",
        "This model was created using training-free cross-family weight merging — no fine-tuning,",
        "no distillation, no router. It lifts **GSM8K by +3.3 pp**, **ARC-Challenge by +3.2 pp**,",
        "and **IFEval by +2.6 pp** over the unmerged Qwen2.5-7B-Instruct anchor.",
        "",
        "Try asking it reasoning questions, math problems, or instruction-following tasks!",
        "",
        "| Task | Anchor | This Model | Lift |",
        "|---|---|---|---|",
        "| GSM8K | 0.812 | **0.845** | **+3.3 pp** |",
        "| ARC-Challenge | 0.526 | **0.557** | **+3.2 pp** |",
        "| IFEval | 0.655 | **0.681** | **+2.6 pp** |",
        "",
    ]
)
69
+
70
# Sample prompts passed to the chat interface as its examples.
EXAMPLES = [
    (
        "Solve step by step: A store offers 30% off, then an additional 20% off "
        "the sale price. What's the total discount percentage?"
    ),
    (
        "Explain the difference between supervised and unsupervised learning. "
        "Give a real-world example of each."
    ),
    "Write a Python function that finds the longest common subsequence of two strings.",
    (
        "If 5 machines can produce 100 widgets in 4 hours, "
        "how many widgets can 8 machines produce in 6 hours?"
    ),
    "What are three key advantages of renewable energy over fossil fuels? Be specific.",
]
77
+
78
# Build the UI pieces first, then hand them to the chat interface.
_theme = gr.themes.Soft()
_chatbot = gr.Chatbot(height=500)

# Chat UI: every submitted message is answered by `respond`.
demo = gr.ChatInterface(
    respond,
    chatbot=_chatbot,
    title="Borg Merge v1 — Cross-Family Merged Model",
    description=DESCRIPTION,
    examples=EXAMPLES,
    theme=_theme,
    analytics_enabled=False,
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()