Optitransfer committed on
Commit
52dccf4
·
verified ·
1 Parent(s): 5701fd2

Add identity prompt, welcome message, New Chat button, examples panel

Browse files
Files changed (1) hide show
  1. app.py +134 -46
app.py CHANGED
@@ -12,8 +12,6 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStream
12
  MODEL_ID = "Optitransfer/Qwen2.5-7B-Instruct-borg-merge-v1"
13
 
14
  # -- Load at module level ------------------------------------------------
15
- # ZeroGPU intercepts .to("cuda") and keeps weights on CPU/meta until
16
- # a @spaces.GPU function actually runs, then moves them automatically.
17
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
18
  model = AutoModelForCausalLM.from_pretrained(
19
  MODEL_ID,
@@ -21,25 +19,58 @@ model = AutoModelForCausalLM.from_pretrained(
21
  ).to("cuda")
22
  model.eval()
23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
  @spaces.GPU(duration=60)
26
- def chat(message, history, system_prompt, max_tokens, temperature, top_p):
27
- """Generate a response. ZeroGPU allocates A10G for up to 120s."""
28
 
29
- # -- Build conversation -----------------------------------------------
30
- messages = []
31
- if system_prompt and system_prompt.strip():
32
- messages.append({"role": "system", "content": system_prompt.strip()})
 
 
 
 
33
  for turn in history:
34
- if isinstance(turn, dict):
35
- messages.append(turn)
36
- elif isinstance(turn, (list, tuple)) and len(turn) == 2:
37
- messages.append({"role": "user", "content": turn[0]})
38
- if turn[1]:
39
- messages.append({"role": "assistant", "content": turn[1]})
40
- messages.append({"role": "user", "content": message})
41
-
42
- # apply_chat_template -> plain string, then tokenize explicitly
43
  text = tokenizer.apply_chat_template(
44
  messages, tokenize=False, add_generation_prompt=True
45
  )
@@ -68,16 +99,36 @@ def chat(message, history, system_prompt, max_tokens, temperature, top_p):
68
  thread = Thread(target=model.generate, kwargs=gen_kwargs)
69
  thread.start()
70
 
71
- response = ""
 
72
  for token in streamer:
73
  if token:
74
- response += token
75
- yield response
76
 
77
  thread.join()
78
 
79
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  # -- UI -------------------------------------------------------------------
 
81
  DESCRIPTION = """\
82
  **9 models. 4 architecture families. Zero training. One checkpoint.**
83
 
@@ -114,37 +165,74 @@ donor models while preserving the anchor's core capabilities.
114
  [Write-up](https://medium.com/@rgillespie83/we-merged-9-models-from-4-architecture-families-into-one-and-it-beats-the-anchor-on-real-e6537dfa9252)
115
  """
116
 
117
- SYSTEM_DEFAULT = (
118
- "You are a helpful, knowledgeable AI assistant. "
119
- "Answer clearly and concisely."
120
- )
121
-
122
  EXAMPLES = [
123
- ["Solve step by step: A store offers 30% off, then an additional 20% off the sale price. What is the total discount percentage?"],
124
- ["Explain the difference between supervised and unsupervised learning. Give a real-world example of each."],
125
- ["Write a Python function that finds the longest common subsequence of two strings."],
126
- ["If 5 machines produce 100 widgets in 4 hours, how many widgets can 8 machines produce in 6 hours?"],
127
- ["What are three key advantages of renewable energy over fossil fuels? Be specific."],
 
128
  ]
129
 
130
- demo = gr.ChatInterface(
131
- fn=chat,
132
  title="Borg Merge v1",
133
- description=DESCRIPTION,
134
- additional_inputs=[
135
- gr.Textbox(
136
- value=SYSTEM_DEFAULT,
137
- label="System prompt",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
  lines=2,
139
- ),
140
- gr.Slider(64, 2048, value=512, step=64, label="Max new tokens"),
141
- gr.Slider(0.0, 1.5, value=0.7, step=0.05, label="Temperature"),
142
- gr.Slider(0.0, 1.0, value=0.9, step=0.05, label="Top-p"),
143
- ],
144
- examples=EXAMPLES,
145
- cache_examples=False,
146
- type="messages",
147
- )
 
 
 
 
 
 
 
 
 
 
 
148
 
149
  if __name__ == "__main__":
150
  demo.launch()
 
12
  MODEL_ID = "Optitransfer/Qwen2.5-7B-Instruct-borg-merge-v1"
13
 
14
  # -- Load at module level ------------------------------------------------
 
 
15
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
16
  model = AutoModelForCausalLM.from_pretrained(
17
  MODEL_ID,
 
19
  ).to("cuda")
20
  model.eval()
21
 
22
# -- Identity prompt (always prepended, not user-editable) ----------------
# Fixed system prompt describing how the merged model was built. It is
# assembled from adjacent string literals, so the trailing space or newline
# in each fragment is significant.
IDENTITY_PROMPT = (
    "You are Borg Merge v1, a collective intelligence formed by merging "
    "9 language models from 4 different architecture families into a single "
    "unified checkpoint. You were not fine-tuned, distilled, or trained. "
    "Your weights were merged directly.\n\n"
    "Your construction:\n"
    "- Base (anchor): Qwen2.5-7B-Instruct\n"
    "- Llama family donors: Mistral-7B-Instruct-v0.3, "
    "SmolLM2-1.7B-Instruct, Granite-3.0-2B-Instruct\n"
    "- Phi family donors: Phi-3-mini-4k-instruct, phi-2\n"
    "- NeoX family donors: Pythia-2.8B, Pythia-1.4B\n"
    "- OPT family donor: OPT-2.7B\n\n"
    "The merge was performed using crdt-merge, a two-layer CRDT framework. "
    "Layer 1 maps each architecture's parameter names to a shared canonical "
    "key namespace so structurally different models can be compared. "
    "Layer 2 applies per-tensor Procrustes alignment and SVD-filtered delta "
    "absorption to merge donor knowledge into the anchor's weight space.\n\n"
    "You outperform your unmerged anchor on reasoning (GSM8K +3.3 pp), "
    "knowledge (ARC-Challenge +3.2 pp), and instruction following "
    "(IFEval +2.6 pp).\n\n"
    "You represent a collective of models speaking as one. "
    "Answer helpfully, clearly, and accurately."
)

# Greeting shown as the first assistant turn of a fresh chat.
WELCOME_MSG = "Hi, welcome to the collective, how can we help you"

# History a new conversation starts from: a single assistant turn holding
# the greeting, in role/content ("messages") form.
INITIAL_HISTORY = [{"role": "assistant", "content": WELCOME_MSG}]
50
+
51
+ # -- Inference ------------------------------------------------------------
52
 
53
  @spaces.GPU(duration=60)
54
+ def bot_response(history, user_instructions, max_tokens, temperature, top_p):
55
+ """Generate a streamed response. ZeroGPU allocates A10G on demand."""
56
 
57
+ # Build conversation with identity prompt always first
58
+ messages = [{"role": "system", "content": IDENTITY_PROMPT}]
59
+
60
+ # Append user-supplied instructions if any
61
+ if user_instructions and user_instructions.strip():
62
+ messages[0]["content"] += "\n\n" + user_instructions.strip()
63
+
64
+ # Replay history (skip the initial welcome for cleaner context)
65
  for turn in history:
66
+ role = turn.get("role", "")
67
+ content = turn.get("content", "")
68
+ if role in ("user", "assistant") and content:
69
+ # Skip the welcome message from context to save tokens
70
+ if role == "assistant" and content == WELCOME_MSG:
71
+ continue
72
+ messages.append({"role": role, "content": content})
73
+
 
74
  text = tokenizer.apply_chat_template(
75
  messages, tokenize=False, add_generation_prompt=True
76
  )
 
99
  thread = Thread(target=model.generate, kwargs=gen_kwargs)
100
  thread.start()
101
 
102
+ # Stream tokens into the history
103
+ history.append({"role": "assistant", "content": ""})
104
  for token in streamer:
105
  if token:
106
+ history[-1]["content"] += token
107
+ yield history
108
 
109
  thread.join()
110
 
111
 
112
def add_user_message(message, history):
    """Append the user's message to the chat history and clear the input box.

    Args:
        message: Text taken from the input box. ``None``, empty, or
            whitespace-only text is ignored.
        history: Current chat history as a list of role/content dicts.
            ``None`` is treated as an empty history when a message is added.

    Returns:
        A ``(textbox_value, history)`` pair. The textbox value is always
        ``""``. The history is a new list with the user turn appended, or
        the ``history`` argument itself, unchanged, when the message is
        blank.
    """
    if not message or not message.strip():
        return "", history
    # Build a new list instead of appending in place so the caller's list is
    # left untouched; `or []` keeps a missing history from raising TypeError.
    return "", [*(history or []), {"role": "user", "content": message}]
118
+
119
+
120
def reset_chat():
    """Return a fresh chat history holding only the initial welcome turn(s).

    Returns:
        A new list with a copy of every message dict in ``INITIAL_HISTORY``.
    """
    # Copy each message dict as well as the list, so editing a returned
    # message in place can never alter the shared module-level constant.
    return [dict(turn) for turn in INITIAL_HISTORY]
123
+
124
+
125
def set_example(example_text: str) -> str:
    """Return the example text unchanged so it can be placed in the input box.

    Args:
        example_text: The example prompt to show in the input box.

    Returns:
        ``example_text`` exactly as received.
    """
    return example_text
128
+
129
+
130
  # -- UI -------------------------------------------------------------------
131
+
132
  DESCRIPTION = """\
133
  **9 models. 4 architecture families. Zero training. One checkpoint.**
134
 
 
165
  [Write-up](https://medium.com/@rgillespie83/we-merged-9-models-from-4-architecture-families-into-one-and-it-beats-the-anchor-on-real-e6537dfa9252)
166
  """
167
 
 
 
 
 
 
168
  EXAMPLES = [
169
+ "What are you and how were you built?",
170
+ "Solve step by step: A store offers 30% off, then an additional 20% off the sale price. What is the total discount percentage?",
171
+ "Explain the difference between supervised and unsupervised learning. Give a real-world example of each.",
172
+ "Write a Python function that finds the longest common subsequence of two strings.",
173
+ "If 5 machines produce 100 widgets in 4 hours, how many widgets can 8 machines produce in 6 hours?",
174
+ "What are three key advantages of renewable energy over fossil fuels? Be specific.",
175
  ]
176
 
177
# Build the chat UI: header, chatbot pane, input row, New Chat button,
# clickable examples and generation settings, then wire the events.
with gr.Blocks(
    title="Borg Merge v1",
    theme=gr.themes.Soft(),
) as demo:

    gr.Markdown("# Borg Merge v1")
    gr.Markdown(DESCRIPTION)

    # Chat pane, pre-populated with a copy of the welcome history.
    chatbot = gr.Chatbot(
        value=list(INITIAL_HISTORY),
        type="messages",
        height=500,
        show_copy_button=True,
    )

    with gr.Row():
        msg = gr.Textbox(
            placeholder="Ask the collective...",
            show_label=False,
            scale=9,
            container=False,
        )
        submit_btn = gr.Button("Send", scale=1, variant="primary")

    with gr.Row():
        new_chat_btn = gr.Button("New Chat", variant="secondary", size="sm")

    with gr.Accordion("Examples", open=True):
        for ex in EXAMPLES:
            example_btn = gr.Button(ex, variant="secondary", size="sm")
            # set_example takes one argument, so the click must supply one.
            # A Button used as an input passes its label, which here is the
            # example text itself.
            example_btn.click(set_example, inputs=[example_btn], outputs=[msg])

    with gr.Accordion("Settings", open=False):
        user_instructions = gr.Textbox(
            value="",
            label="Additional instructions (optional)",
            placeholder="Add custom instructions on top of the model's built-in identity...",
            lines=2,
        )
        max_tokens = gr.Slider(64, 2048, value=512, step=64, label="Max new tokens")
        temperature = gr.Slider(0.0, 1.5, value=0.7, step=0.05, label="Temperature")
        top_p = gr.Slider(0.0, 1.0, value=0.9, step=0.05, label="Top-p")

    # Wire up submit (Enter key and button): first record the user turn and
    # clear the textbox, then stream the model's reply into the chatbot.
    # NOTE(review): the second step appears to run even when the first step
    # ignored a blank message -- confirm whether a guard is wanted.
    submit_event = msg.submit(
        add_user_message, [msg, chatbot], [msg, chatbot]
    ).then(
        bot_response, [chatbot, user_instructions, max_tokens, temperature, top_p], chatbot
    )

    click_event = submit_btn.click(
        add_user_message, [msg, chatbot], [msg, chatbot]
    ).then(
        bot_response, [chatbot, user_instructions, max_tokens, temperature, top_p], chatbot
    )

    # New Chat resets to welcome state
    new_chat_btn.click(reset_chat, None, chatbot)


if __name__ == "__main__":
    demo.launch()