Optitransfer committed on
Commit
be58545
·
verified ·
1 Parent(s): 467f9c1

Switch to gr.Blocks: welcome message, New Chat button, examples

Browse files
Files changed (1) hide show
  1. app.py +113 -43
app.py CHANGED
@@ -156,31 +156,55 @@ IDENTITY_PROMPT = (
156
  "Model card: https://huggingface.co/Optitransfer/Qwen2.5-7B-Instruct-borg-merge-v1\n"
157
  "Paper: https://ssrn.com/abstract=6545518\n"
158
  "crdt-merge: https://github.com/mgillr/crdt-merge\n"
159
- "Write-up: https://medium.com/@rgillespie83/we-merged-9-models-from-4-architecture-families-into-one-and-it-beats-the-anchor-on-real-e6537dfa9252"
 
160
  )
161
 
162
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
163
  @spaces.GPU(duration=60)
164
- def chat(message, history, extra_instructions, max_tokens, temperature, top_p):
165
- """Generate a response. ZeroGPU allocates A10G for up to 60s."""
 
 
 
166
 
167
- # Always start with the identity prompt
168
  system_content = IDENTITY_PROMPT
169
  if extra_instructions and extra_instructions.strip():
170
  system_content += "\n\n" + extra_instructions.strip()
171
 
172
  messages = [{"role": "system", "content": system_content}]
 
 
 
173
 
174
- for turn in history:
175
- if isinstance(turn, dict):
176
- messages.append(turn)
177
- elif isinstance(turn, (list, tuple)) and len(turn) == 2:
178
- messages.append({"role": "user", "content": turn[0]})
179
- if turn[1]:
180
- messages.append({"role": "assistant", "content": turn[1]})
181
- messages.append({"role": "user", "content": message})
182
-
183
- # apply_chat_template -> plain string, then tokenize explicitly
184
  text = tokenizer.apply_chat_template(
185
  messages, tokenize=False, add_generation_prompt=True
186
  )
@@ -209,16 +233,25 @@ def chat(message, history, extra_instructions, max_tokens, temperature, top_p):
209
  thread = Thread(target=model.generate, kwargs=gen_kwargs)
210
  thread.start()
211
 
212
- response = ""
 
213
  for token in streamer:
214
  if token:
215
- response += token
216
- yield response
 
 
 
217
 
218
  thread.join()
219
 
220
 
221
- # -- UI -------------------------------------------------------------------
 
 
 
 
 
222
  DESCRIPTION = """\
223
  **9 models. 4 architecture families. Zero training. One checkpoint.**
224
 
@@ -255,35 +288,72 @@ donor models while preserving the anchor's core capabilities.
255
  [Write-up](https://medium.com/@rgillespie83/we-merged-9-models-from-4-architecture-families-into-one-and-it-beats-the-anchor-on-real-e6537dfa9252)
256
  """
257
 
258
- EXAMPLES = [
259
- ["What are you and how were you built?"],
260
- ["Explain the crdt-merge paper and its technical details"],
261
- ["Solve step by step: A store offers 30% off, then an additional 20% off the sale price. What is the total discount percentage?"],
262
- ["Explain the difference between supervised and unsupervised learning. Give a real-world example of each."],
263
- ["Write a Python function that finds the longest common subsequence of two strings."],
264
- ["If 5 machines produce 100 widgets in 4 hours, how many widgets can 8 machines produce in 6 hours?"],
265
- ["What are three key advantages of renewable energy over fossil fuels? Be specific."],
266
- ]
267
 
268
- demo = gr.ChatInterface(
269
- fn=chat,
270
- title="Borg Merge v1",
271
- description=DESCRIPTION,
272
- additional_inputs=[
273
- gr.Textbox(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
274
  value="",
275
  label="Additional instructions (optional)",
276
- placeholder="Add custom instructions on top of the built-in identity...",
277
  lines=2,
278
- ),
279
- gr.Slider(64, 2048, value=512, step=64, label="Max new tokens"),
280
- gr.Slider(0.0, 1.5, value=0.7, step=0.05, label="Temperature"),
281
- gr.Slider(0.0, 1.0, value=0.9, step=0.05, label="Top-p"),
282
- ],
283
- examples=EXAMPLES,
284
- cache_examples=False,
285
- type="messages",
286
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
287
 
288
  if __name__ == "__main__":
289
  demo.launch()
 
156
  "Model card: https://huggingface.co/Optitransfer/Qwen2.5-7B-Instruct-borg-merge-v1\n"
157
  "Paper: https://ssrn.com/abstract=6545518\n"
158
  "crdt-merge: https://github.com/mgillr/crdt-merge\n"
159
+ "Write-up: https://medium.com/@rgillespie83/we-merged-9-models-from-4-"
160
+ "architecture-families-into-one-and-it-beats-the-anchor-on-real-e6537dfa9252"
161
  )
162
 
163
 
164
+ # -- Welcome state --------------------------------------------------------
165
+ WELCOME = [
166
+ {"role": "assistant", "content": "Hi, welcome to the collective, how can we help you"},
167
+ ]
168
+
169
+ EXAMPLE_LIST = [
170
+ "What are you and how were you built?",
171
+ "Explain the crdt-merge paper and its technical details",
172
+ "Solve step by step: A store offers 30% off, then an additional 20% off the sale price. What is the total discount percentage?",
173
+ "Explain the difference between supervised and unsupervised learning. Give a real-world example of each.",
174
+ "Write a Python function that finds the longest common subsequence of two strings.",
175
+ "If 5 machines produce 100 widgets in 4 hours, how many widgets can 8 machines produce in 6 hours?",
176
+ "What are three key advantages of renewable energy over fossil fuels? Be specific.",
177
+ ]
178
+
179
+
180
+ # -- Inference functions (identical logic to proven baseline) --------------
181
+
182
+ def add_user_message(message, history):
183
+ """Append user message to history, clear input box."""
184
+ if not message or not message.strip():
185
+ return "", history
186
+ history = history + [{"role": "user", "content": message.strip()}]
187
+ return "", history
188
+
189
+
190
  @spaces.GPU(duration=60)
191
+ def respond(history, extra_instructions, max_tokens, temperature, top_p):
192
+ """Generate a streamed response. ZeroGPU allocates A10G for up to 60s."""
193
+ if not history or history[-1]["role"] != "user":
194
+ yield history
195
+ return
196
 
197
+ # Build messages -- identity prompt always first
198
  system_content = IDENTITY_PROMPT
199
  if extra_instructions and extra_instructions.strip():
200
  system_content += "\n\n" + extra_instructions.strip()
201
 
202
  messages = [{"role": "system", "content": system_content}]
203
+ for msg in history:
204
+ if msg["role"] in ("user", "assistant") and msg.get("content"):
205
+ messages.append({"role": msg["role"], "content": msg["content"]})
206
 
207
+ # Tokenize (same approach as proven baseline)
 
 
 
 
 
 
 
 
 
208
  text = tokenizer.apply_chat_template(
209
  messages, tokenize=False, add_generation_prompt=True
210
  )
 
233
  thread = Thread(target=model.generate, kwargs=gen_kwargs)
234
  thread.start()
235
 
236
+ # Stream tokens -- yield full history with growing assistant response
237
+ history = history + [{"role": "assistant", "content": ""}]
238
  for token in streamer:
239
  if token:
240
+ history[-1] = {
241
+ "role": "assistant",
242
+ "content": history[-1]["content"] + token,
243
+ }
244
+ yield history
245
 
246
  thread.join()
247
 
248
 
249
+ def new_chat():
250
+ """Reset conversation to welcome state."""
251
+ return [{"role": "assistant", "content": "Hi, welcome to the collective, how can we help you"}], ""
252
+
253
+
254
+ # -- UI description -------------------------------------------------------
255
  DESCRIPTION = """\
256
  **9 models. 4 architecture families. Zero training. One checkpoint.**
257
 
 
288
  [Write-up](https://medium.com/@rgillespie83/we-merged-9-models-from-4-architecture-families-into-one-and-it-beats-the-anchor-on-real-e6537dfa9252)
289
  """
290
 
 
 
 
 
 
 
 
 
 
291
 
292
+ # -- Build UI with gr.Blocks ---------------------------------------------
293
+
294
+ with gr.Blocks(title="Borg Merge v1", theme=gr.themes.Soft()) as demo:
295
+ gr.Markdown("# Borg Merge v1")
296
+ gr.Markdown(DESCRIPTION)
297
+
298
+ chatbot = gr.Chatbot(
299
+ value=list(WELCOME),
300
+ type="messages",
301
+ height=500,
302
+ show_copy_button=True,
303
+ )
304
+
305
+ with gr.Row():
306
+ msg = gr.Textbox(
307
+ show_label=False,
308
+ placeholder="Ask the collective anything...",
309
+ scale=8,
310
+ container=False,
311
+ )
312
+ send_btn = gr.Button("Send", variant="primary", scale=1)
313
+
314
+ with gr.Row():
315
+ new_chat_btn = gr.Button("New Chat", variant="secondary", size="sm")
316
+
317
+ with gr.Accordion("Settings", open=False):
318
+ extra = gr.Textbox(
319
  value="",
320
  label="Additional instructions (optional)",
321
+ placeholder="Custom instructions appended to the built-in identity...",
322
  lines=2,
323
+ )
324
+ max_tokens = gr.Slider(
325
+ 64, 2048, value=512, step=64, label="Max new tokens"
326
+ )
327
+ temperature = gr.Slider(
328
+ 0.0, 1.5, value=0.7, step=0.05, label="Temperature"
329
+ )
330
+ top_p = gr.Slider(
331
+ 0.0, 1.0, value=0.9, step=0.05, label="Top-p"
332
+ )
333
+
334
+ gr.Examples(
335
+ examples=EXAMPLE_LIST,
336
+ inputs=msg,
337
+ label="Try these examples",
338
+ )
339
+
340
+ # -- Wire events -------------------------------------------------------
341
+ gen_inputs = [chatbot, extra, max_tokens, temperature, top_p]
342
+
343
+ msg.submit(
344
+ add_user_message, [msg, chatbot], [msg, chatbot]
345
+ ).then(
346
+ respond, gen_inputs, chatbot
347
+ )
348
+
349
+ send_btn.click(
350
+ add_user_message, [msg, chatbot], [msg, chatbot]
351
+ ).then(
352
+ respond, gen_inputs, chatbot
353
+ )
354
+
355
+ new_chat_btn.click(new_chat, outputs=[chatbot, msg])
356
+
357
 
358
  if __name__ == "__main__":
359
  demo.launch()