MiniCPM-V-4.6-Demo

Running on Zero

App Files Files Community

userisuser Cursor committed 8 days ago

Commit

cae3a38

1 Parent(s): 57ab14f

Load both MiniCPM-V 4.6 variants at startup

Browse files

Co-authored-by: Cursor <cursoragent@cursor.com>

Files changed (3) hide show

README.md +1 -0
app.py +14 -4
v46/app.py +39 -19

README.md CHANGED Viewed

@@ -9,6 +9,7 @@ python_version: "3.12"
 app_file: app.py
 models:
 - openbmb/MiniCPM-V-4.6
 pinned: false
 short_description: MiniCPM-V 4.6 Ultra-Efficient Multimodal AI
 ---

 app_file: app.py
 models:
 - openbmb/MiniCPM-V-4.6
+- openbmb/MiniCPM-V-4.6-Thinking
 pinned: false
 short_description: MiniCPM-V 4.6 Ultra-Efficient Multimodal AI
 ---

app.py CHANGED Viewed

@@ -3,15 +3,25 @@ import os
 import spaces
 from v46 import app as v46_app
-MODEL_ID = os.environ.get("V46_MODEL_ID", "openbmb/MiniCPM-V-4.6")
 DEVICE = os.environ.get("V46_DEVICE", "cuda")
 DEFAULT_THINKING = os.environ.get("V46_DEFAULT_THINKING", "0") == "1"
 GPU_DURATION = int(os.environ.get("V46_GPU_DURATION", "300"))
-print(f"[official-space] lazy model config: {MODEL_ID} on {DEVICE}", flush=True)
-v46_app.configure_lazy_models(instruct_path=MODEL_ID, device=DEVICE)
-# ZeroGPU exposes CUDA only while decorated callbacks are running.
 v46_app.native_chat_respond = spaces.GPU(duration=GPU_DURATION)(v46_app.native_chat_respond)
 v46_app.native_fewshot_respond = spaces.GPU(duration=GPU_DURATION)(v46_app.native_fewshot_respond)

 import spaces
 from v46 import app as v46_app
+INSTRUCT_MODEL_ID = os.environ.get("V46_INSTRUCT_MODEL_ID", "openbmb/MiniCPM-V-4.6")
+THINKING_MODEL_ID = os.environ.get("V46_THINKING_MODEL_ID", "openbmb/MiniCPM-V-4.6-Thinking")
 DEVICE = os.environ.get("V46_DEVICE", "cuda")
 DEFAULT_THINKING = os.environ.get("V46_DEFAULT_THINKING", "0") == "1"
 GPU_DURATION = int(os.environ.get("V46_GPU_DURATION", "300"))
+print(
+    f"[official-space] loading models at module startup: "
+    f"instruct={INSTRUCT_MODEL_ID}, thinking={THINKING_MODEL_ID}, device={DEVICE}",
+    flush=True,
+)
+v46_app.load_models(
+    instruct_path=INSTRUCT_MODEL_ID,
+    thinking_path=THINKING_MODEL_ID,
+    device=DEVICE,
+)
+# ZeroGPU docs recommend placing models on cuda at module level and
+# decorating GPU-dependent callbacks.
 v46_app.native_chat_respond = spaces.GPU(duration=GPU_DURATION)(v46_app.native_chat_respond)
 v46_app.native_fewshot_respond = spaces.GPU(duration=GPU_DURATION)(v46_app.native_fewshot_respond)

v46/app.py CHANGED Viewed

@@ -1099,7 +1099,6 @@ def on_thinking_toggle(thinking_mode, chat_bot, app_session):
         return gr.update(), gr.update(), app_session, \
                gr.update(), gr.update(), gr.update()
-    gr.Info(f"Switched to '{new_variant}' model, history cleared.")
     app_session["ctx"] = []
     app_session["images_cnt"] = 0
     app_session["videos_cnt"] = 0
@@ -1302,6 +1301,20 @@ def native_remove_last_turn(chat_messages, app_cfg):
     return last_turn, chat_messages, app_cfg
 def native_chat_respond(user_input, chat_messages, app_cfg,
                         params_form, thinking_mode, streaming_mode,
                         max_new_tokens, temperature, top_p, top_k, max_frames,
@@ -1320,23 +1333,24 @@ def native_chat_respond(user_input, chat_messages, app_cfg,
         yield gr.update(), chat_messages, app_cfg, gr.update(visible=False)
         return
     chat_messages = list(chat_messages or [])
     display_start = len(chat_messages)
     chat_messages.extend(native_display_user_messages(text, files))
     assistant_index = len(chat_messages)
-    chat_messages.append({"role": "assistant", "content": "⏳ Processing…"})
     yield native_empty_input(), chat_messages, app_cfg, gr.update(visible=True)
     ctx = app_cfg.get("ctx", [])
     messages = [{"role": item["role"], "content": copy.copy(item["content"])} for item in ctx]
     messages.append({"role": "user", "content": user_content})
-    sampling = (params_form == "Sampling")
-    if not sampling:
-        streaming_mode = False
-    use_thinking = bool(thinking_mode)
-    variant = pick_variant(use_thinking)
-    enable_thinking = use_thinking and variant == "thinking"
-    app_cfg["current_variant"] = variant
     print(f"[native] respond variant={variant} enable_thinking={enable_thinking}", flush=True)
     try:
@@ -1464,23 +1478,24 @@ def native_fewshot_respond(_image, _user_message, _chat_messages, _app_cfg,
         yield _image, _user_message, "", _chat_messages, _app_cfg, gr.update(visible=False)
         return
     _chat_messages = list(_chat_messages or [])
     display_start = len(_chat_messages)
     _chat_messages.extend(native_display_user_messages(_user_message or "", files))
     assistant_index = len(_chat_messages)
-    _chat_messages.append({"role": "assistant", "content": "⏳ Processing…"})
     yield None, "", "", _chat_messages, _app_cfg, gr.update(visible=True)
     ctx = list(_app_cfg.get("ctx", []))
     messages = [{"role": item["role"], "content": copy.copy(item["content"])} for item in ctx]
     messages.append({"role": "user", "content": user_content})
-    sampling = (params_form == "Sampling")
-    if not sampling:
-        streaming_mode = False
-    use_thinking = bool(thinking_mode)
-    variant = pick_variant(use_thinking)
-    enable_thinking = use_thinking and variant == "thinking"
-    _app_cfg["current_variant"] = variant
     print(f"[native] fewshot variant={variant} enable_thinking={enable_thinking}", flush=True)
     try:
@@ -1600,8 +1615,6 @@ def native_clear_all(txt_message, chat_messages, app_session):
 def native_on_thinking_toggle(thinking_mode, chat_messages, app_session):
     target_variant = pick_variant(bool(thinking_mode))
-    if target_variant != app_session.get("current_variant"):
-        gr.Info(f"Switched to '{target_variant}' model, history cleared.")
     app_session["current_variant"] = target_variant
     return native_clear_all(None, chat_messages, app_session)
@@ -1778,6 +1791,7 @@ def build_ui(model_display_name: str, default_thinking: bool):
                                  params_form, thinking_mode, streaming_mode,
                                  max_new_tokens, temperature, top_p, top_k, max_frames],
                                 [txt_message, chat_bot, app_session, stop_btn],
                             )
                         with gr.Tab("Few Shot") as fewshot_tab:
@@ -1809,6 +1823,7 @@ def build_ui(model_display_name: str, default_thinking: bool):
                                  chat_bot, app_session],
                                 [image_input, user_message, assistant_message,
                                  chat_bot, app_session],
                             )
                             generate_btn.click(
                                 native_fewshot_respond,
@@ -1817,6 +1832,7 @@ def build_ui(model_display_name: str, default_thinking: bool):
                                  max_new_tokens, temperature, top_p, top_k, max_frames],
                                 [image_input, user_message, assistant_message,
                                  chat_bot, app_session, stop_btn],
                             )
                         # Tab switch events: remember current tab + clear state
@@ -1853,6 +1869,7 @@ def build_ui(model_display_name: str, default_thinking: bool):
                             inputs=[thinking_mode, chat_bot, app_session],
                             outputs=[txt_message, chat_bot, app_session,
                                      image_input, user_message, assistant_message],
                         )
                         regenerate_btn.click(
                             native_regenerate_clicked,
@@ -1860,17 +1877,20 @@ def build_ui(model_display_name: str, default_thinking: bool):
                              params_form, thinking_mode, streaming_mode,
                              max_new_tokens, temperature, top_p, top_k, max_frames],
                             [txt_message, chat_bot, app_session, stop_btn],
                         )
                         clear_btn.click(
                             native_clear_all,
                             [txt_message, chat_bot, app_session],
                             [txt_message, chat_bot, app_session,
                              image_input, user_message, assistant_message],
                         )
                         stop_btn.click(
                             stop_clicked,
                             [app_session],
                             [app_session, stop_btn],
                         )
             with gr.Tab("How to use"):

         return gr.update(), gr.update(), app_session, \
                gr.update(), gr.update(), gr.update()
     app_session["ctx"] = []
     app_session["images_cnt"] = 0
     app_session["videos_cnt"] = 0
     return last_turn, chat_messages, app_cfg
+def model_call_status_message(variant: str) -> str:  # placeholder text for the pending assistant chat message
+    if variant in MODELS:  # NOTE(review): MODELS is defined outside this view; presumably the already-loaded variants — confirm
+        return "⏳ Processing…"
+    if variant == "thinking":  # variant not in MODELS and the Thinking variant was requested
+        return (
+            "⏳ Loading the Thinking model. "
+            "Please wait…"
+        )
+    return (  # fallback for any other variant not in MODELS
+        "⏳ Loading the model. "
+        "Please wait…"
+    )
 def native_chat_respond(user_input, chat_messages, app_cfg,
                         params_form, thinking_mode, streaming_mode,
                         max_new_tokens, temperature, top_p, top_k, max_frames,
         yield gr.update(), chat_messages, app_cfg, gr.update(visible=False)
         return
+    sampling = (params_form == "Sampling")
+    if not sampling:
+        streaming_mode = False
+    use_thinking = bool(thinking_mode)
+    variant = pick_variant(use_thinking)
+    enable_thinking = use_thinking and variant == "thinking"
+    app_cfg["current_variant"] = variant
     chat_messages = list(chat_messages or [])
     display_start = len(chat_messages)
     chat_messages.extend(native_display_user_messages(text, files))
     assistant_index = len(chat_messages)
+    chat_messages.append({"role": "assistant", "content": model_call_status_message(variant)})
     yield native_empty_input(), chat_messages, app_cfg, gr.update(visible=True)
     ctx = app_cfg.get("ctx", [])
     messages = [{"role": item["role"], "content": copy.copy(item["content"])} for item in ctx]
     messages.append({"role": "user", "content": user_content})
     print(f"[native] respond variant={variant} enable_thinking={enable_thinking}", flush=True)
     try:
         yield _image, _user_message, "", _chat_messages, _app_cfg, gr.update(visible=False)
         return
+    sampling = (params_form == "Sampling")
+    if not sampling:
+        streaming_mode = False
+    use_thinking = bool(thinking_mode)
+    variant = pick_variant(use_thinking)
+    enable_thinking = use_thinking and variant == "thinking"
+    _app_cfg["current_variant"] = variant
     _chat_messages = list(_chat_messages or [])
     display_start = len(_chat_messages)
     _chat_messages.extend(native_display_user_messages(_user_message or "", files))
     assistant_index = len(_chat_messages)
+    _chat_messages.append({"role": "assistant", "content": model_call_status_message(variant)})
     yield None, "", "", _chat_messages, _app_cfg, gr.update(visible=True)
     ctx = list(_app_cfg.get("ctx", []))
     messages = [{"role": item["role"], "content": copy.copy(item["content"])} for item in ctx]
     messages.append({"role": "user", "content": user_content})
     print(f"[native] fewshot variant={variant} enable_thinking={enable_thinking}", flush=True)
     try:
 def native_on_thinking_toggle(thinking_mode, chat_messages, app_session):
     target_variant = pick_variant(bool(thinking_mode))
     app_session["current_variant"] = target_variant
     return native_clear_all(None, chat_messages, app_session)
                                  params_form, thinking_mode, streaming_mode,
                                  max_new_tokens, temperature, top_p, top_k, max_frames],
                                 [txt_message, chat_bot, app_session, stop_btn],
+                                show_progress="hidden",
                             )
                         with gr.Tab("Few Shot") as fewshot_tab:
                                  chat_bot, app_session],
                                 [image_input, user_message, assistant_message,
                                  chat_bot, app_session],
+                                show_progress="hidden",
                             )
                             generate_btn.click(
                                 native_fewshot_respond,
                                  max_new_tokens, temperature, top_p, top_k, max_frames],
                                 [image_input, user_message, assistant_message,
                                  chat_bot, app_session, stop_btn],
+                                show_progress="hidden",
                             )
                         # Tab switch events: remember current tab + clear state
                             inputs=[thinking_mode, chat_bot, app_session],
                             outputs=[txt_message, chat_bot, app_session,
                                      image_input, user_message, assistant_message],
+                            show_progress="hidden",
                         )
                         regenerate_btn.click(
                             native_regenerate_clicked,
                              params_form, thinking_mode, streaming_mode,
                              max_new_tokens, temperature, top_p, top_k, max_frames],
                             [txt_message, chat_bot, app_session, stop_btn],
+                            show_progress="hidden",
                         )
                         clear_btn.click(
                             native_clear_all,
                             [txt_message, chat_bot, app_session],
                             [txt_message, chat_bot, app_session,
                              image_input, user_message, assistant_message],
+                            show_progress="hidden",
                         )
                         stop_btn.click(
                             stop_clicked,
                             [app_session],
                             [app_session, stop_btn],
+                            show_progress="hidden",
                         )
             with gr.Tab("How to use"):