Spaces:
Running on Zero
Running on Zero
Commit ·
cae3a38
1
Parent(s): 57ab14f
Load both MiniCPM-V 4.6 variants at startup
Browse files
Co-authored-by: Cursor <cursoragent@cursor.com>
- README.md +1 -0
- app.py +14 -4
- v46/app.py +39 -19
README.md
CHANGED
|
@@ -9,6 +9,7 @@ python_version: "3.12"
|
|
| 9 |
app_file: app.py
|
| 10 |
models:
|
| 11 |
- openbmb/MiniCPM-V-4.6
|
|
|
|
| 12 |
pinned: false
|
| 13 |
short_description: MiniCPM-V 4.6 Ultra-Efficient Multimodal AI
|
| 14 |
---
|
|
|
|
| 9 |
app_file: app.py
|
| 10 |
models:
|
| 11 |
- openbmb/MiniCPM-V-4.6
|
| 12 |
+
- openbmb/MiniCPM-V-4.6-Thinking
|
| 13 |
pinned: false
|
| 14 |
short_description: MiniCPM-V 4.6 Ultra-Efficient Multimodal AI
|
| 15 |
---
|
app.py
CHANGED
|
@@ -3,15 +3,25 @@ import os
|
|
| 3 |
import spaces
|
| 4 |
from v46 import app as v46_app
|
| 5 |
|
| 6 |
-
|
|
|
|
| 7 |
DEVICE = os.environ.get("V46_DEVICE", "cuda")
|
| 8 |
DEFAULT_THINKING = os.environ.get("V46_DEFAULT_THINKING", "0") == "1"
|
| 9 |
GPU_DURATION = int(os.environ.get("V46_GPU_DURATION", "300"))
|
| 10 |
|
| 11 |
-
print(
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
|
| 14 |
-
# ZeroGPU
|
|
|
|
| 15 |
v46_app.native_chat_respond = spaces.GPU(duration=GPU_DURATION)(v46_app.native_chat_respond)
|
| 16 |
v46_app.native_fewshot_respond = spaces.GPU(duration=GPU_DURATION)(v46_app.native_fewshot_respond)
|
| 17 |
|
|
|
|
| 3 |
import spaces
|
| 4 |
from v46 import app as v46_app
|
| 5 |
|
| 6 |
+
INSTRUCT_MODEL_ID = os.environ.get("V46_INSTRUCT_MODEL_ID", "openbmb/MiniCPM-V-4.6")
|
| 7 |
+
THINKING_MODEL_ID = os.environ.get("V46_THINKING_MODEL_ID", "openbmb/MiniCPM-V-4.6-Thinking")
|
| 8 |
DEVICE = os.environ.get("V46_DEVICE", "cuda")
|
| 9 |
DEFAULT_THINKING = os.environ.get("V46_DEFAULT_THINKING", "0") == "1"
|
| 10 |
GPU_DURATION = int(os.environ.get("V46_GPU_DURATION", "300"))
|
| 11 |
|
| 12 |
+
print(
|
| 13 |
+
f"[official-space] loading models at module startup: "
|
| 14 |
+
f"instruct={INSTRUCT_MODEL_ID}, thinking={THINKING_MODEL_ID}, device={DEVICE}",
|
| 15 |
+
flush=True,
|
| 16 |
+
)
|
| 17 |
+
v46_app.load_models(
|
| 18 |
+
instruct_path=INSTRUCT_MODEL_ID,
|
| 19 |
+
thinking_path=THINKING_MODEL_ID,
|
| 20 |
+
device=DEVICE,
|
| 21 |
+
)
|
| 22 |
|
| 23 |
+
# ZeroGPU docs recommend placing models on cuda at module level and
|
| 24 |
+
# decorating GPU-dependent callbacks.
|
| 25 |
v46_app.native_chat_respond = spaces.GPU(duration=GPU_DURATION)(v46_app.native_chat_respond)
|
| 26 |
v46_app.native_fewshot_respond = spaces.GPU(duration=GPU_DURATION)(v46_app.native_fewshot_respond)
|
| 27 |
|
v46/app.py
CHANGED
|
@@ -1099,7 +1099,6 @@ def on_thinking_toggle(thinking_mode, chat_bot, app_session):
|
|
| 1099 |
return gr.update(), gr.update(), app_session, \
|
| 1100 |
gr.update(), gr.update(), gr.update()
|
| 1101 |
|
| 1102 |
-
gr.Info(f"Switched to '{new_variant}' model, history cleared.")
|
| 1103 |
app_session["ctx"] = []
|
| 1104 |
app_session["images_cnt"] = 0
|
| 1105 |
app_session["videos_cnt"] = 0
|
|
@@ -1302,6 +1301,20 @@ def native_remove_last_turn(chat_messages, app_cfg):
|
|
| 1302 |
return last_turn, chat_messages, app_cfg
|
| 1303 |
|
| 1304 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1305 |
def native_chat_respond(user_input, chat_messages, app_cfg,
|
| 1306 |
params_form, thinking_mode, streaming_mode,
|
| 1307 |
max_new_tokens, temperature, top_p, top_k, max_frames,
|
|
@@ -1320,23 +1333,24 @@ def native_chat_respond(user_input, chat_messages, app_cfg,
|
|
| 1320 |
yield gr.update(), chat_messages, app_cfg, gr.update(visible=False)
|
| 1321 |
return
|
| 1322 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1323 |
chat_messages = list(chat_messages or [])
|
| 1324 |
display_start = len(chat_messages)
|
| 1325 |
chat_messages.extend(native_display_user_messages(text, files))
|
| 1326 |
assistant_index = len(chat_messages)
|
| 1327 |
-
chat_messages.append({"role": "assistant", "content":
|
| 1328 |
yield native_empty_input(), chat_messages, app_cfg, gr.update(visible=True)
|
| 1329 |
|
| 1330 |
ctx = app_cfg.get("ctx", [])
|
| 1331 |
messages = [{"role": item["role"], "content": copy.copy(item["content"])} for item in ctx]
|
| 1332 |
messages.append({"role": "user", "content": user_content})
|
| 1333 |
-
sampling = (params_form == "Sampling")
|
| 1334 |
-
if not sampling:
|
| 1335 |
-
streaming_mode = False
|
| 1336 |
-
use_thinking = bool(thinking_mode)
|
| 1337 |
-
variant = pick_variant(use_thinking)
|
| 1338 |
-
enable_thinking = use_thinking and variant == "thinking"
|
| 1339 |
-
app_cfg["current_variant"] = variant
|
| 1340 |
print(f"[native] respond variant={variant} enable_thinking={enable_thinking}", flush=True)
|
| 1341 |
|
| 1342 |
try:
|
|
@@ -1464,23 +1478,24 @@ def native_fewshot_respond(_image, _user_message, _chat_messages, _app_cfg,
|
|
| 1464 |
yield _image, _user_message, "", _chat_messages, _app_cfg, gr.update(visible=False)
|
| 1465 |
return
|
| 1466 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1467 |
_chat_messages = list(_chat_messages or [])
|
| 1468 |
display_start = len(_chat_messages)
|
| 1469 |
_chat_messages.extend(native_display_user_messages(_user_message or "", files))
|
| 1470 |
assistant_index = len(_chat_messages)
|
| 1471 |
-
_chat_messages.append({"role": "assistant", "content":
|
| 1472 |
yield None, "", "", _chat_messages, _app_cfg, gr.update(visible=True)
|
| 1473 |
|
| 1474 |
ctx = list(_app_cfg.get("ctx", []))
|
| 1475 |
messages = [{"role": item["role"], "content": copy.copy(item["content"])} for item in ctx]
|
| 1476 |
messages.append({"role": "user", "content": user_content})
|
| 1477 |
-
sampling = (params_form == "Sampling")
|
| 1478 |
-
if not sampling:
|
| 1479 |
-
streaming_mode = False
|
| 1480 |
-
use_thinking = bool(thinking_mode)
|
| 1481 |
-
variant = pick_variant(use_thinking)
|
| 1482 |
-
enable_thinking = use_thinking and variant == "thinking"
|
| 1483 |
-
_app_cfg["current_variant"] = variant
|
| 1484 |
print(f"[native] fewshot variant={variant} enable_thinking={enable_thinking}", flush=True)
|
| 1485 |
|
| 1486 |
try:
|
|
@@ -1600,8 +1615,6 @@ def native_clear_all(txt_message, chat_messages, app_session):
|
|
| 1600 |
|
| 1601 |
def native_on_thinking_toggle(thinking_mode, chat_messages, app_session):
|
| 1602 |
target_variant = pick_variant(bool(thinking_mode))
|
| 1603 |
-
if target_variant != app_session.get("current_variant"):
|
| 1604 |
-
gr.Info(f"Switched to '{target_variant}' model, history cleared.")
|
| 1605 |
app_session["current_variant"] = target_variant
|
| 1606 |
return native_clear_all(None, chat_messages, app_session)
|
| 1607 |
|
|
@@ -1778,6 +1791,7 @@ def build_ui(model_display_name: str, default_thinking: bool):
|
|
| 1778 |
params_form, thinking_mode, streaming_mode,
|
| 1779 |
max_new_tokens, temperature, top_p, top_k, max_frames],
|
| 1780 |
[txt_message, chat_bot, app_session, stop_btn],
|
|
|
|
| 1781 |
)
|
| 1782 |
|
| 1783 |
with gr.Tab("Few Shot") as fewshot_tab:
|
|
@@ -1809,6 +1823,7 @@ def build_ui(model_display_name: str, default_thinking: bool):
|
|
| 1809 |
chat_bot, app_session],
|
| 1810 |
[image_input, user_message, assistant_message,
|
| 1811 |
chat_bot, app_session],
|
|
|
|
| 1812 |
)
|
| 1813 |
generate_btn.click(
|
| 1814 |
native_fewshot_respond,
|
|
@@ -1817,6 +1832,7 @@ def build_ui(model_display_name: str, default_thinking: bool):
|
|
| 1817 |
max_new_tokens, temperature, top_p, top_k, max_frames],
|
| 1818 |
[image_input, user_message, assistant_message,
|
| 1819 |
chat_bot, app_session, stop_btn],
|
|
|
|
| 1820 |
)
|
| 1821 |
|
| 1822 |
# Tab switch events: remember current tab + clear state
|
|
@@ -1853,6 +1869,7 @@ def build_ui(model_display_name: str, default_thinking: bool):
|
|
| 1853 |
inputs=[thinking_mode, chat_bot, app_session],
|
| 1854 |
outputs=[txt_message, chat_bot, app_session,
|
| 1855 |
image_input, user_message, assistant_message],
|
|
|
|
| 1856 |
)
|
| 1857 |
regenerate_btn.click(
|
| 1858 |
native_regenerate_clicked,
|
|
@@ -1860,17 +1877,20 @@ def build_ui(model_display_name: str, default_thinking: bool):
|
|
| 1860 |
params_form, thinking_mode, streaming_mode,
|
| 1861 |
max_new_tokens, temperature, top_p, top_k, max_frames],
|
| 1862 |
[txt_message, chat_bot, app_session, stop_btn],
|
|
|
|
| 1863 |
)
|
| 1864 |
clear_btn.click(
|
| 1865 |
native_clear_all,
|
| 1866 |
[txt_message, chat_bot, app_session],
|
| 1867 |
[txt_message, chat_bot, app_session,
|
| 1868 |
image_input, user_message, assistant_message],
|
|
|
|
| 1869 |
)
|
| 1870 |
stop_btn.click(
|
| 1871 |
stop_clicked,
|
| 1872 |
[app_session],
|
| 1873 |
[app_session, stop_btn],
|
|
|
|
| 1874 |
)
|
| 1875 |
|
| 1876 |
with gr.Tab("How to use"):
|
|
|
|
| 1099 |
return gr.update(), gr.update(), app_session, \
|
| 1100 |
gr.update(), gr.update(), gr.update()
|
| 1101 |
|
|
|
|
| 1102 |
app_session["ctx"] = []
|
| 1103 |
app_session["images_cnt"] = 0
|
| 1104 |
app_session["videos_cnt"] = 0
|
|
|
|
| 1301 |
return last_turn, chat_messages, app_cfg
|
| 1302 |
|
| 1303 |
|
| 1304 |
+
def model_call_status_message(variant: str) -> str:
    """Return the placeholder status text for a pending model call.

    The respond callbacks use the result as the initial content of the
    assistant message they append before generation starts.

    Args:
        variant: Name of the model variant about to be used. It is looked up
            in the module-level ``MODELS`` container (defined elsewhere in
            this file; presumably the registry of already-loaded models --
            confirm against its definition).

    Returns:
        A "processing" message when ``variant`` is present in ``MODELS``;
        otherwise a "loading" message, with dedicated wording for the
        ``"thinking"`` variant.
    """
    # Membership in MODELS is checked first, so a registered variant always
    # gets the short message, whatever its name.
    if variant in MODELS:
        return "⏳ Processing…"

    return (
        "⏳ Loading the Thinking model. Please wait…"
        if variant == "thinking"
        else "⏳ Loading the model. Please wait…"
    )
|
| 1316 |
+
|
| 1317 |
+
|
| 1318 |
def native_chat_respond(user_input, chat_messages, app_cfg,
|
| 1319 |
params_form, thinking_mode, streaming_mode,
|
| 1320 |
max_new_tokens, temperature, top_p, top_k, max_frames,
|
|
|
|
| 1333 |
yield gr.update(), chat_messages, app_cfg, gr.update(visible=False)
|
| 1334 |
return
|
| 1335 |
|
| 1336 |
+
sampling = (params_form == "Sampling")
|
| 1337 |
+
if not sampling:
|
| 1338 |
+
streaming_mode = False
|
| 1339 |
+
use_thinking = bool(thinking_mode)
|
| 1340 |
+
variant = pick_variant(use_thinking)
|
| 1341 |
+
enable_thinking = use_thinking and variant == "thinking"
|
| 1342 |
+
app_cfg["current_variant"] = variant
|
| 1343 |
+
|
| 1344 |
chat_messages = list(chat_messages or [])
|
| 1345 |
display_start = len(chat_messages)
|
| 1346 |
chat_messages.extend(native_display_user_messages(text, files))
|
| 1347 |
assistant_index = len(chat_messages)
|
| 1348 |
+
chat_messages.append({"role": "assistant", "content": model_call_status_message(variant)})
|
| 1349 |
yield native_empty_input(), chat_messages, app_cfg, gr.update(visible=True)
|
| 1350 |
|
| 1351 |
ctx = app_cfg.get("ctx", [])
|
| 1352 |
messages = [{"role": item["role"], "content": copy.copy(item["content"])} for item in ctx]
|
| 1353 |
messages.append({"role": "user", "content": user_content})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1354 |
print(f"[native] respond variant={variant} enable_thinking={enable_thinking}", flush=True)
|
| 1355 |
|
| 1356 |
try:
|
|
|
|
| 1478 |
yield _image, _user_message, "", _chat_messages, _app_cfg, gr.update(visible=False)
|
| 1479 |
return
|
| 1480 |
|
| 1481 |
+
sampling = (params_form == "Sampling")
|
| 1482 |
+
if not sampling:
|
| 1483 |
+
streaming_mode = False
|
| 1484 |
+
use_thinking = bool(thinking_mode)
|
| 1485 |
+
variant = pick_variant(use_thinking)
|
| 1486 |
+
enable_thinking = use_thinking and variant == "thinking"
|
| 1487 |
+
_app_cfg["current_variant"] = variant
|
| 1488 |
+
|
| 1489 |
_chat_messages = list(_chat_messages or [])
|
| 1490 |
display_start = len(_chat_messages)
|
| 1491 |
_chat_messages.extend(native_display_user_messages(_user_message or "", files))
|
| 1492 |
assistant_index = len(_chat_messages)
|
| 1493 |
+
_chat_messages.append({"role": "assistant", "content": model_call_status_message(variant)})
|
| 1494 |
yield None, "", "", _chat_messages, _app_cfg, gr.update(visible=True)
|
| 1495 |
|
| 1496 |
ctx = list(_app_cfg.get("ctx", []))
|
| 1497 |
messages = [{"role": item["role"], "content": copy.copy(item["content"])} for item in ctx]
|
| 1498 |
messages.append({"role": "user", "content": user_content})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1499 |
print(f"[native] fewshot variant={variant} enable_thinking={enable_thinking}", flush=True)
|
| 1500 |
|
| 1501 |
try:
|
|
|
|
| 1615 |
|
| 1616 |
def native_on_thinking_toggle(thinking_mode, chat_messages, app_session):
    """Handle a change of the thinking-mode toggle.

    Stores the variant that ``pick_variant`` selects for the new toggle state
    under ``app_session["current_variant"]``, then delegates to
    ``native_clear_all`` and returns its result unchanged.

    Args:
        thinking_mode: Current toggle value; coerced with ``bool`` before
            being passed to ``pick_variant``.
        chat_messages: Current chat message list, forwarded to
            ``native_clear_all``.
        app_session: Mutable session dict; updated in place.

    Returns:
        Whatever ``native_clear_all(None, chat_messages, app_session)``
        returns.
    """
    app_session["current_variant"] = pick_variant(bool(thinking_mode))
    # None is passed in the text-message slot, exactly as before.
    return native_clear_all(None, chat_messages, app_session)
|
| 1620 |
|
|
|
|
| 1791 |
params_form, thinking_mode, streaming_mode,
|
| 1792 |
max_new_tokens, temperature, top_p, top_k, max_frames],
|
| 1793 |
[txt_message, chat_bot, app_session, stop_btn],
|
| 1794 |
+
show_progress="hidden",
|
| 1795 |
)
|
| 1796 |
|
| 1797 |
with gr.Tab("Few Shot") as fewshot_tab:
|
|
|
|
| 1823 |
chat_bot, app_session],
|
| 1824 |
[image_input, user_message, assistant_message,
|
| 1825 |
chat_bot, app_session],
|
| 1826 |
+
show_progress="hidden",
|
| 1827 |
)
|
| 1828 |
generate_btn.click(
|
| 1829 |
native_fewshot_respond,
|
|
|
|
| 1832 |
max_new_tokens, temperature, top_p, top_k, max_frames],
|
| 1833 |
[image_input, user_message, assistant_message,
|
| 1834 |
chat_bot, app_session, stop_btn],
|
| 1835 |
+
show_progress="hidden",
|
| 1836 |
)
|
| 1837 |
|
| 1838 |
# Tab switch events: remember current tab + clear state
|
|
|
|
| 1869 |
inputs=[thinking_mode, chat_bot, app_session],
|
| 1870 |
outputs=[txt_message, chat_bot, app_session,
|
| 1871 |
image_input, user_message, assistant_message],
|
| 1872 |
+
show_progress="hidden",
|
| 1873 |
)
|
| 1874 |
regenerate_btn.click(
|
| 1875 |
native_regenerate_clicked,
|
|
|
|
| 1877 |
params_form, thinking_mode, streaming_mode,
|
| 1878 |
max_new_tokens, temperature, top_p, top_k, max_frames],
|
| 1879 |
[txt_message, chat_bot, app_session, stop_btn],
|
| 1880 |
+
show_progress="hidden",
|
| 1881 |
)
|
| 1882 |
clear_btn.click(
|
| 1883 |
native_clear_all,
|
| 1884 |
[txt_message, chat_bot, app_session],
|
| 1885 |
[txt_message, chat_bot, app_session,
|
| 1886 |
image_input, user_message, assistant_message],
|
| 1887 |
+
show_progress="hidden",
|
| 1888 |
)
|
| 1889 |
stop_btn.click(
|
| 1890 |
stop_clicked,
|
| 1891 |
[app_session],
|
| 1892 |
[app_session, stop_btn],
|
| 1893 |
+
show_progress="hidden",
|
| 1894 |
)
|
| 1895 |
|
| 1896 |
with gr.Tab("How to use"):
|