Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -21,13 +21,12 @@ model = AutoModel.from_pretrained(
|
|
| 21 |
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
|
| 22 |
print("模型加载完成!")
|
| 23 |
|
| 24 |
-
# 3. 推理函数
|
| 25 |
def analyze_image(image, prompt_text):
|
| 26 |
if image is None:
|
| 27 |
return "请上传一张图片。"
|
| 28 |
|
| 29 |
-
#
|
| 30 |
-
# 模型期望的是一个包含字典的列表,而不是直接的 (image, text) 元组
|
| 31 |
msgs = [
|
| 32 |
{
|
| 33 |
'role': 'user',
|
|
@@ -38,8 +37,7 @@ def analyze_image(image, prompt_text):
|
|
| 38 |
}
|
| 39 |
]
|
| 40 |
|
| 41 |
-
#
|
| 42 |
-
# 将输入张量强制转为 float32,因为 CPU 通常不支持 bfloat16 计算
|
| 43 |
with torch.no_grad():
|
| 44 |
# 1. 构建输入
|
| 45 |
inputs = tokenizer.apply_chat_template(
|
|
@@ -49,8 +47,7 @@ def analyze_image(image, prompt_text):
|
|
| 49 |
return_tensors="pt"
|
| 50 |
)
|
| 51 |
|
| 52 |
-
# 2. 移动到设备并转换数据类型
|
| 53 |
-
# 注意:CPU 不支持 bfloat16,如果模型权重是 bfloat16,这里可能会出错,强制转为 float32 更稳妥
|
| 54 |
inputs = inputs.to(model.device, dtype=torch.float32)
|
| 55 |
|
| 56 |
# 3. 生成回复
|
|
@@ -61,18 +58,17 @@ def analyze_image(image, prompt_text):
|
|
| 61 |
do_sample=True
|
| 62 |
)
|
| 63 |
|
| 64 |
-
#
|
| 65 |
-
response = tokenizer.decode(outputs
|
| 66 |
|
| 67 |
-
#
|
| 68 |
-
# 如果输出格式变化,这里可能需要微调
|
| 69 |
if 'assistant' in response:
|
| 70 |
response = response.split('assistant')[-1].strip()
|
| 71 |
|
| 72 |
return response
|
| 73 |
|
| 74 |
-
# 4. 构建界面
|
| 75 |
-
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
| 76 |
gr.Markdown("## 🖼️ MiniCPM-V 4.6 视觉理解 (CPU 优化版)")
|
| 77 |
gr.Markdown("上传图片并输入问题,AI 将为你解答。")
|
| 78 |
|
|
@@ -86,11 +82,11 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
| 86 |
|
| 87 |
btn.click(fn=analyze_image, inputs=[img_input, txt_input], outputs=output)
|
| 88 |
|
| 89 |
-
# 5. 启动 (
|
| 90 |
if __name__ == "__main__":
|
| 91 |
demo.launch(
|
| 92 |
server_name="0.0.0.0",
|
| 93 |
server_port=7860,
|
| 94 |
-
|
| 95 |
-
|
| 96 |
)
|
|
|
|
| 21 |
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
|
| 22 |
print("模型加载完成!")
|
| 23 |
|
| 24 |
+
# 3. 推理函数
|
| 25 |
def analyze_image(image, prompt_text):
|
| 26 |
if image is None:
|
| 27 |
return "请上传一张图片。"
|
| 28 |
|
| 29 |
+
# 构建符合 MiniCPM-V 4.6 要求的输入格式
|
|
|
|
| 30 |
msgs = [
|
| 31 |
{
|
| 32 |
'role': 'user',
|
|
|
|
| 37 |
}
|
| 38 |
]
|
| 39 |
|
| 40 |
+
# CPU 上的数据类型处理
|
|
|
|
| 41 |
with torch.no_grad():
|
| 42 |
# 1. 构建输入
|
| 43 |
inputs = tokenizer.apply_chat_template(
|
|
|
|
| 47 |
return_tensors="pt"
|
| 48 |
)
|
| 49 |
|
| 50 |
+
# 2. 移动到设备并转换数据类型 (CPU 强制 float32)
|
|
|
|
| 51 |
inputs = inputs.to(model.device, dtype=torch.float32)
|
| 52 |
|
| 53 |
# 3. 生成回复
|
|
|
|
| 58 |
do_sample=True
|
| 59 |
)
|
| 60 |
|
| 61 |
+
# 解码输出
|
| 62 |
+
response = tokenizer.decode(outputs, skip_special_tokens=True)
|
| 63 |
|
| 64 |
+
# 提取 Assistant 的回答部分
|
|
|
|
| 65 |
if 'assistant' in response:
|
| 66 |
response = response.split('assistant')[-1].strip()
|
| 67 |
|
| 68 |
return response
|
| 69 |
|
| 70 |
+
# 4. 构建界面 (注意:这里去掉了 theme 参数)
|
| 71 |
+
with gr.Blocks() as demo:
|
| 72 |
gr.Markdown("## 🖼️ MiniCPM-V 4.6 视觉理解 (CPU 优化版)")
|
| 73 |
gr.Markdown("上传图片并输入问题,AI 将为你解答。")
|
| 74 |
|
|
|
|
| 82 |
|
| 83 |
btn.click(fn=analyze_image, inputs=[img_input, txt_input], outputs=output)
|
| 84 |
|
| 85 |
+
# 5. 启动 (theme 移到这里,并删除了 show_api)
|
| 86 |
if __name__ == "__main__":
|
| 87 |
demo.launch(
|
| 88 |
server_name="0.0.0.0",
|
| 89 |
server_port=7860,
|
| 90 |
+
ssr_mode=False, # 禁用服务端渲染,避免异步报错
|
| 91 |
+
theme=gr.themes.Soft() # Gradio 6.0 要求 theme 必须放在 launch 里
|
| 92 |
)
|