Spaces:

akhaliq
/

MiniCPM-V-4.6

Running on Zero

akhaliq HF Staff committed 10 days ago

Commit

f009ec7

1 Parent(s): f1f0cc8

feat: add text-only inference support and conditional downsample_mode parameter for model generation

Files changed (1) hide show

app.py CHANGED Viewed

@@ -40,8 +40,15 @@ def predict(message: str, file: FileData = None, downsample_mode: str = "16x"):
     General inference endpoint for both image and video.
     """
     if file is None:
-        # Text-only inference (standard LLM behavior)
         messages = [{"role": "user", "content": [{"type": "text", "text": message}]}]
     else:
         file_path = file["path"]
         is_video = any(file_path.lower().endswith(ext) for ext in ['.mp4', '.mkv', '.mov', '.avi'])
@@ -85,13 +92,17 @@ def predict(message: str, file: FileData = None, downsample_mode: str = "16x"):
             ).to(model.device)
     with torch.no_grad():
-        generated_ids = model.generate(
-            **inputs,
-            downsample_mode=downsample_mode,
-            max_new_tokens=1024,
-            do_sample=True,
-            temperature=0.7
-        )
     generated_ids_trimmed = [
         out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)

     General inference endpoint for both image and video.
     """
     if file is None:
+        # Text-only inference
         messages = [{"role": "user", "content": [{"type": "text", "text": message}]}]
+        inputs = processor.apply_chat_template(
+            messages,
+            tokenize=True,
+            add_generation_prompt=True,
+            return_dict=True,
+            return_tensors="pt"
+        ).to(model.device)
     else:
         file_path = file["path"]
         is_video = any(file_path.lower().endswith(ext) for ext in ['.mp4', '.mkv', '.mov', '.avi'])
             ).to(model.device)
     with torch.no_grad():
+        generate_kwargs = {
+            **inputs,
+            "max_new_tokens": 1024,
+            "do_sample": True,
+            "temperature": 0.7
+        }
+        if file is not None:
+            generate_kwargs["downsample_mode"] = downsample_mode
+        generated_ids = model.generate(**generate_kwargs)
     generated_ids_trimmed = [
         out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)