Spaces:
Running on Zero
refactor: move video processing configuration into nested processor_kwargs for improved parameter organization
Browse files
app.py
CHANGED
|
@@ -322,11 +322,16 @@ def predict(
|
|
| 322 |
return_dict=True,
|
| 323 |
return_tensors="pt",
|
| 324 |
enable_thinking=thinking_mode,
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
|
| 329 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 330 |
).to(model.device)
|
| 331 |
|
| 332 |
for k, v in inputs.items():
|
|
|
|
| 322 |
return_dict=True,
|
| 323 |
return_tensors="pt",
|
| 324 |
enable_thinking=thinking_mode,
|
| 325 |
+
processor_kwargs={
|
| 326 |
+
"downsample_mode": "16x",
|
| 327 |
+
"max_slice_nums": 1 if any(it.get("type") == "video" for msg in messages for it in msg["content"]) else 9,
|
| 328 |
+
"use_image_id": False if any(it.get("type") == "video" for msg in messages for it in msg["content"]) else True,
|
| 329 |
+
"videos_kwargs": {
|
| 330 |
+
"max_num_frames": max_frames,
|
| 331 |
+
"do_sample_frames": False, # Frames are already sampled by load_video
|
| 332 |
+
"stack_frames": 1,
|
| 333 |
+
}
|
| 334 |
+
}
|
| 335 |
).to(model.device)
|
| 336 |
|
| 337 |
for k, v in inputs.items():
|