#!/usr/bin/env python3
"""
Sanity check: load MiniCPM-o 4.5 and run a single sample through it.
Picks one video from sync eval set, passes video + audio + prompt, prints
the model's response.
"""
from __future__ import annotations
import os
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent))
from minicpmo_inference import load_model, run_inference
def main() -> None:
    """Sanity-check one sample: pick the first eval video, run the model, print its answer.

    Exits with status 1 if either the video or its extracted audio is missing.
    """
    # Fixed eval-set locations: original videos and their pre-extracted WAV audio.
    original_root = Path("/opt/dlami/nvme/video_source/original/uag_oops")
    audio_root = Path("/opt/dlami/nvme/video_source/extracted_audio/original/uag_oops")

    # Sort for determinism, then take the first video as the single test sample.
    videos = sorted(original_root.glob("*.mp4"))
    if not videos:
        print(f"ERROR: no videos found at {original_root}")
        sys.exit(1)

    video_path = videos[0]
    # Extracted audio is expected under audio_root with the same stem, as .wav.
    audio_path = audio_root / f"{video_path.stem}.wav"
    if not audio_path.exists():
        print(f"ERROR: audio not found for {video_path.name}")
        sys.exit(1)

    print(f"Video: {video_path}")
    print(f"Audio: {audio_path}")
    print()

    model, tokenizer = load_model()

    prompt = (
        "Watch this video and listen to its audio carefully. "
        "Determine whether the audio and video tracks are synchronized. "
        "Explain your reasoning."
    )

    print("=== Running inference ===")
    # temperature=0.0 requests deterministic (greedy) decoding; the exact
    # handling is up to run_inference in minicpmo_inference.
    response = run_inference(
        model, tokenizer,
        video_path=str(video_path),
        audio_path=str(audio_path),
        prompt=prompt,
        max_new_tokens=128,
        temperature=0.0,
    )

    print()
    print("=== Response ===")
    print(response)


if __name__ == "__main__":
    main()