#!/usr/bin/env python3 """ Sanity check: load MiniCPM-o 4.5 and run a single sample through it. Picks one video from sync eval set, passes video + audio + prompt, prints the model's response. """ from __future__ import annotations import os import sys from pathlib import Path sys.path.insert(0, str(Path(__file__).parent)) from minicpmo_inference import load_model, run_inference def main(): # Pick the first original video in the sync eval set original_root = Path("/opt/dlami/nvme/video_source/original/uag_oops") audio_root = Path("/opt/dlami/nvme/video_source/extracted_audio/original/uag_oops") videos = sorted(original_root.glob("*.mp4")) if not videos: print(f"ERROR: no videos found at {original_root}") sys.exit(1) video_path = videos[0] audio_path = audio_root / f"{video_path.stem}.wav" if not audio_path.exists(): print(f"ERROR: audio not found for {video_path.name}") sys.exit(1) print(f"Video: {video_path}") print(f"Audio: {audio_path}") print() model, tokenizer = load_model() prompt = ( "Watch this video and listen to its audio carefully. " "Determine whether the audio and video tracks are synchronized. " "Explain your reasoning." ) print("=== Running inference ===") response = run_inference( model, tokenizer, video_path=str(video_path), audio_path=str(audio_path), prompt=prompt, max_new_tokens=128, temperature=0.0, ) print() print("=== Response ===") print(response) if __name__ == "__main__": main()