| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
|
|
| from unittest.mock import Mock, patch |
|
|
| import numpy as np |
| import pytest |
|
|
|
|
@pytest.fixture
def model_dir(tmp_path):
    """Return the path (as ``str``) of a not-yet-created model dir under pytest's tmp_path."""
    target = tmp_path.joinpath("model_dir")
    return str(target)
|
|
|
|
@pytest.fixture
def mock_runner():
    """Build a stub multimodal runner.

    The stub reports a "neva" model type, yields a zeroed HWC uint-range image
    array from ``load_test_media``, and always answers "Test response" from
    ``run`` — enough for the forward-path tests to verify call plumbing.
    """
    stub = Mock()
    stub.model_type = "neva"
    stub.load_test_media = Mock(return_value=np.zeros((1, 224, 224, 3)))
    stub.run = Mock(return_value="Test response")
    return stub
|
|
|
|
class TestTensorRTMMExporter:
    """Unit tests for ``nemo.export.tensorrt_mm_exporter.TensorRTMMExporter``.

    The exporter is always constructed with ``load_model=False`` so no real
    TensorRT engine is built; heavy collaborators (engine builders, runner)
    are patched or replaced with mocks. All tests are GPU-only by marker.
    ``nemo`` is imported inside each test so collection works without it.
    """

    @pytest.mark.run_only_on('GPU')
    def test_init(self, model_dir):
        """Constructor records the model dir, defaults to vision, loads nothing."""
        from nemo.export.tensorrt_mm_exporter import TensorRTMMExporter

        exp = TensorRTMMExporter(model_dir, load_model=False)

        assert exp.model_dir == model_dir
        assert exp.runner is None
        assert exp.modality == "vision"

    @pytest.mark.run_only_on('GPU')
    def test_init_invalid_modality(self, model_dir):
        """An unsupported modality string is rejected with an AssertionError."""
        from nemo.export.tensorrt_mm_exporter import TensorRTMMExporter

        with pytest.raises(AssertionError):
            TensorRTMMExporter(model_dir, modality="invalid")

    @pytest.mark.run_only_on('GPU')
    @patch("nemo.export.tensorrt_mm_exporter.build_mllama_engine")
    def test_export_mllama(self, mock_build, model_dir):
        """Exporting an mllama checkpoint delegates to build_mllama_engine."""
        from nemo.export.tensorrt_mm_exporter import TensorRTMMExporter

        exp = TensorRTMMExporter(model_dir, load_model=False)
        exp.export(
            visual_checkpoint_path="dummy/path", model_type="mllama", tensor_parallel_size=1, load_model=False
        )

        mock_build.assert_called_once()

    @pytest.mark.run_only_on('GPU')
    @patch("nemo.export.tensorrt_mm_exporter.build_trtllm_engine")
    @patch("nemo.export.tensorrt_mm_exporter.build_visual_engine")
    def test_export_neva(self, mock_visual, mock_trtllm, model_dir):
        """Exporting a neva checkpoint builds both the LLM and visual engines."""
        from nemo.export.tensorrt_mm_exporter import TensorRTMMExporter

        exp = TensorRTMMExporter(model_dir, load_model=False)
        exp.export(
            visual_checkpoint_path="dummy/path", model_type="neva", tensor_parallel_size=1, load_model=False
        )

        mock_trtllm.assert_called_once()
        mock_visual.assert_called_once()

    @pytest.mark.run_only_on('GPU')
    def test_forward_without_loading(self, model_dir):
        """forward() before export/load fails with an explanatory message."""
        from nemo.export.tensorrt_mm_exporter import TensorRTMMExporter

        exp = TensorRTMMExporter(model_dir, load_model=False)

        with pytest.raises(Exception, match="should be exported and"):
            exp.forward("test prompt", "test_image.jpg")

    @pytest.mark.run_only_on('GPU')
    def test_forward(self, model_dir, mock_runner):
        """forward() routes the prompt and media through the runner."""
        from nemo.export.tensorrt_mm_exporter import TensorRTMMExporter

        exp = TensorRTMMExporter(model_dir, load_model=False)
        exp.runner = mock_runner

        response = exp.forward(
            input_text="What's in this image?", input_media="test_image.jpg", batch_size=1, max_output_len=30
        )

        assert response == "Test response"
        mock_runner.load_test_media.assert_called_once()
        mock_runner.run.assert_called_once()

    @pytest.mark.run_only_on('GPU')
    def test_get_triton_input(self, model_dir):
        """The Triton input spec has 10 entries, led by a bytes 'input_text'."""
        from nemo.export.tensorrt_mm_exporter import TensorRTMMExporter

        exp = TensorRTMMExporter(model_dir, load_model=False)
        inputs = exp.get_triton_input

        assert len(inputs) == 10
        first = inputs[0]
        assert first.name == "input_text"
        assert first.dtype == bytes

    @pytest.mark.run_only_on('GPU')
    def test_get_triton_output(self, model_dir):
        """The Triton output spec is a single bytes tensor named 'outputs'."""
        from nemo.export.tensorrt_mm_exporter import TensorRTMMExporter

        exp = TensorRTMMExporter(model_dir, load_model=False)
        outputs = exp.get_triton_output

        assert len(outputs) == 1
        only = outputs[0]
        assert only.name == "outputs"
        assert only.dtype == bytes

    @pytest.mark.run_only_on('GPU')
    def test_forward_with_all_params(self, model_dir, mock_runner):
        """Every generation knob is forwarded positionally to runner.run."""
        from nemo.export.tensorrt_mm_exporter import TensorRTMMExporter

        exp = TensorRTMMExporter(model_dir, load_model=False)
        exp.runner = mock_runner

        response = exp.forward(
            input_text="What's in this image?",
            input_media="test_image.jpg",
            batch_size=2,
            max_output_len=50,
            top_k=5,
            top_p=0.9,
            temperature=0.7,
            repetition_penalty=1.2,
            num_beams=4,
            lora_uids=["lora1", "lora2"],
        )

        assert response == "Test response"
        mock_runner.load_test_media.assert_called_once()
        # Positional order must match the runner.run signature exactly.
        expected_call_args = (
            "What's in this image?",
            mock_runner.load_test_media.return_value,
            50,
            2,
            5,
            0.9,
            0.7,
            1.2,
            4,
            ["lora1", "lora2"],
        )
        mock_runner.run.assert_called_once_with(*expected_call_args)

    @pytest.mark.run_only_on('GPU')
    def test_get_input_media_tensors_vision(self, model_dir):
        """Vision modality exposes one uint8 NHWC image tensor."""
        from nemo.export.tensorrt_mm_exporter import TensorRTMMExporter

        exp = TensorRTMMExporter(model_dir, load_model=False, modality="vision")
        tensors = exp.get_input_media_tensors()

        assert len(tensors) == 1
        media = tensors[0]
        assert media.name == "input_media"
        assert media.shape == (-1, -1, -1, 3)
        assert media.dtype == np.uint8

    @pytest.mark.run_only_on('GPU')
    def test_get_input_media_tensors_audio(self, model_dir):
        """Audio modality exposes a float signal plus its int length."""
        from nemo.export.tensorrt_mm_exporter import TensorRTMMExporter

        exp = TensorRTMMExporter(model_dir, load_model=False, modality="audio")
        tensors = exp.get_input_media_tensors()

        expected = [
            ("input_signal", (-1,), np.single),
            ("input_signal_length", (1,), np.intc),
        ]
        assert len(tensors) == len(expected)
        for tensor, (name, shape, dtype) in zip(tensors, expected):
            assert tensor.name == name
            assert tensor.shape == shape
            assert tensor.dtype == dtype

    @pytest.mark.run_only_on('GPU')
    def test_export_with_invalid_model_type(self, model_dir):
        """An unknown model_type makes export() raise."""
        from nemo.export.tensorrt_mm_exporter import TensorRTMMExporter

        exp = TensorRTMMExporter(model_dir, load_model=False)

        with pytest.raises(Exception):
            exp.export(
                visual_checkpoint_path="dummy/path",
                model_type="invalid_model_type",
                tensor_parallel_size=1,
                load_model=False,
            )

    @pytest.mark.run_only_on('GPU')
    def test_export_with_existing_files(self, model_dir):
        """Export refuses to run into a non-empty dir unless deletion is allowed."""
        import os

        from nemo.export.tensorrt_mm_exporter import TensorRTMMExporter

        # Pre-populate the target directory so export sees leftover files.
        os.makedirs(model_dir, exist_ok=True)
        marker = os.path.join(model_dir, "test.txt")
        with open(marker, "w") as f:
            f.write("test")

        exp = TensorRTMMExporter(model_dir, load_model=False)

        with pytest.raises(Exception, match="There are files in this folder"):
            exp.export(
                visual_checkpoint_path="dummy/path",
                model_type="neva",
                tensor_parallel_size=1,
                load_model=False,
                delete_existing_files=False,
            )
|