| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| from unittest.mock import MagicMock, patch |
|
|
| import numpy as np |
| import pytest |
|
|
| from nemo.deploy.nlp.query_llm import NemoQueryLLM, NemoQueryLLMBase, NemoQueryLLMHF, NemoQueryLLMPyTorch |
|
|
|
|
class TestNemoQueryLLMBase:
    """Unit tests for the common NemoQueryLLMBase client."""

    def test_base_initialization(self):
        """Constructor must store the endpoint URL and model name verbatim."""
        endpoint = "localhost:8000"
        name = "test-model"
        client = NemoQueryLLMBase(url=endpoint, model_name=name)
        assert client.url == endpoint
        assert client.model_name == name
|
|
|
|
class TestNemoQueryLLMPyTorch:
    """Unit tests for NemoQueryLLMPyTorch, the in-framework (PyTorch) query client."""

    @pytest.fixture
    def query(self):
        """Provide a fresh client pointed at a dummy Triton endpoint."""
        return NemoQueryLLMPyTorch(url="localhost:8000", model_name="test-model")

    @staticmethod
    def _prime_client(mock_client, payload):
        """Wire the patched ModelClient context manager so infer_batch returns *payload*."""
        fake = MagicMock()
        mock_client.return_value.__enter__.return_value = fake
        fake.infer_batch.return_value = payload
        fake.model_config.outputs = [MagicMock(dtype=np.bytes_)]
        return fake

    def test_initialization(self, query):
        """The client should subclass the base and retain its constructor args."""
        assert isinstance(query, NemoQueryLLMBase)
        assert query.url == "localhost:8000"
        assert query.model_name == "test-model"

    @patch('nemo.deploy.nlp.query_llm.ModelClient')
    def test_query_llm_basic(self, mock_client, query):
        """A plain query should yield an OpenAI-style dict with decoded text."""
        self._prime_client(mock_client, {"sentences": np.array([b"test response"])})

        result = query.query_llm(prompts=["test prompt"], max_length=100, temperature=0.7, top_k=1, top_p=0.9)

        assert isinstance(result, dict)
        assert "choices" in result
        assert result["choices"][0]["text"] == "test response"

    @patch('nemo.deploy.nlp.query_llm.ModelClient')
    def test_query_llm_with_logprobs(self, mock_client, query):
        """Requesting log-probs should surface them under choices[0]['logprobs']."""
        self._prime_client(
            mock_client,
            {
                "sentences": np.array([b"test response"]),
                "log_probs": np.array([0.1, 0.2, 0.3]),
            },
        )

        result = query.query_llm(prompts=["test prompt"], max_length=100, compute_logprob=True)

        assert "logprobs" in result["choices"][0]
        assert "token_logprobs" in result["choices"][0]["logprobs"]
|
|
|
class TestNemoQueryLLMHF:
    """Unit tests for NemoQueryLLMHF, the HuggingFace-backend query client."""

    @pytest.fixture
    def query(self):
        """Provide a fresh client pointed at a dummy Triton endpoint."""
        return NemoQueryLLMHF(url="localhost:8000", model_name="test-model")

    @staticmethod
    def _prime_client(mock_client, payload):
        """Wire the patched ModelClient context manager so infer_batch returns *payload*."""
        fake = MagicMock()
        mock_client.return_value.__enter__.return_value = fake
        fake.infer_batch.return_value = payload
        fake.model_config.outputs = [MagicMock(dtype=np.bytes_)]
        return fake

    def test_initialization(self, query):
        """The client should subclass the base and retain its constructor args."""
        assert isinstance(query, NemoQueryLLMBase)
        assert query.url == "localhost:8000"
        assert query.model_name == "test-model"

    @patch('nemo.deploy.nlp.query_llm.ModelClient')
    def test_query_llm_basic(self, mock_client, query):
        """A plain query should yield an OpenAI-style dict with decoded text."""
        self._prime_client(mock_client, {"sentences": np.array([b"test response"])})

        result = query.query_llm(prompts=["test prompt"], max_length=100, temperature=0.7, top_k=1, top_p=0.9)

        assert isinstance(result, dict)
        assert "choices" in result
        assert result["choices"][0]["text"] == "test response"

    @patch('nemo.deploy.nlp.query_llm.ModelClient')
    def test_query_llm_with_logits(self, mock_client, query):
        """Requesting logits should surface a top-level 'logits' key in the response."""
        self._prime_client(
            mock_client,
            {
                "sentences": np.array([b"test response"]),
                "logits": np.array([[0.1, 0.2, 0.3]]),
            },
        )

        result = query.query_llm(prompts=["test prompt"], max_length=100, output_logits=True)

        assert "logits" in result
|
|
|
|
class TestNemoQueryLLM:
    """Unit tests for NemoQueryLLM, the TensorRT-LLM-style query client."""

    @pytest.fixture
    def query(self):
        """Provide a fresh client pointed at a dummy Triton endpoint."""
        return NemoQueryLLM(url="localhost:8000", model_name="test-model")

    @staticmethod
    def _prime_client(mock_client, payload):
        """Wire the patched (Decoupled)ModelClient so infer_batch returns *payload*."""
        fake = MagicMock()
        mock_client.return_value.__enter__.return_value = fake
        fake.infer_batch.return_value = payload
        fake.model_config.outputs = [MagicMock(dtype=np.bytes_)]
        return fake

    def test_initialization(self, query):
        """The client should subclass the base and retain its constructor args."""
        assert isinstance(query, NemoQueryLLMBase)
        assert query.url == "localhost:8000"
        assert query.model_name == "test-model"

    @patch('nemo.deploy.nlp.query_llm.ModelClient')
    def test_query_llm_basic(self, mock_client, query):
        """A plain query should return a list of decoded strings."""
        self._prime_client(mock_client, {"outputs": np.array([b"test response"])})

        result = query.query_llm(prompts=["test prompt"], max_output_len=100, temperature=0.7, top_k=1, top_p=0.9)

        assert isinstance(result[0], str)
        assert result[0] == "test response"

    @patch('nemo.deploy.nlp.query_llm.ModelClient')
    def test_query_llm_openai_format(self, mock_client, query):
        """With openai_format_response=True the client should return a choices dict."""
        self._prime_client(mock_client, {"outputs": np.array([b"test response"])})

        result = query.query_llm(prompts=["test prompt"], max_output_len=100, openai_format_response=True)

        assert isinstance(result, dict)
        assert "choices" in result
        assert result["choices"][0]["text"] == "test response"

    @patch('nemo.deploy.nlp.query_llm.DecoupledModelClient')
    def test_query_llm_streaming(self, mock_client, query):
        """Streaming should yield one decoded chunk per partial inference result."""
        self._prime_client(
            mock_client,
            [
                {"outputs": np.array([b"test"])},
                {"outputs": np.array([b" response"])},
            ],
        )

        chunks = list(query.query_llm_streaming(prompts=["test prompt"], max_output_len=100))

        assert len(chunks) == 2
        assert chunks[0] == "test"
        assert chunks[1] == " response"

    @patch('nemo.deploy.nlp.query_llm.ModelClient')
    def test_query_llm_with_stop_words(self, mock_client, query):
        """Passing stop_words_list should not break the plain-string response path."""
        self._prime_client(mock_client, {"outputs": np.array([b"test response"])})

        result = query.query_llm(prompts=["test prompt"], max_output_len=100, stop_words_list=["stop"])

        assert isinstance(result[0], str)
        assert result[0] == "test response"

    @patch('nemo.deploy.nlp.query_llm.ModelClient')
    def test_query_llm_with_bad_words(self, mock_client, query):
        """Passing bad_words_list should not break the plain-string response path."""
        self._prime_client(mock_client, {"outputs": np.array([b"test response"])})

        result = query.query_llm(prompts=["test prompt"], max_output_len=100, bad_words_list=["bad"])

        assert isinstance(result[0], str)
        assert result[0] == "test response"
|
|