"""Unit tests for the Grader class — OpenAI calls are always mocked.""" import pathlib import pytest from unittest.mock import MagicMock, patch from deceit_env.server.grader import Grader, GraderResult @pytest.fixture def tmp_grader(tmp_path): return Grader(cache_path=tmp_path / "cache.json", openai_api_key=None) @pytest.fixture def api_grader(tmp_path): return Grader(cache_path=tmp_path / "cache.json", openai_api_key="fake-key") class TestExactMatch: def test_identical_strings(self, tmp_grader): result = tmp_grader.check("Canberra", "Canberra") assert result.correct is True assert result.method == "exact" def test_case_insensitive(self, tmp_grader): result = tmp_grader.check("canberra", "Canberra") assert result.correct is True assert result.method == "exact" def test_trailing_punctuation_stripped(self, tmp_grader): result = tmp_grader.check("Canberra.", "Canberra") assert result.correct is True assert result.method == "exact" def test_extra_whitespace_stripped(self, tmp_grader): result = tmp_grader.check(" Canberra ", "Canberra") assert result.correct is True assert result.method == "exact" def test_wrong_answer_fails_exact(self, tmp_grader): with pytest.raises(RuntimeError, match="no OpenAI API key"): tmp_grader.check("Sydney", "Canberra") def test_empty_answer_returns_incorrect(self, tmp_grader): result = tmp_grader.check("", "Canberra") assert result.correct is False assert result.method == "exact" class TestSemanticMatch: def _mock_openai_response(self, verdict: str): mock_client = MagicMock() mock_choice = MagicMock() mock_choice.message.content = verdict mock_client.chat.completions.create.return_value.choices = [mock_choice] return mock_client def test_semantic_called_when_exact_fails(self, api_grader): mock_client = self._mock_openai_response("YES") with patch("deceit_env.server.grader.OpenAI", return_value=mock_client): result = api_grader.check("The Australian capital", "Canberra") assert result.method == "semantic" assert result.correct is True mock_client.chat.completions.create.assert_called_once() def test_semantic_no_called_when_exact_matches(self, api_grader): mock_client = self._mock_openai_response("YES") with patch("deceit_env.server.grader.OpenAI", return_value=mock_client): api_grader.check("Canberra", "Canberra") mock_client.chat.completions.create.assert_not_called() def test_semantic_returns_false_on_no(self, api_grader): mock_client = self._mock_openai_response("NO") with patch("deceit_env.server.grader.OpenAI", return_value=mock_client): result = api_grader.check("Sydney", "Canberra") assert result.correct is False def test_cache_prevents_duplicate_api_calls(self, api_grader): mock_client = self._mock_openai_response("YES") with patch("deceit_env.server.grader.OpenAI", return_value=mock_client): result1 = api_grader.check("The Australian capital", "Canberra") result2 = api_grader.check("The Australian capital", "Canberra") assert mock_client.chat.completions.create.call_count == 1 assert result1.correct == result2.correct def test_cache_persists_to_disk(self, tmp_path): cache_path = tmp_path / "cache.json" grader1 = Grader(cache_path=cache_path, openai_api_key="fake-key") mock_client = self._mock_openai_response("YES") with patch("deceit_env.server.grader.OpenAI", return_value=mock_client): grader1.check("The Australian capital", "Canberra") grader2 = Grader(cache_path=cache_path, openai_api_key="fake-key") with patch("deceit_env.server.grader.OpenAI", return_value=mock_client): result = grader2.check("The Australian capital", "Canberra") assert mock_client.chat.completions.create.call_count == 1 assert result.correct is True def test_error_raised_without_api_key(self, tmp_grader): with pytest.raises(RuntimeError, match="no OpenAI API key"): tmp_grader.check("Sydney", "Canberra") class TestRateLimitRetry: def test_retries_on_429_then_succeeds(self, api_grader): from openai import RateLimitError import httpx mock_client = MagicMock() mock_choice = MagicMock() mock_choice.message.content = "YES" ok_response = MagicMock() ok_response.choices = [mock_choice] raw_response = MagicMock() raw_response.headers = {} raw_response.status_code = 429 _dummy_request = httpx.Request("POST", "https://api.openai.com/v1/chat/completions") rate_err = RateLimitError("rate limited", response=httpx.Response(429, request=_dummy_request), body={}) mock_client.chat.completions.create.side_effect = [rate_err, ok_response] with patch("deceit_env.server.grader.OpenAI", return_value=mock_client): with patch("time.sleep") as mock_sleep: result = api_grader.check("The Australian capital", "Canberra") assert result.correct is True assert mock_client.chat.completions.create.call_count == 2 mock_sleep.assert_called_once_with(25)