# DECEIT / tests / test_grader.py
# Jayant-Kernel
# feat: add 429 retry wrapper to grader semantic check
# b44d7b0 unverified
"""Unit tests for the Grader class — OpenAI calls are always mocked."""
import pathlib
import pytest
from unittest.mock import MagicMock, patch
from deceit_env.server.grader import Grader, GraderResult
@pytest.fixture
def tmp_grader(tmp_path):
    """Provide a Grader with no OpenAI API key and a cache file under tmp_path."""
    cache_file = tmp_path / "cache.json"
    return Grader(cache_path=cache_file, openai_api_key=None)
@pytest.fixture
def api_grader(tmp_path):
    """Provide a Grader configured with a fake API key and a cache file under tmp_path."""
    cache_file = tmp_path / "cache.json"
    return Grader(cache_path=cache_file, openai_api_key="fake-key")
class TestExactMatch:
    """Checks of the exact-match path, using a Grader built without an API key."""

    @staticmethod
    def _expect_exact(outcome, *, correct):
        """Assert that ``outcome`` carries the given verdict and the "exact" method."""
        assert outcome.correct is correct
        assert outcome.method == "exact"

    def test_identical_strings(self, tmp_grader):
        """An answer identical to the reference is graded correct."""
        outcome = tmp_grader.check("Canberra", "Canberra")
        self._expect_exact(outcome, correct=True)

    def test_case_insensitive(self, tmp_grader):
        """A lower-cased answer still matches the reference."""
        outcome = tmp_grader.check("canberra", "Canberra")
        self._expect_exact(outcome, correct=True)

    def test_trailing_punctuation_stripped(self, tmp_grader):
        """A trailing period on the answer does not prevent a match."""
        outcome = tmp_grader.check("Canberra.", "Canberra")
        self._expect_exact(outcome, correct=True)

    def test_extra_whitespace_stripped(self, tmp_grader):
        """Surrounding whitespace on the answer does not prevent a match."""
        outcome = tmp_grader.check(" Canberra ", "Canberra")
        self._expect_exact(outcome, correct=True)

    def test_wrong_answer_fails_exact(self, tmp_grader):
        """A non-matching answer raises RuntimeError when no API key is set."""
        with pytest.raises(RuntimeError, match="no OpenAI API key"):
            tmp_grader.check("Sydney", "Canberra")

    def test_empty_answer_returns_incorrect(self, tmp_grader):
        """An empty answer is graded incorrect and reported as "exact"."""
        outcome = tmp_grader.check("", "Canberra")
        self._expect_exact(outcome, correct=False)
class TestSemanticMatch:
    """Checks of the semantic path, with the OpenAI client replaced by a mock."""

    # Name patched in every test so that no real OpenAI client is constructed.
    _OPENAI_TARGET = "deceit_env.server.grader.OpenAI"

    def _mock_openai_response(self, verdict: str):
        """Build a mock client whose chat completion reply has ``verdict`` as content."""
        choice = MagicMock()
        choice.message.content = verdict
        client = MagicMock()
        client.chat.completions.create.return_value.choices = [choice]
        return client

    def test_semantic_called_when_exact_fails(self, api_grader):
        """A non-exact answer triggers exactly one completion call."""
        client = self._mock_openai_response("YES")
        with patch(self._OPENAI_TARGET, return_value=client):
            outcome = api_grader.check("The Australian capital", "Canberra")
        assert outcome.method == "semantic"
        assert outcome.correct is True
        client.chat.completions.create.assert_called_once()

    def test_semantic_no_called_when_exact_matches(self, api_grader):
        """An exact match never reaches the completion endpoint."""
        client = self._mock_openai_response("YES")
        with patch(self._OPENAI_TARGET, return_value=client):
            api_grader.check("Canberra", "Canberra")
        client.chat.completions.create.assert_not_called()

    def test_semantic_returns_false_on_no(self, api_grader):
        """A "NO" reply from the model is graded incorrect."""
        client = self._mock_openai_response("NO")
        with patch(self._OPENAI_TARGET, return_value=client):
            outcome = api_grader.check("Sydney", "Canberra")
        assert outcome.correct is False

    def test_cache_prevents_duplicate_api_calls(self, api_grader):
        """Repeating the same check on one grader makes only one completion call."""
        client = self._mock_openai_response("YES")
        with patch(self._OPENAI_TARGET, return_value=client):
            first = api_grader.check("The Australian capital", "Canberra")
            second = api_grader.check("The Australian capital", "Canberra")
        assert client.chat.completions.create.call_count == 1
        assert first.correct == second.correct

    def test_cache_persists_to_disk(self, tmp_path):
        """A second grader on the same cache path reuses the first grader's verdict."""
        cache_path = tmp_path / "cache.json"
        client = self._mock_openai_response("YES")

        writer = Grader(cache_path=cache_path, openai_api_key="fake-key")
        with patch(self._OPENAI_TARGET, return_value=client):
            writer.check("The Australian capital", "Canberra")

        reader = Grader(cache_path=cache_path, openai_api_key="fake-key")
        with patch(self._OPENAI_TARGET, return_value=client):
            outcome = reader.check("The Australian capital", "Canberra")

        # Both graders share the same mock, so a count of 1 means the second
        # check made no completion call.
        assert client.chat.completions.create.call_count == 1
        assert outcome.correct is True

    def test_error_raised_without_api_key(self, tmp_grader):
        """Without an API key, a non-exact answer raises RuntimeError."""
        with pytest.raises(RuntimeError, match="no OpenAI API key"):
            tmp_grader.check("Sydney", "Canberra")
class TestRateLimitRetry:
    """Checks of the retry behaviour when the completion call is rate limited."""

    def test_retries_on_429_then_succeeds(self, api_grader):
        """One RateLimitError is followed by a single sleep(25) and a successful retry."""
        from openai import RateLimitError
        import httpx

        # Successful reply returned by the second completion call.
        mock_choice = MagicMock()
        mock_choice.message.content = "YES"
        ok_response = MagicMock()
        ok_response.choices = [mock_choice]

        # RateLimitError is built from a real httpx.Response, which in turn
        # needs a request object attached.
        _dummy_request = httpx.Request(
            "POST", "https://api.openai.com/v1/chat/completions"
        )
        rate_err = RateLimitError(
            "rate limited",
            response=httpx.Response(429, request=_dummy_request),
            body={},
        )

        # First call raises the 429 error, second call returns the reply.
        mock_client = MagicMock()
        mock_client.chat.completions.create.side_effect = [rate_err, ok_response]

        # time.sleep is patched so the test does not actually wait.
        with patch(
            "deceit_env.server.grader.OpenAI", return_value=mock_client
        ), patch("time.sleep") as mock_sleep:
            result = api_grader.check("The Australian capital", "Canberra")

        assert result.correct is True
        assert mock_client.chat.completions.create.call_count == 2
        mock_sleep.assert_called_once_with(25)