Spaces:
Sleeping
Sleeping
| import pytest | |
| from env.reward import RewardCalculator | |
| from env.models import Action, QueryResult | |
| from env.tasks import MonthlySignupsTask | |
class MockTask:
    """Mock task for testing reward calculator.

    Carries only plain attributes and two small methods, so the reward
    tests can run without a real task implementation.
    """

    def __init__(self):
        # Table names counted as relevant for this task.
        self.relevant_tables = ["users", "orders"]
        # Expected answer; grade() compares against its string form.
        self.ground_truth = 100
        self.difficulty = "easy"
        self.max_steps = 10

    def grade(self, answer):
        """Return 1.0 if *answer* matches the ground truth, otherwise 0.0.

        The answer is converted to text and stripped of surrounding
        whitespace first, so ``100``, ``"100"`` and ``" 100 "`` all match.
        """
        submitted = str(answer).strip()
        return 1.0 if submitted == str(self.ground_truth) else 0.0

    def get_hints(self, step):
        """Return the hints for *step*; this mock never provides any."""
        return []
class TestRewardCalculator:
    """Test the reward calculation logic.

    Every test calls ``RewardCalculator.calculate(action, result, task,
    step, query_history, is_terminal)`` with a ``MockTask`` and checks the
    returned reward against the component values named in its docstring.
    """

    def setup_method(self):
        """Give each test a fresh calculator and mock task."""
        self.calc = RewardCalculator()
        self.task = MockTask()

    def test_no_error_query_reward(self):
        """Query without error gets +0.15"""
        action = Action(sql_query="SELECT 1 FROM users")
        result = QueryResult(columns=["1"], rows=[[1]], error=None)
        reward = self.calc.calculate(action, result, self.task, 1, [], False)
        assert reward >= 0.15

    def test_relevant_table_reward(self):
        """Query touching relevant table gets +0.10"""
        action = Action(sql_query="SELECT * FROM users")
        result = QueryResult(columns=["id"], rows=[[1]], error=None)
        reward = self.calc.calculate(action, result, self.task, 1, [], False)
        assert reward >= 0.10

    def test_non_empty_result_reward(self):
        """Query with rows gets +0.05"""
        action = Action(sql_query="SELECT 1")
        result = QueryResult(columns=["1"], rows=[[1]], error=None)
        reward = self.calc.calculate(action, result, self.task, 1, [], False)
        assert reward >= 0.05

    def test_error_query_no_reward(self):
        """Query with error gets no step rewards"""
        action = Action(sql_query="SELECT * FROM nonexistent")
        result = QueryResult(columns=[], rows=[], error="Table not found")
        reward = self.calc.calculate(action, result, self.task, 1, [], False)
        # approx() avoids depending on exact float representation.
        assert reward == pytest.approx(0.0)

    def test_efficiency_penalty_after_step_3(self):
        """Steps beyond 3 get -0.02 per step"""
        action = Action(sql_query="SELECT 1")
        result = QueryResult(columns=["1"], rows=[[1]], error=None)
        # Same query at step 1, scored by a separate calculator so the two
        # calls cannot influence each other.
        baseline = RewardCalculator().calculate(
            action, result, self.task, 1, [], False
        )
        reward = self.calc.calculate(action, result, self.task, 5, [], False)
        # An absolute bound alone would pass even if no penalty were
        # applied, so also require the late step to score below step 1.
        assert reward < baseline
        assert reward < 0.35

    def test_infinite_loop_penalty(self):
        """Same query 3 times gets -0.10"""
        action = Action(sql_query="SELECT 1")
        result = QueryResult(columns=["1"], rows=[[1]], error=None)
        query_history = ["SELECT 1", "SELECT 1", "SELECT 1"]
        # Same step and query with an empty history: the only difference
        # between the two calls is the repetition.
        baseline = RewardCalculator().calculate(
            action, result, self.task, 4, [], False
        )
        reward = self.calc.calculate(
            action, result, self.task, 4, query_history, False
        )
        assert reward < baseline
        assert reward < 0.30

    def test_terminal_submit_grade_reward(self):
        """Terminal submit gets up to 0.60 based on grade"""
        action = Action(submit_answer="100")
        result = None
        # Use step 1 to avoid efficiency penalty
        reward = self.calc.calculate(action, result, self.task, 1, [], True)
        # grade(100) = 1.0 * 0.60 = 0.60
        assert reward >= 0.60

    def test_terminal_submit_wrong_answer(self):
        """Wrong answer gets no grade-based terminal reward"""
        action = Action(submit_answer="999")
        result = None
        reward = self.calc.calculate(action, result, self.task, 5, [], True)
        # grade(999) = 0.0 * 0.60 = 0.0
        assert reward < 0.10

    def test_reward_clamped_to_0_1(self):
        """Reward should be clamped between 0 and 1"""
        # A very late step accumulates a large penalty; the result must
        # still lie inside [0, 1].
        action = Action(sql_query="SELECT 1")
        result = QueryResult(columns=["1"], rows=[[1]], error=None)
        reward = self.calc.calculate(action, result, self.task, 50, [], False)
        assert 0.0 <= reward <= 1.0
class TestRewardBreakdown:
    """Test specific reward components"""

    def test_max_step_reward_calculation(self):
        """Test maximum possible step reward for a good query"""
        action = Action(sql_query="SELECT * FROM users")
        result = QueryResult(columns=["id"], rows=[[1], [2], [3]], error=None)
        calc = RewardCalculator()
        task = MockTask()
        reward = calc.calculate(action, result, task, 1, [], False)
        # 0.15 (no error) + 0.10 (relevant table) + 0.05 (has rows) + 0.05 (reasonable size)
        expected = 0.35
        # Absolute tolerance only: the components are small fixed increments.
        assert reward == pytest.approx(expected, abs=0.01)