"""Smoke tests for the task generator, verifier, reward system and environment."""
import os
import sys

# Put the parent of this file's directory on the import path before the
# project imports below (presumably that is where the ``env`` package lives).
sys.path.append(
    os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
)

from env.generator import TaskGenerationEngine
from env.verifier import VerifierSystem
from env.rewards import RewardSystem
from env.environment import AutomathreasonerEnvironment
from env.models import AutomathreasonerAction


def test_generator():
    """Check that the generation engine yields a usable problem and task."""
    generator = TaskGenerationEngine()

    # Arithmetic generation returns a (problem, difficulty, answer) triple;
    # only the problem and the answer are inspected here.
    problem, _difficulty, answer = generator.generate_arithmetic(complexity=1)
    assert problem
    assert answer

    # A generated task must expose each of these keys.
    generated = generator.generate_task(target_difficulty_band=2.0)
    for required_key in ("problem", "solution", "difficulty"):
        assert required_key in generated


def test_verifier():
    """Exercise each individual verifier check, then the combined verify()."""
    checker = VerifierSystem()

    # Exact match: identical strings, and a candidate padded with spaces.
    assert checker.check_exact_match("42", "42")
    assert checker.check_exact_match(" 42 ", "42")

    # Numeric tolerance: a close value is accepted, a value off by one is not.
    assert checker.check_numeric_tolerance("3.14159", "3.1415")
    assert not checker.check_numeric_tolerance("4.1415", "3.1415")

    # Python execution: the expression is expected to match the answer "4".
    assert checker.check_python_execution("2 + 2", "4")

    # Full verification returns a (correctness, reasoning quality) pair.
    correctness, quality = checker.verify("Because 2 + 2 is 4", "4", "4")
    assert correctness == 1.0
    # Some (mock) reasoning score is expected to be awarded.
    assert quality > 0.0


def test_rewards():
    """Check the repeat-answer diversity score and a regular reward."""
    rewards = RewardSystem(max_len=1000)
    previous_attempts = [{"final_answer": "42"}]

    # Repeating an answer that is already in the history scores -1.0.
    diversity = rewards.compute_diversity("42", previous_attempts)
    assert diversity == -1.0

    # A fully correct, well-reasoned first attempt must earn a positive total.
    total, _components = rewards.compute_reward(
        correctness=1.0,
        reasoning_quality=1.0,
        action_str="step 1: do math. \n= 42",
        final_answer="42",
        history=[],
        times_seen_problem=0,
    )
    assert total > 0.0


def test_environment_step():
    """Reset the environment, take one step, and inspect both observations."""
    environment = AutomathreasonerEnvironment()

    initial = environment.reset()
    assert initial.problem_text != ""
    assert initial.difficulty_level > 0
    assert len(initial.history) == 0

    # Submit a placeholder action; only the bookkeeping is checked, not
    # whether the answer is right.
    guess = AutomathreasonerAction(
        reasoning="I am guessing the answer.",
        final_answer="0",
    )
    result = environment.step(guess)

    assert result.reward is not None
    assert len(result.history) == 1
    assert "reward_components" in result.metadata