# AutoMathReasoner / tests / test_env.py
# Pratap-K's picture
# AutoMathReasoner
# 98fc9b6
import sys
import os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from env.generator import TaskGenerationEngine
from env.verifier import VerifierSystem
from env.rewards import RewardSystem
from env.environment import AutomathreasonerEnvironment
from env.models import AutomathreasonerAction
def test_generator():
    """Smoke-test the task generator.

    Checks that ``generate_arithmetic`` yields a truthy problem and answer,
    and that ``generate_task`` returns a mapping exposing the ``problem``,
    ``solution`` and ``difficulty`` keys.
    """
    generator = TaskGenerationEngine()

    # Arithmetic generation: a three-element result is expected; the middle
    # element (difficulty) is not inspected by this test.
    problem_text, _difficulty, answer = generator.generate_arithmetic(complexity=1)
    assert problem_text
    assert answer

    # Generic task generation for a target difficulty band.
    generated = generator.generate_task(target_difficulty_band=2.0)
    for required_key in ("problem", "solution", "difficulty"):
        assert required_key in generated
def test_verifier():
    """Exercise each verification strategy exposed by ``VerifierSystem``.

    Covers exact matching (including surrounding whitespace), numeric
    tolerance, Python-expression execution, and the combined ``verify``
    entry point, which is expected to return a (correctness, quality) pair.
    """
    checker = VerifierSystem()

    # Exact match: identical strings, then one padded with whitespace.
    assert checker.check_exact_match("42", "42")
    assert checker.check_exact_match(" 42 ", "42")

    # Numeric tolerance: a close value is accepted, a distant one rejected.
    assert checker.check_numeric_tolerance("3.14159", "3.1415")
    rejected = checker.check_numeric_tolerance("4.1415", "3.1415")
    assert not rejected

    # Python execution: the expression is expected to evaluate to the answer.
    assert checker.check_python_execution("2 + 2", "4")

    # Full verification returns two values: correctness and reasoning quality.
    outcome = checker.verify("Because 2 + 2 is 4", "4", "4")
    correctness, quality = outcome
    assert correctness == 1.0
    # Should have some mock reasoning score
    assert quality > 0.0
def test_rewards():
    """Check the diversity penalty and the overall reward computation.

    A final answer that already appears in the history is expected to score
    -1.0 for diversity; a correct, well-reasoned first attempt is expected
    to receive a positive total reward.
    """
    rewards = RewardSystem(max_len=1000)

    # Diversity should drop when the same final answer is repeated.
    previous_attempts = [{"final_answer": "42"}]
    diversity = rewards.compute_diversity("42", previous_attempts)
    assert diversity == -1.0

    # Ordinary reward computation with no prior history.
    reward_inputs = {
        "correctness": 1.0,
        "reasoning_quality": 1.0,
        "action_str": "step 1: do math. = 42",
        "final_answer": "42",
        "history": [],
        "times_seen_problem": 0,
    }
    # The call yields a pair; only the scalar reward is checked here.
    total_reward, _components = rewards.compute_reward(**reward_inputs)
    assert total_reward > 0.0
def test_environment_step():
    """Run one reset/step cycle through the environment.

    After ``reset`` the observation should carry a non-empty problem, a
    positive difficulty level and an empty history. After a single ``step``
    the observation should expose a reward, a one-entry history and
    ``reward_components`` in its metadata.
    """
    environment = AutomathreasonerEnvironment()

    initial_obs = environment.reset()
    assert initial_obs.problem_text != ""
    assert initial_obs.difficulty_level > 0
    assert len(initial_obs.history) == 0

    # Submit a throwaway guess; this test does not check whether it is correct.
    guess = AutomathreasonerAction(
        reasoning="I am guessing the answer.",
        final_answer="0",
    )
    next_obs = environment.step(guess)

    assert next_obs.reward is not None
    assert len(next_obs.history) == 1
    assert "reward_components" in next_obs.metadata