File size: 2,315 Bytes
98fc9b6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import sys
import os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from env.generator import TaskGenerationEngine
from env.verifier import VerifierSystem
from env.rewards import RewardSystem
from env.environment import AutomathreasonerEnvironment
from env.models import AutomathreasonerAction

def test_generator():
    """Smoke-test the task generation engine.

    Checks that ``generate_arithmetic`` returns a truthy problem and answer,
    and that ``generate_task`` returns a container exposing the
    ``problem``, ``solution`` and ``difficulty`` keys.
    """
    generator = TaskGenerationEngine()

    # Arithmetic generation: the difficulty value is returned but not checked.
    problem_text, _difficulty, answer = generator.generate_arithmetic(complexity=1)
    assert problem_text
    assert answer

    # Generic task generation at a fixed difficulty band.
    generated = generator.generate_task(target_difficulty_band=2.0)
    for required_key in ("problem", "solution", "difficulty"):
        assert required_key in generated

def test_verifier():
    """Exercise each individual check of the verifier, then the full pipeline."""
    checker = VerifierSystem()

    # Exact match must accept both the bare answer and a whitespace-padded one.
    for candidate in ("42", " 42 "):
        assert checker.check_exact_match(candidate, "42")

    # Numeric tolerance: a close value passes, a value off by one does not.
    close_enough = checker.check_numeric_tolerance("3.14159", "3.1415")
    too_far = checker.check_numeric_tolerance("4.1415", "3.1415")
    assert close_enough
    assert not too_far

    # Python execution: the expression "2 + 2" is checked against "4".
    assert checker.check_python_execution("2 + 2", "4")

    # Full verification returns a (correctness, reasoning quality) pair.
    correctness, quality = checker.verify("Because 2 + 2 is 4", "4", "4")
    assert correctness == 1.0
    # Some (mock) reasoning score is expected to be awarded.
    assert quality > 0.0

def test_rewards():
    """Check the diversity penalty on a repeated answer and a basic reward."""
    rewards = RewardSystem(max_len=1000)

    # Repeating an answer that is already in the history must score -1.0.
    previous_attempts = [{"final_answer": "42"}]
    diversity = rewards.compute_diversity("42", previous_attempts)
    assert diversity == -1.0

    # A correct, well-reasoned first attempt should yield a positive reward.
    reward_inputs = {
        "correctness": 1.0,
        "reasoning_quality": 1.0,
        "action_str": "step 1: do math. = 42",
        "final_answer": "42",
        "history": [],
        "times_seen_problem": 0,
    }
    # The components part of the result is unpacked but not inspected here.
    total_reward, _components = rewards.compute_reward(**reward_inputs)
    assert total_reward > 0.0

def test_environment_step():
    """Reset the environment, take one step, and check the observations."""
    environment = AutomathreasonerEnvironment()

    # A fresh observation carries a problem, a positive difficulty and no history.
    initial_obs = environment.reset()
    assert initial_obs.problem_text != ""
    assert initial_obs.difficulty_level > 0
    assert len(initial_obs.history) == 0

    # Submit a throwaway guess; the test does not rely on it being correct.
    guess = AutomathreasonerAction(
        reasoning="I am guessing the answer.",
        final_answer="0",
    )
    next_obs = environment.step(guess)

    # One step must produce a reward, one history entry and reward metadata.
    assert next_obs.reward is not None
    assert len(next_obs.history) == 1
    assert "reward_components" in next_obs.metadata