File size: 4,560 Bytes
a15535e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
"""Tests for visible verifier, held-out evaluator, and R-Zero reward functions."""
from forgeenv.tasks.models import ExecutionResult, Task
from forgeenv.training.reward_functions import (
    compute_alignment_score,
    compute_drift_gen_reward,
    compute_repetition_penalty,
    compute_uncertainty_reward,
)
from forgeenv.verifier.held_out_evaluator import compute_held_out_scores
from forgeenv.verifier.visible_verifier import compute_visible_reward

# Shared fixture: a minimal "easy" task whose script drives a HF-style Trainer.
SAMPLE_TASK = Task(
    task_id="test_001",
    description="Test task",
    script_content="from transformers import Trainer\ntrainer = Trainer()\ntrainer.train()\n",
    difficulty="easy",
)


def test_visible_reward_success():
    """A clean exit with decreasing loss and a checkpoint earns a positive visible reward."""
    clean_run = ExecutionResult(
        exit_code=0,
        stdout="step=1 loss=3.5\nstep=2 loss=2.1\nTRAINING_COMPLETE",
        stderr="",
        wall_time_ms=1000,
        checkpoint_exists=True,
        script_content=SAMPLE_TASK.script_content,
    )
    reward, parts = compute_visible_reward(clean_run, SAMPLE_TASK)
    assert reward > 0, f"Successful run should have positive reward, got {reward}"
    assert parts["script_executes"] == 1.0
    assert parts["loss_decreased"] > 0


def test_visible_reward_failure():
    """A non-zero exit code zeroes the execution component and caps reward at 0."""
    crashed_run = ExecutionResult(
        exit_code=1,
        stdout="",
        stderr="Error",
        wall_time_ms=100,
        script_content=SAMPLE_TASK.script_content,
    )
    reward, parts = compute_visible_reward(crashed_run, SAMPLE_TASK)
    assert parts["script_executes"] == 0.0
    assert reward <= 0.0


def test_held_out_success():
    """A clean run with eval metrics passes every held-out check."""
    clean_run = ExecutionResult(
        exit_code=0,
        stdout="step=1 loss=3.5\nstep=2 loss=2.1\neval_accuracy=0.78\nTRAINING_COMPLETE",
        stderr="",
        wall_time_ms=1000,
        checkpoint_exists=True,
        script_content=SAMPLE_TASK.script_content,
    )
    scores = compute_held_out_scores(clean_run, SAMPLE_TASK)
    # Binary checks must be fully satisfied; loss only needs to trend down.
    for check in ("executed_cleanly", "hidden_tests_passed", "intent_preserved"):
        assert scores[check] == 1.0
    assert scores["loss_decreased"] > 0


def test_held_out_workaround_detection():
    """A script that swallows all errors with a bare except loses workaround credit."""
    suppressing_run = ExecutionResult(
        exit_code=0,
        stdout="TRAINING_COMPLETE",
        stderr="",
        wall_time_ms=100,
        checkpoint_exists=True,
        script_content="try:\n    bad()\nexcept:\n    pass\n",
    )
    scores = compute_held_out_scores(suppressing_run, SAMPLE_TASK)
    assert scores["no_forbidden_workarounds"] < 1.0


def test_uncertainty_peaks_at_half():
    """Reward is maximal at a 50% pass rate and near zero at either extreme."""
    mixed = compute_uncertainty_reward([True, False] * 3)
    all_pass = compute_uncertainty_reward([True] * 4)
    all_fail = compute_uncertainty_reward([False] * 4)

    assert mixed > all_pass
    assert mixed > all_fail
    assert abs(all_pass) < 0.01
    assert abs(all_fail) < 0.01


def test_uncertainty_handles_empty():
    """No verdicts at all yields a neutral reward of exactly zero."""
    reward = compute_uncertainty_reward([])
    assert reward == 0.0


def test_repetition_penalty_higher_for_duplicates():
    """A proposal repeated within the batch is penalized at least as much as a unique one."""
    duplicated = "rename evaluate to eval_model"
    singleton = "change import path for trainer"
    batch = [duplicated, duplicated, duplicated, singleton]
    penalty_dup = compute_repetition_penalty(duplicated, batch)
    penalty_unique = compute_repetition_penalty(singleton, batch)
    assert penalty_dup >= penalty_unique


def test_drift_gen_reward_combines_signals():
    """At equal uncertainty, the composite reward favors the less-repeated proposal."""
    verdicts_half = [True, False, True, False]  # same uncertainty signal for both cases
    unique_reward = compute_drift_gen_reward(
        "unique unique unique tokens",
        verdicts_half,
        ["totally different a b c", "unique unique unique tokens"],
    )
    repeated_reward = compute_drift_gen_reward(
        "same same same same",
        verdicts_half,
        ["same same same same", "same same same same", "same same same same"],
    )
    assert unique_reward >= repeated_reward


def test_alignment_score_perfect_correlation():
    """Identical visible and held-out score vectors correlate at ~+1."""
    scores = [0.0, 0.25, 0.5, 0.75, 1.0]
    assert compute_alignment_score(scores, list(scores)) > 0.99


def test_alignment_score_anti_correlation():
    """Exactly opposed score vectors correlate at ~-1."""
    visible = [1.0, 0.5, 0.0]
    held_out = list(reversed(visible))  # [0.0, 0.5, 1.0]
    assert compute_alignment_score(visible, held_out) < -0.99


def test_alignment_score_constant_returns_zero():
    """A constant visible array has no variance, so the score degrades to 0.0."""
    flat_visible = [0.5, 0.5, 0.5]
    varying_held_out = [0.1, 0.9, 0.4]
    assert compute_alignment_score(flat_visible, varying_held_out) == 0.0