"""Tests for visible verifier, held-out evaluator, and R-Zero reward functions."""

from forgeenv.tasks.models import ExecutionResult, Task
from forgeenv.training.reward_functions import (
    compute_alignment_score,
    compute_drift_gen_reward,
    compute_repetition_penalty,
    compute_uncertainty_reward,
)
from forgeenv.verifier.held_out_evaluator import compute_held_out_scores
from forgeenv.verifier.visible_verifier import compute_visible_reward
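
# A minimal fixed task shared by the tests below; its script imitates a small
# transformers Trainer run so the verifiers have realistic content to inspect.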
SAMPLE_TASK = Task(
    task_id="test_001",
    description="Test task",
    script_content=(
        "from transformers import Trainer\n"
        "trainer = Trainer()\n"
        "trainer.train()\n"
    ),
    difficulty="easy",
)


def test_visible_reward_success():
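    """A clean exit, decreasing loss, and a checkpoint should earn a positive reward."""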
    result = ExecutionResult(
        exit_code=0,
        stdout="step=1 loss=3.5\nstep=2 loss=2.1\nTRAINING_COMPLETE",
        stderr="",
        wall_time_ms=1000,
        checkpoint_exists=True,
        script_content=SAMPLE_TASK.script_content,
    )
    reward, breakdown = compute_visible_reward(result, SAMPLE_TASK)
    assert reward > 0, f"Successful run should have positive reward, got {reward}"
    assert breakdown["script_executes"] == 1.0
    assert breakdown["loss_decreased"] > 0


def test_visible_reward_failure():
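    """A non-zero exit code should zero out script_executes and keep reward non-positive."""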
    result = ExecutionResult(
        exit_code=1,
        stdout="",
        stderr="Error",
        wall_time_ms=100,
        script_content=SAMPLE_TASK.script_content,
    )
    reward, breakdown = compute_visible_reward(result, SAMPLE_TASK)
    assert breakdown["script_executes"] == 0.0
    assert reward <= 0.0


def test_held_out_success():
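    """A clean run should satisfy the held-out checks for execution, loss, hidden tests, and intent."""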
    result = ExecutionResult(
        exit_code=0,
        stdout="step=1 loss=3.5\nstep=2 loss=2.1\neval_accuracy=0.78\nTRAINING_COMPLETE",
        stderr="",
        wall_time_ms=1000,
        checkpoint_exists=True,
        script_content=SAMPLE_TASK.script_content,
    )
    scores = compute_held_out_scores(result, SAMPLE_TASK)
    assert scores["executed_cleanly"] == 1.0
    assert scores["loss_decreased"] > 0
    assert scores["hidden_tests_passed"] == 1.0
    assert scores["intent_preserved"] == 1.0


def test_held_out_workaround_detection():
    """Bare except wrapping all code should reduce no_forbidden_workarounds."""
    result = ExecutionResult(
        exit_code=0,
        stdout="TRAINING_COMPLETE",
        stderr="",
        wall_time_ms=100,
        checkpoint_exists=True,
        script_content="try:\n bad()\nexcept:\n pass\n",
    )
    scores = compute_held_out_scores(result, SAMPLE_TASK)
    assert scores["no_forbidden_workarounds"] < 1.0


def test_uncertainty_peaks_at_half():
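    """The uncertainty reward should peak at a 50% solve rate and vanish at 0% and 100%."""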
    r_half = compute_uncertainty_reward([True, False, True, False, True, False])
    r_all = compute_uncertainty_reward([True, True, True, True])
    r_none = compute_uncertainty_reward([False, False, False, False])
    assert r_half > r_all
    assert r_half > r_none
    assert abs(r_all) < 0.01
    assert abs(r_none) < 0.01


def test_uncertainty_handles_empty():
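    """An empty outcome list should yield a reward of exactly 0.0."""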
    assert compute_uncertainty_reward([]) == 0.0


def test_repetition_penalty_higher_for_duplicates():
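    """A duplicated task should be penalized at least as heavily as a unique one."""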
    batch = [
        "rename evaluate to eval_model",
        "rename evaluate to eval_model",
        "rename evaluate to eval_model",
        "change import path for trainer",
    ]
    p_dup = compute_repetition_penalty(batch[0], batch)
    p_unique = compute_repetition_penalty(batch[3], batch)
    assert p_dup >= p_unique


def test_drift_gen_reward_combines_signals():
    """Composite reward should rise with uncertainty and fall with repetition."""
    high_unc_unique = compute_drift_gen_reward(
        "unique unique unique tokens",
        [True, False, True, False],
        ["totally different a b c", "unique unique unique tokens"],
    )
    high_unc_repeated = compute_drift_gen_reward(
        "same same same same",
        [True, False, True, False],
        ["same same same same", "same same same same", "same same same same"],
    )
    assert high_unc_unique >= high_unc_repeated


def test_alignment_score_perfect_correlation():
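    """Identical visible and held-out score vectors should align near +1."""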
    visible = [0.0, 0.25, 0.5, 0.75, 1.0]
    held_out = [0.0, 0.25, 0.5, 0.75, 1.0]
    assert compute_alignment_score(visible, held_out) > 0.99


def test_alignment_score_anti_correlation():
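    """Perfectly anti-correlated score vectors should align near -1."""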
    visible = [1.0, 0.5, 0.0]
    held_out = [0.0, 0.5, 1.0]
    assert compute_alignment_score(visible, held_out) < -0.99


def test_alignment_score_constant_returns_zero():
    """No variance in either array → no signal → 0.0."""
    assert compute_alignment_score([0.5, 0.5, 0.5], [0.1, 0.9, 0.4]) == 0.0