Spaces:

ArchCoder
/

Openenv

Sleeping

App Files Files Community

Priyansh Saxena committed on 9 days ago

Commit

b0fdd8b

1 Parent(s): 1700927

test: assert task scores stay in (0,1)

Browse files

Files changed (1) hide show

tests/test_task_score_bounds.py +67 -0

tests/test_task_score_bounds.py ADDED Viewed

	@@ -0,0 +1,67 @@

+import pytest
+from src.pytorch_debug_env.bug_library import BUG_TEMPLATES
+from src.pytorch_debug_env.environment import PyTorchDebugEnv
+from src.pytorch_debug_env.graders import grade_easy, grade_medium, grade_hard
+from src.pytorch_debug_env.models import FinalDiagnosis, Hypothesis, PyTorchDebugAction
+from src.pytorch_debug_env.scenario_generator import ScenarioGenerator
+def _build_action_from_gt(gt: dict) -> PyTorchDebugAction:
+    """Build a committing action whose diagnosis is copied from ``gt``.
+
+    Both the working hypothesis and the final diagnosis take their bug type
+    and affected file from the ground-truth mapping, each with a fixed
+    confidence of 0.9. The final diagnosis additionally carries the
+    ground-truth line range and fix strategy.
+    """
+    # These two values are shared by the hypothesis and the final diagnosis.
+    bug_type = gt["bug_type"]
+    bug_file = gt["primary_bug_file"]
+    return PyTorchDebugAction(
+        current_hypothesis=Hypothesis(
+            bug_type=bug_type,
+            affected_file=bug_file,
+            confidence=0.9,
+        ),
+        commit_diagnosis=True,
+        final_diagnosis=FinalDiagnosis(
+            bug_type=bug_type,
+            affected_file=bug_file,
+            line_range=gt["line_range"],
+            fix_strategy=gt["fix_strategy"],
+            confidence=0.9,
+        ),
+    )
+@pytest.mark.parametrize(
+    "task_id,grader",
+    [("easy", grade_easy), ("medium", grade_medium), ("hard", grade_hard)],
+)
+@pytest.mark.asyncio
+async def test_task_scores_strict_bounds(task_id, grader):
+    """Check that a ground-truth diagnosis scores strictly inside (0, 1).
+
+    The same bound is asserted on three values: the grader's direct output,
+    the reward reported by ``env.step``, and the ``final_score`` read back
+    from ``env.state``.
+    """
+    env = PyTorchDebugEnv(generator=ScenarioGenerator(BUG_TEMPLATES))
+    await env.reset(task_id, seed=7)
+    ground_truth = env.runtime.scenario.ground_truth
+    action = _build_action_from_gt(ground_truth)
+
+    # Grade the diagnosis directly, before the environment sees the action.
+    direct_score = grader(action.final_diagnosis.model_dump(), ground_truth)
+    assert direct_score > 0.0
+    assert direct_score < 1.0
+
+    # Submit the same action through the environment.
+    step_result = await env.step(action)
+    reward = step_result["reward"]
+    assert reward > 0.0
+    assert reward < 1.0
+
+    # Read the score recorded in the environment state after the step.
+    final_state = await env.state()
+    final_score = final_state.final_score
+    assert final_score > 0.0
+    assert final_score < 1.0
+@pytest.mark.parametrize("grader", [grade_easy, grade_medium, grade_hard])
+def test_empty_action_is_clamped(grader):
+    """Check that grading an empty diagnosis yields a score strictly in (0, 1)."""
+    reference = {
+        "bug_type": "missing_zero_grad",
+        "primary_bug_file": "train.py",
+        "related_files": [],
+        "line_range": [10, 12],
+        "fix_strategy": "Call optimizer.zero_grad() before loss.backward()",
+    }
+    # An empty dict stands in for a diagnosis with no fields filled in.
+    empty_diagnosis = {}
+    score = grader(empty_diagnosis, reference)
+    assert score > 0.0
+    assert score < 1.0