python_env / tests /test_environment.py
uvpatel7271's picture
Upload folder using huggingface_hub
c8e832f verified
from models import PythonCodeReviewAction
from server.env import PythonCodeReviewEnvironment
def test_reset_cycles_tasks_in_order():
env = PythonCodeReviewEnvironment()
first = env.reset()
second = env.reset()
third = env.reset()
assert first.task_id == "syntax-fix-easy"
assert second.task_id == "bug-fix-medium"
assert third.task_id == "optimization-hard"
def test_invalid_edit_code_penalizes_action():
env = PythonCodeReviewEnvironment()
env.reset(task_id="syntax-fix-easy")
observation = env.step(PythonCodeReviewAction(action_type="edit_code", code=""))
assert observation.reward < 0
assert observation.reward_details.invalid_action_penalty == 0.1
assert "requires code" in observation.last_action_status
def test_easy_task_gets_full_score_after_fix():
env = PythonCodeReviewEnvironment()
env.reset(task_id="syntax-fix-easy")
env.step(
PythonCodeReviewAction(
action_type="edit_code",
code="""def normalize_username(raw_name: str) -> str:
cleaned = raw_name.strip().lower()
if not cleaned:
return "anonymous"
return cleaned.replace(" ", "_")
""",
)
)
observation = env.step(PythonCodeReviewAction(action_type="submit_solution"))
assert observation.done is True
assert observation.score == 1.0
def test_medium_task_reports_partial_visible_progress():
env = PythonCodeReviewEnvironment()
env.reset(task_id="bug-fix-medium")
observation = env.step(PythonCodeReviewAction(action_type="run_tests"))
assert observation.score < 1.0
assert "visible checks" in observation.test_results
def test_hard_task_reference_solution_scores_high():
env = PythonCodeReviewEnvironment()
env.reset(task_id="optimization-hard")
env.step(
PythonCodeReviewAction(
action_type="edit_code",
code="""from collections import Counter
from typing import Iterable
def summarize_user_activity(events: Iterable[dict]) -> list[tuple[str, int]]:
\"\"\"Aggregate user activity counts in one pass.\"\"\"
counts = Counter(event["user_id"] for event in events)
return sorted(counts.items(), key=lambda item: (-item[1], item[0]))
""",
)
)
observation = env.step(PythonCodeReviewAction(action_type="submit_solution"))
assert observation.done is True
assert observation.score >= 0.9