python_env / graders /syntax.py
uvpatel7271's picture
Upload folder using huggingface_hub
c8e832f verified
"""Task graders for syntax and bug-fix tasks."""
from __future__ import annotations
from graders.common import clamp_score, compiles, normalized_diff_score, style_score, syntax_error_message
from graders.optimization import grade_optimization_task
from graders.pytest_runner import run_pytest_suite
from models import TaskGrade
from tasks.task_bank import TaskSpec
def grade_syntax_task(candidate_code: str, task: TaskSpec) -> TaskGrade:
"""Grade syntax repair tasks with partial credit for progress toward the reference."""
error = syntax_error_message(candidate_code)
diff_score = normalized_diff_score(candidate_code, task.reference_code)
style_base = style_score(candidate_code, task.style_max_line_length)
if not error:
return TaskGrade(
score=1.0,
syntax_score=1.0,
quality_score=style_base,
details={"compile_error": ""},
)
partial = clamp_score(0.15 + (0.55 * diff_score))
return TaskGrade(
score=partial,
syntax_score=0.0,
quality_score=diff_score * style_base,
details={"compile_error": error},
)
def grade_bug_fix_task(candidate_code: str, task: TaskSpec, include_hidden: bool = True) -> TaskGrade:
"""Grade logic bug tasks with pytest pass fraction."""
if not compiles(candidate_code):
error = syntax_error_message(candidate_code)
return TaskGrade(score=0.0, syntax_score=0.0, details={"compile_error": error})
tests = list(task.visible_tests)
if include_hidden:
tests.extend(task.hidden_tests)
execution = run_pytest_suite(candidate_code, tests, timeout_s=3.0)
if execution.timed_out:
return TaskGrade(
score=0.0,
syntax_score=1.0,
tests_passed=execution.passed,
tests_total=execution.total,
timed_out=True,
details={"compile_error": "", "tests": execution.output},
)
pass_fraction = execution.passed / execution.total if execution.total else 0.0
quality = style_score(candidate_code, task.style_max_line_length)
return TaskGrade(
score=clamp_score(pass_fraction),
syntax_score=1.0,
tests_passed=execution.passed,
tests_total=execution.total,
quality_score=quality,
details={"compile_error": "", "tests": execution.output},
)
def grade_task(candidate_code: str, task: TaskSpec, include_hidden: bool = True) -> TaskGrade:
"""Dispatch to the correct deterministic grader for one task."""
if task.task_kind == "syntax_fix":
return grade_syntax_task(candidate_code, task)
if task.task_kind == "bug_fix":
return grade_bug_fix_task(candidate_code, task, include_hidden=include_hidden)
return grade_optimization_task(candidate_code, task)