"""Optimization task grader."""
from __future__ import annotations
try:
from ..Models import TaskGrade
from ..tasks.catalog import ReviewTask
except ImportError:
from Models import TaskGrade
from tasks.catalog import ReviewTask
from .shared import (
base_grade,
benchmark_candidate,
compile_code,
component_score,
execute_cases,
quality_metrics,
shaped_score,
similarity_score,
summarize_results,
)
def grade_optimization_task(
    task: ReviewTask,
    code: str,
    *,
    include_hidden: bool,
    timeout_s: float = 3.0,
) -> TaskGrade:
    """Grade an optimization/refactor task with correctness, quality, and runtime.

    Args:
        task: Review task providing the target function name, reference code,
            and the public/hidden test cases.
        code: Candidate source code to grade.
        include_hidden: When True, hidden cases are also executed and a runtime
            benchmark is attempted; when False only public cases run.
        timeout_s: Wall-clock budget (seconds) for each execution step.

    Returns:
        A TaskGrade combining syntax, test, quality, and runtime components.
    """
    compiled, compile_error = compile_code(code)
    quality = quality_metrics(code, task.function_name)
    details = {
        "compile_error": compile_error,
        "quality_notes": quality["quality_notes"],
        "style_score": quality["style_score"],
        "visibility": "full" if include_hidden else "public",
    }

    if not compiled:
        # Non-compiling submissions earn a sliver of credit proportional to
        # textual similarity with the reference solution.
        progress = 0.02 + 0.1 * similarity_score(code, task.reference_code)
        details["test_results"] = []
        details["test_summary"] = "Code does not compile."
        return base_grade(
            score=shaped_score(progress),
            syntax_score=component_score(0.01),
            tests_passed=0,
            tests_total=len(task.public_cases) + (len(task.hidden_cases) if include_hidden else 0),
            quality_score=component_score(0.01),
            runtime_score=component_score(0.01),
            timed_out=False,
            details=details,
        )

    cases = task.public_cases + (task.hidden_cases if include_hidden else [])
    result = execute_cases(code, task.function_name, cases, timeout_s=timeout_s)

    # Timed-out and errored runs produce the same grade shape; they differ only
    # in the quality weight (0.18 vs 0.2) and the timed_out flag. The timeout
    # check comes first, matching the original branch precedence.
    if result.get("timed_out") or "error" in result:
        timed_out = bool(result.get("timed_out"))
        details["test_results"] = []
        details["test_summary"] = result["error"]
        progress = 0.1 + (0.18 if timed_out else 0.2) * quality["score"]
        return base_grade(
            score=shaped_score(progress),
            syntax_score=component_score(0.95),
            tests_passed=0,
            tests_total=len(cases),
            quality_score=quality["score"],
            runtime_score=component_score(0.01),
            timed_out=timed_out,
            details=details,
        )

    data = result["data"]
    pass_rate = data["passed"] / max(data["total"], 1)

    # Benchmarking only happens during hidden evaluation and only once every
    # case passes. Track whether it actually ran with an explicit flag instead
    # of comparing details["benchmark"] against a magic sentinel string, which
    # was fragile (a reworded message — or a benchmark whose details happened
    # to equal the sentinel — would silently change the score).
    runtime_score = component_score(0.01)
    benchmark_ran = False
    benchmark_summary = "Benchmark deferred until hidden evaluation."
    timed_out = False
    if include_hidden and pass_rate == 1.0:
        benchmark = benchmark_candidate(task, code, timeout_s=timeout_s)
        benchmark_ran = True
        runtime_score = benchmark["runtime_score"]
        timed_out = benchmark.get("timed_out", False)
        benchmark_summary = benchmark["details"]
        if timed_out:
            # A timed-out benchmark forfeits its runtime credit.
            runtime_score = component_score(0.01)

    details["test_results"] = data["results"]
    details["test_summary"] = summarize_results("Test results", data["results"])
    details["benchmark"] = benchmark_summary

    runtime_progress = runtime_score if benchmark_ran else 0.0
    if include_hidden:
        progress = min(1.0, 0.05 + 0.6 * pass_rate + 0.2 * quality["score"] + 0.15 * runtime_progress)
    else:
        # Public-only grading redistributes the runtime weight onto tests
        # and quality.
        progress = min(1.0, 0.05 + 0.7 * pass_rate + 0.25 * quality["score"])

    return base_grade(
        score=shaped_score(progress),
        syntax_score=component_score(0.95),
        tests_passed=data["passed"],
        tests_total=data["total"],
        quality_score=quality["score"],
        runtime_score=runtime_score,
        timed_out=timed_out,
        details=details,
    )
|