"""Optimization task grader."""

from __future__ import annotations

try:
    from ..models import TaskGrade
    from ..tasks.catalog import ReviewTask
    from .shared import (
        base_grade,
        benchmark_candidate,
        compile_code,
        component_score,
        execute_cases,
        quality_metrics,
        shaped_score,
        similarity_score,
        summarize_results,
    )
except ImportError:
    # Fallback for running outside the package: when the relative imports
    # fail, the relative ".shared" import fails with them, so all three are
    # repeated in absolute form.
    from models import TaskGrade
    from tasks.catalog import ReviewTask
    from shared import (
        base_grade,
        benchmark_candidate,
        compile_code,
        component_score,
        execute_cases,
        quality_metrics,
        shaped_score,
        similarity_score,
        summarize_results,
    )
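

# Pipeline: compile gate -> test execution -> optional runtime benchmark;
# each stage folds its result into a shaped progress score.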
def grade_optimization_task(
    task: ReviewTask,
    code: str,
    *,
    include_hidden: bool,
    timeout_s: float = 3.0,
) -> TaskGrade:
    """Grade an optimization/refactor task on correctness, quality, and runtime."""
    compiled, compile_error = compile_code(code)
    quality = quality_metrics(code, task.function_name)
    details = {
        "compile_error": compile_error,
        "quality_notes": quality["quality_notes"],
        "style_score": quality["style_score"],
        "visibility": "full" if include_hidden else "public",
    }
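
    # Gate 1: code that does not compile earns only a small consolation score
    # based on its similarity to the reference solution.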
    if not compiled:
        progress = 0.02 + 0.1 * similarity_score(code, task.reference_code)
        details["test_results"] = []
        details["test_summary"] = "Code does not compile."
        return base_grade(
            score=shaped_score(progress),
            syntax_score=component_score(0.01),
            tests_passed=0,
            tests_total=len(task.public_cases) + (len(task.hidden_cases) if include_hidden else 0),
            quality_score=component_score(0.01),
            runtime_score=component_score(0.01),
            timed_out=False,
            details=details,
        )
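
    # Gate 2: run the public cases (plus the hidden cases when include_hidden
    # is set) under the per-case timeout.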
    cases = task.public_cases + (task.hidden_cases if include_hidden else [])
    result = execute_cases(code, task.function_name, cases, timeout_s=timeout_s)

    if result.get("timed_out"):
        details["test_results"] = []
        details["test_summary"] = result["error"]
        progress = 0.1 + 0.18 * quality["score"]
        return base_grade(
            score=shaped_score(progress),
            syntax_score=component_score(0.95),
            tests_passed=0,
            tests_total=len(cases),
            quality_score=quality["score"],
            runtime_score=component_score(0.01),
            timed_out=True,
            details=details,
        )
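
    # A non-timeout runtime error is graded almost identically, with a
    # slightly higher quality weight (0.2 vs. 0.18).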
if "error" in result:
details["test_results"] = []
details["test_summary"] = result["error"]
progress = 0.1 + 0.2 * quality["score"]
return base_grade(
score=shaped_score(progress),
syntax_score=component_score(0.95),
tests_passed=0,
tests_total=len(cases),
quality_score=quality["score"],
runtime_score=component_score(0.01),
timed_out=False,
details=details,
)
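
    # All cases executed: compute the pass rate, then benchmark runtime. The
    # benchmark runs only during hidden evaluation and only when every case
    # passes; until then the runtime component stays deferred.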
    data = result["data"]
    pass_rate = data["passed"] / max(data["total"], 1)

    deferred_note = "Benchmark deferred until hidden evaluation."
    runtime_score = component_score(0.01)
    benchmark_summary = deferred_note
    timed_out = False
    if include_hidden and pass_rate == 1.0:
        benchmark = benchmark_candidate(task, code, timeout_s=timeout_s)
        runtime_score = benchmark["runtime_score"]
        timed_out = benchmark.get("timed_out", False)
        benchmark_summary = benchmark["details"]
        if timed_out:
            runtime_score = component_score(0.01)

    details["test_results"] = data["results"]
    details["test_summary"] = summarize_results("Test results", data["results"])
    details["benchmark"] = benchmark_summary

    # Runtime contributes to progress only after the benchmark has run.
    runtime_progress = 0.0 if benchmark_summary == deferred_note else runtime_score
    if include_hidden:
        progress = min(1.0, 0.05 + 0.6 * pass_rate + 0.2 * quality["score"] + 0.15 * runtime_progress)
    else:
        progress = min(1.0, 0.05 + 0.7 * pass_rate + 0.25 * quality["score"])

    return base_grade(
        score=shaped_score(progress),
        syntax_score=component_score(0.95),
        tests_passed=data["passed"],
        tests_total=data["total"],
        quality_score=quality["score"],
        runtime_score=runtime_score,
        timed_out=timed_out,
        details=details,
    )
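

# Hypothetical usage sketch (not part of the original module): the loader
# below and the TaskGrade attributes accessed are assumptions inferred from
# how the grader consumes them above.
#
#     task = load_review_task("optimize-dedupe")  # hypothetical helper
#     grade = grade_optimization_task(task, submission_code, include_hidden=False)
#     print(grade.score, grade.details["test_summary"])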