""" grader.py (Task 3 – Rule Checker) ------------------------------------ Deterministic grader for function-identification submissions. Grade table ─────────── 1 → submitted function is the exact target (case-insensitive) 0.50 → submitted function is a direct internal subfunction of the target 0.001 → anything else """ import json from math import exp from typing import Dict, Any class Task3Grader: """ Grades a Task 3 submit_function submission. Parameters ---------- target_function : dict with at least 'name' and 'code' keys property_specification : the property the target function violates """ REWARD_CORRECT = 1 REWARD_PARTIAL = 0.5 REWARD_WRONG = 0.001 def __init__(self, target_function: Dict[str, Any], property_specification: Dict | str, max_steps: int) -> None: self.target_function = target_function self.property_specification = property_specification self.max_steps = max_steps self._decay = 0.01 def _clamp(self, reward: float) -> float: return max(0.001, min(0.999, reward)) def grade(self, submitted_function: str, steps: int, cummulative_cost: int) -> float: """Returns deterministic grade strictly in (0, 1).""" norm = submitted_function.strip().lower() reward = self.REWARD_WRONG if norm == self.target_function["name"].strip().lower(): reward = self.REWARD_CORRECT elif norm in self.target_function.get("code", "").strip().lower(): reward = self.REWARD_PARTIAL penalty = self._decay ** (-(steps * cummulative_cost) / self.max_steps) return self._clamp(reward * penalty) def get_canonical_answer(self) -> Dict[str, Dict | str]: """For debugging / logging only — do not expose to the agent.""" return { "target_function": self.target_function, "property_specification": json.dumps(self.property_specification) if isinstance(self.property_specification, dict) else self.property_specification, }