Spaces:
Running
Running
ajaxwin committed on
Commit ·
1b91307
1
Parent(s): 2ee85c9
task3 evaluated
Browse files
- agents/task3.py +16 -6
- data/contracts.json +42 -7
- eval.py +1 -1
- tasks/task3/environment.py +5 -10
- tasks/task3/grader.py +10 -15
agents/task3.py
CHANGED
|
@@ -43,21 +43,31 @@ def oracle_t3(env: Task3Environment, seed: int, verbose: bool = False) -> Dict[s
|
|
| 43 |
"cumulative_reward": result.observation.cumulative_reward}
|
| 44 |
|
| 45 |
|
| 46 |
-
def subfunction_t3(env: Task3Environment, seed: int) -> Dict[str, Any]:
|
| 47 |
"""Submits the first partial-credit subfunction if one exists, else 'constructor'."""
|
| 48 |
r = env.reset(seed=seed)
|
| 49 |
obs = r.observation
|
| 50 |
contracts = load_contracts()
|
| 51 |
-
|
| 52 |
for c in contracts:
|
| 53 |
if c["contract_name"] == obs.contract_name:
|
| 54 |
-
|
| 55 |
-
if fn:
|
| 56 |
-
partial_fns = fn.get("task3", {}).get("partial_credit_functions", [])
|
| 57 |
break
|
| 58 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
result = env.step(Action(action_type=ActionType.SUBMIT_FUNCTION,
|
| 60 |
params={"function_name": submit_name}))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
v = result.reward.value
|
| 62 |
score = 1.0 if v >= 4.9 else (0.3 if v >= 1.0 else 0.0)
|
| 63 |
return {"seed": seed, "grader_score": score, "submitted": submit_name,
|
|
|
|
| 43 |
"cumulative_reward": result.observation.cumulative_reward}
|
| 44 |
|
| 45 |
|
| 46 |
+
def subfunction_t3(env: Task3Environment, seed: int, verbose: bool = False) -> Dict[str, Any]:
|
| 47 |
"""Submits the first partial-credit subfunction if one exists, else 'constructor'."""
|
| 48 |
r = env.reset(seed=seed)
|
| 49 |
obs = r.observation
|
| 50 |
contracts = load_contracts()
|
| 51 |
+
target_contract = {}
|
| 52 |
for c in contracts:
|
| 53 |
if c["contract_name"] == obs.contract_name:
|
| 54 |
+
target_contract = c
|
|
|
|
|
|
|
| 55 |
break
|
| 56 |
+
|
| 57 |
+
submit_name = "constructor"
|
| 58 |
+
if (target_contract and "call_graph" in target_contract and env.state().target_function in
|
| 59 |
+
target_contract["call_graph"] and target_contract["call_graph"][env.state().target_function]):
|
| 60 |
+
submit_name = target_contract["call_graph"][env.state().target_function][0]
|
| 61 |
+
|
| 62 |
+
|
| 63 |
result = env.step(Action(action_type=ActionType.SUBMIT_FUNCTION,
|
| 64 |
params={"function_name": submit_name}))
|
| 65 |
+
if verbose:
|
| 66 |
+
prop = obs.extra.get("property_english", "")[:60]
|
| 67 |
+
print(f" {obs.contract_name}.{env.state().target_function}() \"{prop}\"")
|
| 68 |
+
print(f" Submitting subfunction: {submit_name}")
|
| 69 |
+
print(f" Reward received: {result.reward.value}")
|
| 70 |
+
|
| 71 |
v = result.reward.value
|
| 72 |
score = 1.0 if v >= 4.9 else (0.3 if v >= 1.0 else 0.0)
|
| 73 |
return {"seed": seed, "grader_score": score, "submitted": submit_name,
|
data/contracts.json
CHANGED
|
@@ -4306,7 +4306,12 @@
|
|
| 4306 |
"mitigation": "Add a check to ensure that the scaled amount to burn is greater than zero before performing the burn and transfer, or avoid transfer when the burned amount is zero."
|
| 4307 |
},
|
| 4308 |
"property": "The total assets of a user should decrease exactly by the amount of underlying withdrawn. Rounding should not allow a user to receive underlying without a corresponding decrease in AToken balance.",
|
| 4309 |
-
"property_specification":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4310 |
},
|
| 4311 |
{
|
| 4312 |
"name": "mint",
|
|
@@ -5046,7 +5051,12 @@
|
|
| 5046 |
"mitigation": "Add a check to ensure that the amount after conversion to ray is greater than zero before minting, or avoid minting when the minted amount would be zero."
|
| 5047 |
},
|
| 5048 |
"property": "When a user borrows (mints debt), their debt balance should increase exactly by the borrowed amount (plus accrued interest). Rounding should not allow a user to receive underlying without a corresponding increase in debt.",
|
| 5049 |
-
"property_specification":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5050 |
},
|
| 5051 |
{
|
| 5052 |
"name": "burn",
|
|
@@ -5519,7 +5529,12 @@
|
|
| 5519 |
"mitigation": "Add require(owner != address(0), 'INVALID_OWNER');"
|
| 5520 |
},
|
| 5521 |
"property": "The contract must have a non-zero owner to allow administrative functions to be callable.",
|
| 5522 |
-
"property_specification":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5523 |
},
|
| 5524 |
{
|
| 5525 |
"name": "deposit",
|
|
@@ -5553,7 +5568,12 @@
|
|
| 5553 |
"mitigation": "Fixed in PR#82 merged in commit 385b397."
|
| 5554 |
},
|
| 5555 |
"property": "After a deposit, the vault should remain in a consistent state where subsequent operations do not revert due to internal accounting mismatches.",
|
| 5556 |
-
"property_specification":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5557 |
},
|
| 5558 |
{
|
| 5559 |
"name": "depositATokens",
|
|
@@ -5587,7 +5607,12 @@
|
|
| 5587 |
"mitigation": "Fixed in PR#80, merged in commit 34ad6e3."
|
| 5588 |
},
|
| 5589 |
"property": "Depositing aTokens (which already represent Aave positions) should not be limited by the Aave pool's supply cap because it does not increase net supply to the pool.",
|
| 5590 |
-
"property_specification":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5591 |
},
|
| 5592 |
{
|
| 5593 |
"name": "depositWithSig",
|
|
@@ -5870,7 +5895,12 @@
|
|
| 5870 |
"mitigation": "Fixed in PR#86 merged in commit 385b397."
|
| 5871 |
},
|
| 5872 |
"property": "After any deposit or withdrawal, lastVaultBalance should equal ATOKEN.balanceOf(vault) to ensure accurate fee calculation on future yield.",
|
| 5873 |
-
"property_specification":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5874 |
},
|
| 5875 |
{
|
| 5876 |
"name": "withdrawATokens",
|
|
@@ -6422,7 +6452,12 @@
|
|
| 6422 |
"mitigation": "Fixed in PR#82 merged in commit 385b397."
|
| 6423 |
},
|
| 6424 |
"property": "All yield generated by the vault, including gifts, should be subject to fee accrual before fees are withdrawn.",
|
| 6425 |
-
"property_specification":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6426 |
},
|
| 6427 |
{
|
| 6428 |
"name": "claimRewards",
|
|
|
|
| 4306 |
"mitigation": "Add a check to ensure that the scaled amount to burn is greater than zero before performing the burn and transfer, or avoid transfer when the burned amount is zero."
|
| 4307 |
},
|
| 4308 |
"property": "The total assets of a user should decrease exactly by the amount of underlying withdrawn. Rounding should not allow a user to receive underlying without a corresponding decrease in AToken balance.",
|
| 4309 |
+
"property_specification": {
|
| 4310 |
+
"precondition": "User has AToken balance B, underlying asset balance of AToken contract is sufficient.",
|
| 4311 |
+
"operation": "burn(user, receiver, amount, index) where amount > 0 but amount.rayDiv(index) == 0 due to rounding.",
|
| 4312 |
+
"postcondition": "User's AToken balance decreases by the scaled amount (which is zero) and underlying amount is transferred, so user's net position decreases by amount of underlying.",
|
| 4313 |
+
"actual": "User's AToken balance does not change (burn of zero), but user receives underlying amount, resulting in a net gain of underlying without any reduction in AToken balance, violating the expected invariant that AToken burn should correspond to underlying transfer."
|
| 4314 |
+
}
|
| 4315 |
},
|
| 4316 |
{
|
| 4317 |
"name": "mint",
|
|
|
|
| 5051 |
"mitigation": "Add a check to ensure that the amount after conversion to ray is greater than zero before minting, or avoid minting when the minted amount would be zero."
|
| 5052 |
},
|
| 5053 |
"property": "When a user borrows (mints debt), their debt balance should increase exactly by the borrowed amount (plus accrued interest). Rounding should not allow a user to receive underlying without a corresponding increase in debt.",
|
| 5054 |
+
"property_specification": {
|
| 5055 |
+
"precondition": "User has debt balance D, total supply S",
|
| 5056 |
+
"operation": "mint(user, amount, rate) where amount > 0 but amount.wadToRay() == 0 due to rounding.",
|
| 5057 |
+
"postcondition": "User's debt balance increases by amount (scaled appropriately)",
|
| 5058 |
+
"actual": "User's debt balance does not increase (mint of zero), but the LendingPool would transfer underlying to the user, resulting in a net gain without debt increase, violating the invariant that debt minting should correspond to underlying received."
|
| 5059 |
+
}
|
| 5060 |
},
|
| 5061 |
{
|
| 5062 |
"name": "burn",
|
|
|
|
| 5529 |
"mitigation": "Add require(owner != address(0), 'INVALID_OWNER');"
|
| 5530 |
},
|
| 5531 |
"property": "The contract must have a non-zero owner to allow administrative functions to be callable.",
|
| 5532 |
+
"property_specification": {
|
| 5533 |
+
"precondition": "Owner address can be any address.",
|
| 5534 |
+
"operation": "initialize(owner, ...) where owner == address(0).",
|
| 5535 |
+
"postcondition": "The contract owner is address(0) and administrative functions are permanently locked.",
|
| 5536 |
+
"actual": "The owner becomes zero address, violating the invariant that owner must be a valid address capable of executing privileged functions."
|
| 5537 |
+
}
|
| 5538 |
},
|
| 5539 |
{
|
| 5540 |
"name": "deposit",
|
|
|
|
| 5568 |
"mitigation": "Fixed in PR#82 merged in commit 385b397."
|
| 5569 |
},
|
| 5570 |
"property": "After a deposit, the vault should remain in a consistent state where subsequent operations do not revert due to internal accounting mismatches.",
|
| 5571 |
+
"property_specification": {
|
| 5572 |
+
"precondition": "Vault state is consistent, lastVaultBalance equals ATOKEN.balanceOf(vault).",
|
| 5573 |
+
"operation": "deposit(assets) where assets is not a multiple of liquidity index.",
|
| 5574 |
+
"postcondition": "Vault remains consistent, lastVaultBalance == ATOKEN.balanceOf(vault) after deposit.",
|
| 5575 |
+
"actual": "lastVaultBalance is incremented by assets, but ATOKEN.balanceOf increases by a different amount due to rounding, leading to inconsistency that causes future accrueYield() calls to revert."
|
| 5576 |
+
}
|
| 5577 |
},
|
| 5578 |
{
|
| 5579 |
"name": "depositATokens",
|
|
|
|
| 5607 |
"mitigation": "Fixed in PR#80, merged in commit 34ad6e3."
|
| 5608 |
},
|
| 5609 |
"property": "Depositing aTokens (which already represent Aave positions) should not be limited by the Aave pool's supply cap because it does not increase net supply to the pool.",
|
| 5610 |
+
"property_specification": {
|
| 5611 |
+
"precondition": "Aave pool has a supply cap, and the total supplied is near the cap. The vault holds aTokens whose underlying value would push the cap over if deposited as underlying.",
|
| 5612 |
+
"operation": "depositATokens(assets) with assets amount that converts to underlying value exceeding remaining cap.",
|
| 5613 |
+
"postcondition": "Deposit should succeed because no new underlying is supplied to Aave.",
|
| 5614 |
+
"actual": "The function reverts due to the supply cap check, preventing legitimate aToken deposits."
|
| 5615 |
+
}
|
| 5616 |
},
|
| 5617 |
{
|
| 5618 |
"name": "depositWithSig",
|
|
|
|
| 5895 |
"mitigation": "Fixed in PR#86 merged in commit 385b397."
|
| 5896 |
},
|
| 5897 |
"property": "After any deposit or withdrawal, lastVaultBalance should equal ATOKEN.balanceOf(vault) to ensure accurate fee calculation on future yield.",
|
| 5898 |
+
"property_specification": {
|
| 5899 |
+
"precondition": "lastVaultBalance = ATOKEN.balanceOf(vault) = B.",
|
| 5900 |
+
"operation": "withdraw(assets) where assets is not a multiple of the liquidity index.",
|
| 5901 |
+
"postcondition": "lastVaultBalance' = ATOKEN.balanceOf(vault)' = B - assets_actual.",
|
| 5902 |
+
"actual": "lastVaultBalance is decreased by assets (exact), but ATOKEN.balanceOf decreases by a different amount due to rounding, causing a mismatch that leads to fee miscalculation."
|
| 5903 |
+
}
|
| 5904 |
},
|
| 5905 |
{
|
| 5906 |
"name": "withdrawATokens",
|
|
|
|
| 6452 |
"mitigation": "Fixed in PR#82 merged in commit 385b397."
|
| 6453 |
},
|
| 6454 |
"property": "All yield generated by the vault, including gifts, should be subject to fee accrual before fees are withdrawn.",
|
| 6455 |
+
"property_specification": {
|
| 6456 |
+
"precondition": "Vault has lastUpdated = T (current block), and some user gifts aTokens to the vault in the same block after a transaction that set lastUpdated.",
|
| 6457 |
+
"operation": "withdrawFees() called after the gift.",
|
| 6458 |
+
"postcondition": "The gift amount is included in new yield and fees are charged on it.",
|
| 6459 |
+
"actual": "getClaimableFees() returns accumulatedFees from before the gift because block.timestamp == lastUpdated, so no new fees are calculated, allowing the gift to bypass fee charges."
|
| 6460 |
+
}
|
| 6461 |
},
|
| 6462 |
{
|
| 6463 |
"name": "claimRewards",
|
eval.py
CHANGED
|
@@ -182,7 +182,7 @@ def run_task3_eval(n: int, seed_offset: int, verbose: bool) -> Dict[str, Any]:
|
|
| 182 |
|
| 183 |
# Subfunction (partial credit)
|
| 184 |
print("\nβΆ Subfunction (partial-credit callee if exists, else constructor):")
|
| 185 |
-
sub_eps = [subfunction_t3(env, seed_offset + i) for i in range(n)]
|
| 186 |
sub_avg = _avg(sub_eps)
|
| 187 |
submitted_sub = list({e.get("submitted", "?") for e in sub_eps})
|
| 188 |
print(f" Subfunction avg: {sub_avg:.3f} submitted: {submitted_sub}")
|
|
|
|
| 182 |
|
| 183 |
# Subfunction (partial credit)
|
| 184 |
print("\nβΆ Subfunction (partial-credit callee if exists, else constructor):")
|
| 185 |
+
sub_eps = [subfunction_t3(env, seed_offset + i, verbose) for i in range(n)]
|
| 186 |
sub_avg = _avg(sub_eps)
|
| 187 |
submitted_sub = list({e.get("submitted", "?") for e in sub_eps})
|
| 188 |
print(f" Subfunction avg: {sub_avg:.3f} submitted: {submitted_sub}")
|
tasks/task3/environment.py
CHANGED
|
@@ -83,8 +83,6 @@ class Task3Environment(BaseEnv):
|
|
| 83 |
self._seen: Set[str] = set()
|
| 84 |
|
| 85 |
# ββ OpenEnv interface βββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 86 |
-
|
| 87 |
-
# ! Need to change alot here
|
| 88 |
|
| 89 |
def reset(self, seed: Optional[int] = None) -> ResetResult:
|
| 90 |
if seed is not None:
|
|
@@ -93,14 +91,12 @@ class Task3Environment(BaseEnv):
|
|
| 93 |
self._contract, self._target_fn = sample_task3_episode(
|
| 94 |
self._contracts, self._rng
|
| 95 |
)
|
| 96 |
-
t3 = self._target_fn["task3"]
|
| 97 |
self._grader = Task3Grader(
|
| 98 |
-
target_function=self._target_fn
|
| 99 |
-
|
| 100 |
-
property_english=t3.get("property_english", ""), # ! doesn't exist
|
| 101 |
)
|
| 102 |
self._step_count = 0
|
| 103 |
-
self._cum_reward = 0.0
|
| 104 |
self._done = False
|
| 105 |
self._submitted = False
|
| 106 |
self._query_hist = []
|
|
@@ -111,7 +107,7 @@ class Task3Environment(BaseEnv):
|
|
| 111 |
last_result=(
|
| 112 |
f"New episode started.\n"
|
| 113 |
f"Contract : {self._contract['contract_name']}\n\n"
|
| 114 |
-
f"Property : {
|
| 115 |
f"Find the function in this contract that violates the property above.\n"
|
| 116 |
f"Use list_functions then get_function_code to investigate.\n"
|
| 117 |
f"Submit with submit_function, params={{\"function_name\": \"...\"}}.\n"
|
|
@@ -154,7 +150,6 @@ class Task3Environment(BaseEnv):
|
|
| 154 |
# ββ Internal helpers ββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 155 |
|
| 156 |
def _build_obs(self, last_action: Optional[str], last_result: str) -> Observation:
|
| 157 |
-
t3 = self._target_fn.get("task3", {})
|
| 158 |
return Observation(
|
| 159 |
task_id=TASK_ID,
|
| 160 |
contract_name=self._contract.get("contract_name", ""),
|
|
@@ -166,7 +161,7 @@ class Task3Environment(BaseEnv):
|
|
| 166 |
cumulative_reward=self._cum_reward,
|
| 167 |
done=self._done,
|
| 168 |
extra={
|
| 169 |
-
"property_english":
|
| 170 |
"solidity_version": self._contract.get("metadata", {}).get("solidity_version", ""),
|
| 171 |
"hint": (
|
| 172 |
"Read the property, then inspect function code to find which one violates it. "
|
|
|
|
| 83 |
self._seen: Set[str] = set()
|
| 84 |
|
| 85 |
# ββ OpenEnv interface βββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
|
|
|
|
|
| 86 |
|
| 87 |
def reset(self, seed: Optional[int] = None) -> ResetResult:
|
| 88 |
if seed is not None:
|
|
|
|
| 91 |
self._contract, self._target_fn = sample_task3_episode(
|
| 92 |
self._contracts, self._rng
|
| 93 |
)
|
|
|
|
| 94 |
self._grader = Task3Grader(
|
| 95 |
+
target_function=self._target_fn,
|
| 96 |
+
property_specification=self._target_fn.get("property_specification", "")
|
|
|
|
| 97 |
)
|
| 98 |
self._step_count = 0
|
| 99 |
+
self._cum_reward = 0.0
|
| 100 |
self._done = False
|
| 101 |
self._submitted = False
|
| 102 |
self._query_hist = []
|
|
|
|
| 107 |
last_result=(
|
| 108 |
f"New episode started.\n"
|
| 109 |
f"Contract : {self._contract['contract_name']}\n\n"
|
| 110 |
+
f"Property : {self._target_fn.get('property', '')}\n\n"
|
| 111 |
f"Find the function in this contract that violates the property above.\n"
|
| 112 |
f"Use list_functions then get_function_code to investigate.\n"
|
| 113 |
f"Submit with submit_function, params={{\"function_name\": \"...\"}}.\n"
|
|
|
|
| 150 |
# ββ Internal helpers ββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 151 |
|
| 152 |
def _build_obs(self, last_action: Optional[str], last_result: str) -> Observation:
|
|
|
|
| 153 |
return Observation(
|
| 154 |
task_id=TASK_ID,
|
| 155 |
contract_name=self._contract.get("contract_name", ""),
|
|
|
|
| 161 |
cumulative_reward=self._cum_reward,
|
| 162 |
done=self._done,
|
| 163 |
extra={
|
| 164 |
+
"property_english": self._target_fn.get("property", ""),
|
| 165 |
"solidity_version": self._contract.get("metadata", {}).get("solidity_version", ""),
|
| 166 |
"hint": (
|
| 167 |
"Read the property, then inspect function code to find which one violates it. "
|
tasks/task3/grader.py
CHANGED
|
@@ -17,7 +17,8 @@ Reward table (ONE submission per episode)
|
|
| 17 |
"""
|
| 18 |
|
| 19 |
from __future__ import annotations
|
| 20 |
-
|
|
|
|
| 21 |
|
| 22 |
|
| 23 |
class Task3Grader:
|
|
@@ -29,7 +30,6 @@ class Task3Grader:
|
|
| 29 |
target_function : exact name of the rule-breaking function
|
| 30 |
partial_credit_functions: list of internal functions that get partial credit
|
| 31 |
(direct callees of the target that are contract functions)
|
| 32 |
-
property_english : the English property text (for feedback messages)
|
| 33 |
"""
|
| 34 |
|
| 35 |
SCORE_CORRECT = 1.0
|
|
@@ -40,22 +40,16 @@ class Task3Grader:
|
|
| 40 |
REWARD_PARTIAL = 1.5
|
| 41 |
REWARD_WRONG = -1.5
|
| 42 |
|
| 43 |
-
def __init__(
|
| 44 |
-
self
|
| 45 |
-
|
| 46 |
-
partial_credit_functions: List[str],
|
| 47 |
-
property_english: str = "",
|
| 48 |
-
) -> None:
|
| 49 |
-
self.target_function = target_function.lower()
|
| 50 |
-
self.partial_credit_functions = [f.lower() for f in partial_credit_functions]
|
| 51 |
-
self.property_english = property_english
|
| 52 |
|
| 53 |
def grade(self, submitted_function: str) -> float:
|
| 54 |
"""Returns deterministic score in {0.0, 0.3, 1.0}."""
|
| 55 |
norm = submitted_function.strip().lower()
|
| 56 |
-
if norm == self.target_function:
|
| 57 |
return self.SCORE_CORRECT
|
| 58 |
-
if norm in self.
|
| 59 |
return self.SCORE_PARTIAL
|
| 60 |
return self.SCORE_WRONG
|
| 61 |
|
|
@@ -72,9 +66,10 @@ class Task3Grader:
|
|
| 72 |
score = self.grade(submitted_function)
|
| 73 |
return score, self.reward_for_score(score)
|
| 74 |
|
| 75 |
-
def get_canonical_answer(self) -> Dict[str,
|
| 76 |
"""For debugging / logging only — do not expose to the agent."""
|
| 77 |
return {
|
| 78 |
"target_function": self.target_function,
|
| 79 |
-
"
|
|
|
|
| 80 |
}
|
|
|
|
| 17 |
"""
|
| 18 |
|
| 19 |
from __future__ import annotations
|
| 20 |
+
import json
|
| 21 |
+
from typing import Dict, Any
|
| 22 |
|
| 23 |
|
| 24 |
class Task3Grader:
|
|
|
|
| 30 |
target_function : exact name of the rule-breaking function
|
| 31 |
partial_credit_functions: list of internal functions that get partial credit
|
| 32 |
(direct callees of the target that are contract functions)
|
|
|
|
| 33 |
"""
|
| 34 |
|
| 35 |
SCORE_CORRECT = 1.0
|
|
|
|
| 40 |
REWARD_PARTIAL = 1.5
|
| 41 |
REWARD_WRONG = -1.5
|
| 42 |
|
| 43 |
+
def __init__(self, target_function: Dict[str, Any], property_specification: Dict | str) -> None:
|
| 44 |
+
self.target_function = target_function
|
| 45 |
+
self.property_specification = property_specification
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
|
| 47 |
def grade(self, submitted_function: str) -> float:
|
| 48 |
"""Returns deterministic score in {0.0, 0.3, 1.0}."""
|
| 49 |
norm = submitted_function.strip().lower()
|
| 50 |
+
if norm == self.target_function["name"].strip().lower():
|
| 51 |
return self.SCORE_CORRECT
|
| 52 |
+
if norm in self.target_function.get("code", "").strip().lower():
|
| 53 |
return self.SCORE_PARTIAL
|
| 54 |
return self.SCORE_WRONG
|
| 55 |
|
|
|
|
| 66 |
score = self.grade(submitted_function)
|
| 67 |
return score, self.reward_for_score(score)
|
| 68 |
|
| 69 |
+
def get_canonical_answer(self) -> Dict[str, Dict | str]:
|
| 70 |
"""For debugging / logging only — do not expose to the agent."""
|
| 71 |
return {
|
| 72 |
"target_function": self.target_function,
|
| 73 |
+
"property_specification": json.dumps(self.property_specification)
|
| 74 |
+
if isinstance(self.property_specification, dict) else self.property_specification,
|
| 75 |
}
|