Spaces:

Codex47
/

SmartContractAudit

Running

App Files Files Community

ajaxwin committed 14 days ago

Commit

1b91307

1 Parent(s): 2ee85c9

task3 evaluated

Browse files

Files changed (5) hide show

agents/task3.py +16 -6
data/contracts.json +42 -7
eval.py +1 -1
tasks/task3/environment.py +5 -10
tasks/task3/grader.py +10 -15

agents/task3.py CHANGED Viewed

@@ -43,21 +43,31 @@ def oracle_t3(env: Task3Environment, seed: int, verbose: bool = False) -> Dict[s
             "cumulative_reward": result.observation.cumulative_reward}
-def subfunction_t3(env: Task3Environment, seed: int) -> Dict[str, Any]:
     """Submits the first partial-credit subfunction if one exists, else 'constructor'."""
     r   = env.reset(seed=seed)
     obs = r.observation
     contracts = load_contracts()
-    partial_fns = []
     for c in contracts:
         if c["contract_name"] == obs.contract_name:
-            fn = get_function_by_name(c, env.state().target_function)
-            if fn:
-                partial_fns = fn.get("task3", {}).get("partial_credit_functions", [])
             break
-    submit_name = partial_fns[0] if partial_fns else "constructor"
     result = env.step(Action(action_type=ActionType.SUBMIT_FUNCTION,
                               params={"function_name": submit_name}))
     v = result.reward.value
     score = 1.0 if v >= 4.9 else (0.3 if v >= 1.0 else 0.0)
     return {"seed": seed, "grader_score": score, "submitted": submit_name,

             "cumulative_reward": result.observation.cumulative_reward}
+def subfunction_t3(env: Task3Environment, seed: int, verbose: bool = False) -> Dict[str, Any]:
     """Submits the first partial-credit subfunction if one exists, else 'constructor'."""
     r   = env.reset(seed=seed)
     obs = r.observation
     contracts = load_contracts()
+    target_contract = {}
     for c in contracts:
         if c["contract_name"] == obs.contract_name:
+            target_contract = c
             break
+    submit_name = "constructor"
+    if (target_contract and "call_graph" in target_contract and env.state().target_function in
+        target_contract["call_graph"] and target_contract["call_graph"][env.state().target_function]):
+        submit_name = target_contract["call_graph"][env.state().target_function][0]
     result = env.step(Action(action_type=ActionType.SUBMIT_FUNCTION,
                               params={"function_name": submit_name}))
+    if verbose:
+        prop = obs.extra.get("property_english", "")[:60]
+        print(f"    {obs.contract_name}.{env.state().target_function}()  \"{prop}\"")
+        print(f"    Submitting subfunction: {submit_name}")
+        print(f"    Reward received: {result.reward.value}")
     v = result.reward.value
     score = 1.0 if v >= 4.9 else (0.3 if v >= 1.0 else 0.0)
     return {"seed": seed, "grader_score": score, "submitted": submit_name,

data/contracts.json CHANGED Viewed

@@ -4306,7 +4306,12 @@
                     "mitigation": "Add a check to ensure that the scaled amount to burn is greater than zero before performing the burn and transfer, or avoid transfer when the burned amount is zero."
                 },
                 "property": "The total assets of a user should decrease exactly by the amount of underlying withdrawn. Rounding should not allow a user to receive underlying without a corresponding decrease in AToken balance.",
-                "property_specification": "precondition: User has AToken balance B, underlying asset balance of AToken contract is sufficient. operation: burn(user, receiver, amount, index) where amount > 0 but amount.rayDiv(index) == 0 due to rounding. postcondition: User's AToken balance decreases by the scaled amount (which is zero) and underlying amount is transferred, so user's net position decreases by amount of underlying. actual vulnerability: User's AToken balance does not change (burn of zero), but user receives underlying amount, resulting in a net gain of underlying without any reduction in AToken balance, violating the expected invariant that AToken burn should correspond to underlying transfer."
             },
             {
                 "name": "mint",
@@ -5046,7 +5051,12 @@
                     "mitigation": "Add a check to ensure that the amount after conversion to ray is greater than zero before minting, or avoid minting when the minted amount would be zero."
                 },
                 "property": "When a user borrows (mints debt), their debt balance should increase exactly by the borrowed amount (plus accrued interest). Rounding should not allow a user to receive underlying without a corresponding increase in debt.",
-                "property_specification": "precondition: User has debt balance D, total supply S. operation: mint(user, amount, rate) where amount > 0 but amount.wadToRay() == 0 due to rounding. postcondition: User's debt balance increases by amount (scaled appropriately). actual vulnerability: User's debt balance does not increase (mint of zero), but the LendingPool would transfer underlying to the user, resulting in a net gain without debt increase, violating the invariant that debt minting should correspond to underlying received."
             },
             {
                 "name": "burn",
@@ -5519,7 +5529,12 @@
                     "mitigation": "Add require(owner != address(0), 'INVALID_OWNER');"
                 },
                 "property": "The contract must have a non-zero owner to allow administrative functions to be callable.",
-                "property_specification": "precondition: Owner address can be any address. operation: initialize(owner, ...) where owner == address(0). postcondition: The contract owner is address(0) and administrative functions are permanently locked. actual vulnerability: The owner becomes zero address, violating the invariant that owner must be a valid address capable of executing privileged functions."
             },
             {
                 "name": "deposit",
@@ -5553,7 +5568,12 @@
                     "mitigation": "Fixed in PR#82 merged in commit 385b397."
                 },
                 "property": "After a deposit, the vault should remain in a consistent state where subsequent operations do not revert due to internal accounting mismatches.",
-                "property_specification": "precondition: Vault state is consistent, lastVaultBalance equals ATOKEN.balanceOf(vault). operation: deposit(assets) where assets is not a multiple of liquidity index. postcondition: Vault remains consistent, lastVaultBalance == ATOKEN.balanceOf(vault) after deposit. actual vulnerability: lastVaultBalance is incremented by assets, but ATOKEN.balanceOf increases by a different amount due to rounding, leading to inconsistency that causes future accrueYield() calls to revert."
             },
             {
                 "name": "depositATokens",
@@ -5587,7 +5607,12 @@
                     "mitigation": "Fixed in PR#80, merged in commit 34ad6e3."
                 },
                 "property": "Depositing aTokens (which already represent Aave positions) should not be limited by the Aave pool's supply cap because it does not increase net supply to the pool.",
-                "property_specification": "precondition: Aave pool has a supply cap, and the total supplied is near the cap. The vault holds aTokens whose underlying value would push the cap over if deposited as underlying. operation: depositATokens(assets) with assets amount that converts to underlying value exceeding remaining cap. postcondition: Deposit should succeed because no new underlying is supplied to Aave. actual vulnerability: The function reverts due to the supply cap check, preventing legitimate aToken deposits."
             },
             {
                 "name": "depositWithSig",
@@ -5870,7 +5895,12 @@
                     "mitigation": "Fixed in PR#86 merged in commit 385b397."
                 },
                 "property": "After any deposit or withdrawal, lastVaultBalance should equal ATOKEN.balanceOf(vault) to ensure accurate fee calculation on future yield.",
-                "property_specification": "precondition: lastVaultBalance = ATOKEN.balanceOf(vault) = B. operation: withdraw(assets) where assets is not a multiple of the liquidity index. postcondition: lastVaultBalance' = ATOKEN.balanceOf(vault)' = B - assets_actual. actual vulnerability: lastVaultBalance is decreased by assets (exact), but ATOKEN.balanceOf decreases by a different amount due to rounding, causing a mismatch that leads to fee miscalculation."
             },
             {
                 "name": "withdrawATokens",
@@ -6422,7 +6452,12 @@
                     "mitigation": "Fixed in PR#82 merged in commit 385b397."
                 },
                 "property": "All yield generated by the vault, including gifts, should be subject to fee accrual before fees are withdrawn.",
-                "property_specification": "precondition: Vault has lastUpdated = T (current block), and some user gifts aTokens to the vault in the same block after a transaction that set lastUpdated. operation: withdrawFees() called after the gift. postcondition: The gift amount is included in new yield and fees are charged on it. actual vulnerability: getClaimableFees() returns accumulatedFees from before the gift because block.timestamp == lastUpdated, so no new fees are calculated, allowing the gift to bypass fee charges."
             },
             {
                 "name": "claimRewards",

                     "mitigation": "Add a check to ensure that the scaled amount to burn is greater than zero before performing the burn and transfer, or avoid transfer when the burned amount is zero."
                 },
                 "property": "The total assets of a user should decrease exactly by the amount of underlying withdrawn. Rounding should not allow a user to receive underlying without a corresponding decrease in AToken balance.",
+                "property_specification": {
+                    "precondition": "User has AToken balance B, underlying asset balance of AToken contract is sufficient.",
+                    "operation": "burn(user, receiver, amount, index) where amount > 0 but amount.rayDiv(index) == 0 due to rounding.",
+                    "postcondition": "User's AToken balance decreases by the scaled amount (which is zero) and underlying amount is transferred, so user's net position decreases by amount of underlying.",
+                    "actual": "User's AToken balance does not change (burn of zero), but user receives underlying amount, resulting in a net gain of underlying without any reduction in AToken balance, violating the expected invariant that AToken burn should correspond to underlying transfer."
+                }
             },
             {
                 "name": "mint",
                     "mitigation": "Add a check to ensure that the amount after conversion to ray is greater than zero before minting, or avoid minting when the minted amount would be zero."
                 },
                 "property": "When a user borrows (mints debt), their debt balance should increase exactly by the borrowed amount (plus accrued interest). Rounding should not allow a user to receive underlying without a corresponding increase in debt.",
+                "property_specification": {
+                    "precondition": "User has debt balance D, total supply S",
+                    "operation": "mint(user, amount, rate) where amount > 0 but amount.wadToRay() == 0 due to rounding.",
+                    "postcondition": "User's debt balance increases by amount (scaled appropriately)",
+                    "actual": "User's debt balance does not increase (mint of zero), but the LendingPool would transfer underlying to the user, resulting in a net gain without debt increase, violating the invariant that debt minting should correspond to underlying received."
+                }
             },
             {
                 "name": "burn",
                     "mitigation": "Add require(owner != address(0), 'INVALID_OWNER');"
                 },
                 "property": "The contract must have a non-zero owner to allow administrative functions to be callable.",
+                "property_specification": {
+                    "precondition": "Owner address can be any address.",
+                    "operation": "initialize(owner, ...) where owner == address(0).",
+                    "postcondition": "The contract owner is address(0) and administrative functions are permanently locked.",
+                    "actual": "The owner becomes zero address, violating the invariant that owner must be a valid address capable of executing privileged functions."
+                }
             },
             {
                 "name": "deposit",
                     "mitigation": "Fixed in PR#82 merged in commit 385b397."
                 },
                 "property": "After a deposit, the vault should remain in a consistent state where subsequent operations do not revert due to internal accounting mismatches.",
+                "property_specification": {
+                    "precondition": "Vault state is consistent, lastVaultBalance equals ATOKEN.balanceOf(vault).",
+                    "operation": "deposit(assets) where assets is not a multiple of liquidity index.",
+                    "postcondition": "Vault remains consistent, lastVaultBalance == ATOKEN.balanceOf(vault) after deposit.",
+                    "actual": "lastVaultBalance is incremented by assets, but ATOKEN.balanceOf increases by a different amount due to rounding, leading to inconsistency that causes future accrueYield() calls to revert."
+                }
             },
             {
                 "name": "depositATokens",
                     "mitigation": "Fixed in PR#80, merged in commit 34ad6e3."
                 },
                 "property": "Depositing aTokens (which already represent Aave positions) should not be limited by the Aave pool's supply cap because it does not increase net supply to the pool.",
+                "property_specification": {
+                    "precondition": "Aave pool has a supply cap, and the total supplied is near the cap. The vault holds aTokens whose underlying value would push the cap over if deposited as underlying.",
+                    "operation": "depositATokens(assets) with assets amount that converts to underlying value exceeding remaining cap.",
+                    "postcondition": "Deposit should succeed because no new underlying is supplied to Aave.",
+                    "actual": "The function reverts due to the supply cap check, preventing legitimate aToken deposits."
+                }
             },
             {
                 "name": "depositWithSig",
                     "mitigation": "Fixed in PR#86 merged in commit 385b397."
                 },
                 "property": "After any deposit or withdrawal, lastVaultBalance should equal ATOKEN.balanceOf(vault) to ensure accurate fee calculation on future yield.",
+                "property_specification": {
+                    "precondition": "lastVaultBalance = ATOKEN.balanceOf(vault) = B.",
+                    "operation": "withdraw(assets) where assets is not a multiple of the liquidity index.",
+                    "postcondition": "lastVaultBalance' = ATOKEN.balanceOf(vault)' = B - assets_actual.",
+                    "actual": "lastVaultBalance is decreased by assets (exact), but ATOKEN.balanceOf decreases by a different amount due to rounding, causing a mismatch that leads to fee miscalculation."
+                }
             },
             {
                 "name": "withdrawATokens",
                     "mitigation": "Fixed in PR#82 merged in commit 385b397."
                 },
                 "property": "All yield generated by the vault, including gifts, should be subject to fee accrual before fees are withdrawn.",
+                "property_specification": {
+                    "precondition": "Vault has lastUpdated = T (current block), and some user gifts aTokens to the vault in the same block after a transaction that set lastUpdated.",
+                    "operation": "withdrawFees() called after the gift.",
+                    "postcondition": "The gift amount is included in new yield and fees are charged on it.",
+                    "actual": "getClaimableFees() returns accumulatedFees from before the gift because block.timestamp == lastUpdated, so no new fees are calculated, allowing the gift to bypass fee charges."
+                }
             },
             {
                 "name": "claimRewards",

eval.py CHANGED Viewed

@@ -182,7 +182,7 @@ def run_task3_eval(n: int, seed_offset: int, verbose: bool) -> Dict[str, Any]:
     # Subfunction (partial credit)
     print("\n▶ Subfunction (partial-credit callee if exists, else constructor):")
-    sub_eps = [subfunction_t3(env, seed_offset + i) for i in range(n)]
     sub_avg = _avg(sub_eps)
     submitted_sub = list({e.get("submitted", "?") for e in sub_eps})
     print(f"  Subfunction  avg: {sub_avg:.3f}  submitted: {submitted_sub}")

     # Subfunction (partial credit)
     print("\n▶ Subfunction (partial-credit callee if exists, else constructor):")
+    sub_eps = [subfunction_t3(env, seed_offset + i, verbose) for i in range(n)]
     sub_avg = _avg(sub_eps)
     submitted_sub = list({e.get("submitted", "?") for e in sub_eps})
     print(f"  Subfunction  avg: {sub_avg:.3f}  submitted: {submitted_sub}")

tasks/task3/environment.py CHANGED Viewed

@@ -83,8 +83,6 @@ class Task3Environment(BaseEnv):
         self._seen:       Set[str] = set()
     # ── OpenEnv interface ─────────────────────────────────────────────────────
-    # ! Need to change alot here
     def reset(self, seed: Optional[int] = None) -> ResetResult:
         if seed is not None:
@@ -93,14 +91,12 @@ class Task3Environment(BaseEnv):
         self._contract, self._target_fn = sample_task3_episode(
             self._contracts, self._rng
         )
-        t3 = self._target_fn["task3"]
         self._grader = Task3Grader(
-            target_function=self._target_fn["name"],
-            partial_credit_functions=t3.get("partial_credit_functions", []),        # ! doesn't exists
-            property_english=t3.get("property_english", ""),        # ! doesn't exist
         )
         self._step_count = 0
-        self._cum_reward = 0.0
         self._done       = False
         self._submitted  = False
         self._query_hist = []
@@ -111,7 +107,7 @@ class Task3Environment(BaseEnv):
             last_result=(
                 f"New episode started.\n"
                 f"Contract : {self._contract['contract_name']}\n\n"
-                f"Property : {t3['property_english']}\n\n"
                 f"Find the function in this contract that violates the property above.\n"
                 f"Use list_functions then get_function_code to investigate.\n"
                 f"Submit with submit_function, params={{\"function_name\": \"...\"}}.\n"
@@ -154,7 +150,6 @@ class Task3Environment(BaseEnv):
     # ── Internal helpers ──────────────────────────────────────────────────────
     def _build_obs(self, last_action: Optional[str], last_result: str) -> Observation:
-        t3 = self._target_fn.get("task3", {})
         return Observation(
             task_id=TASK_ID,
             contract_name=self._contract.get("contract_name", ""),
@@ -166,7 +161,7 @@ class Task3Environment(BaseEnv):
             cumulative_reward=self._cum_reward,
             done=self._done,
             extra={
-                "property_english": t3.get("property_english", ""),
                 "solidity_version": self._contract.get("metadata", {}).get("solidity_version", ""),
                 "hint": (
                     "Read the property, then inspect function code to find which one violates it. "

         self._seen:       Set[str] = set()
     # ── OpenEnv interface ─────────────────────────────────────────────────────
     def reset(self, seed: Optional[int] = None) -> ResetResult:
         if seed is not None:
         self._contract, self._target_fn = sample_task3_episode(
             self._contracts, self._rng
         )
         self._grader = Task3Grader(
+            target_function=self._target_fn,
+            property_specification=self._target_fn.get("property_specification", "")
         )
         self._step_count = 0
+        self._cum_reward = 0.0
         self._done       = False
         self._submitted  = False
         self._query_hist = []
             last_result=(
                 f"New episode started.\n"
                 f"Contract : {self._contract['contract_name']}\n\n"
+                f"Property : {self._target_fn.get('property', '')}\n\n"
                 f"Find the function in this contract that violates the property above.\n"
                 f"Use list_functions then get_function_code to investigate.\n"
                 f"Submit with submit_function, params={{\"function_name\": \"...\"}}.\n"
     # ── Internal helpers ──────────────────────────────────────────────────────
     def _build_obs(self, last_action: Optional[str], last_result: str) -> Observation:
         return Observation(
             task_id=TASK_ID,
             contract_name=self._contract.get("contract_name", ""),
             cumulative_reward=self._cum_reward,
             done=self._done,
             extra={
+                "property_english": self._target_fn.get("property", ""),
                 "solidity_version": self._contract.get("metadata", {}).get("solidity_version", ""),
                 "hint": (
                     "Read the property, then inspect function code to find which one violates it. "

tasks/task3/grader.py CHANGED Viewed

@@ -17,7 +17,8 @@ Reward table (ONE submission per episode)
 """
 from __future__ import annotations
-from typing import Dict, List, Optional
 class Task3Grader:
@@ -29,7 +30,6 @@ class Task3Grader:
     target_function         : exact name of the rule-breaking function
     partial_credit_functions: list of internal functions that get partial credit
                               (direct callees of the target that are contract functions)
-    property_english        : the English property text (for feedback messages)
     """
     SCORE_CORRECT  = 1.0
@@ -40,22 +40,16 @@ class Task3Grader:
     REWARD_PARTIAL = 1.5
     REWARD_WRONG   = -1.5
-    def __init__(
-        self,
-        target_function: str,
-        partial_credit_functions: List[str],
-        property_english: str = "",
-    ) -> None:
-        self.target_function          = target_function.lower()
-        self.partial_credit_functions = [f.lower() for f in partial_credit_functions]
-        self.property_english         = property_english
     def grade(self, submitted_function: str) -> float:
         """Returns deterministic score in {0.0, 0.3, 1.0}."""
         norm = submitted_function.strip().lower()
-        if norm == self.target_function:
             return self.SCORE_CORRECT
-        if norm in self.partial_credit_functions:
             return self.SCORE_PARTIAL
         return self.SCORE_WRONG
@@ -72,9 +66,10 @@ class Task3Grader:
         score = self.grade(submitted_function)
         return score, self.reward_for_score(score)
-    def get_canonical_answer(self) -> Dict[str, object]:
         """For debugging / logging only — do not expose to the agent."""
         return {
             "target_function":          self.target_function,
-            "partial_credit_functions": self.partial_credit_functions,
         }

 """
 from __future__ import annotations
+import json
+from typing import Dict, Any
 class Task3Grader:
     target_function         : exact name of the rule-breaking function
     partial_credit_functions: list of internal functions that get partial credit
                               (direct callees of the target that are contract functions)
     """
     SCORE_CORRECT  = 1.0
     REWARD_PARTIAL = 1.5
     REWARD_WRONG   = -1.5
+    def __init__(self, target_function: Dict[str, Any], property_specification: Dict | str) -> None:
+        self.target_function = target_function
+        self.property_specification = property_specification
     def grade(self, submitted_function: str) -> float:
         """Returns deterministic score in {0.0, 0.3, 1.0}.

         Scoring rules, checked in order:

         * An empty or whitespace-only submission scores ``SCORE_WRONG``.
         * A submission equal to the target function's ``"name"`` (ignoring
           case and surrounding whitespace) scores ``SCORE_CORRECT``.
         * A submission that occurs as a whole identifier inside the target
           function's ``"code"`` scores ``SCORE_PARTIAL``.
         * Anything else scores ``SCORE_WRONG``.

         Args:
             submitted_function: The function name submitted by the agent.

         Returns:
             One of ``SCORE_CORRECT``, ``SCORE_PARTIAL`` or ``SCORE_WRONG``.

         Raises:
             KeyError: If the target function dict has no ``"name"`` entry.
         """
         import re  # local import: only this method needs it

         norm = submitted_function.strip().lower()
         if not norm:
             # "" is a substring of every string, so an empty submission must
             # be rejected before the code search or it would earn partial credit.
             return self.SCORE_WRONG
         if norm == self.target_function["name"].strip().lower():
             return self.SCORE_CORRECT
         # Tolerate a missing or null "code" entry by searching an empty string.
         code = (self.target_function.get("code") or "").lower()
         # Require a whole-identifier match so fragments such as "bur" do not
         # match "_burn". "$" is included because Solidity allows it in
         # identifiers; the code is lower-cased, so a-z covers all letters.
         pattern = r"(?<![a-z0-9_$])" + re.escape(norm) + r"(?![a-z0-9_$])"
         if re.search(pattern, code):
             return self.SCORE_PARTIAL
         return self.SCORE_WRONG
         score = self.grade(submitted_function)
         return score, self.reward_for_score(score)
+    def get_canonical_answer(self) -> Dict[str, Dict | str]:
         """For debugging / logging only — do not expose to the agent."""
         return {
             "target_function":          self.target_function,
+            "property_specification": json.dumps(self.property_specification)
+                if isinstance(self.property_specification, dict) else self.property_specification,
         }