ajaxwin committed on
Commit
1b91307
·
1 Parent(s): 2ee85c9

task3 evaluated

Browse files
agents/task3.py CHANGED
@@ -43,21 +43,31 @@ def oracle_t3(env: Task3Environment, seed: int, verbose: bool = False) -> Dict[s
43
  "cumulative_reward": result.observation.cumulative_reward}
44
 
45
 
46
- def subfunction_t3(env: Task3Environment, seed: int) -> Dict[str, Any]:
47
  """Submits the first partial-credit subfunction if one exists, else 'constructor'."""
48
  r = env.reset(seed=seed)
49
  obs = r.observation
50
  contracts = load_contracts()
51
- partial_fns = []
52
  for c in contracts:
53
  if c["contract_name"] == obs.contract_name:
54
- fn = get_function_by_name(c, env.state().target_function)
55
- if fn:
56
- partial_fns = fn.get("task3", {}).get("partial_credit_functions", [])
57
  break
58
- submit_name = partial_fns[0] if partial_fns else "constructor"
 
 
 
 
 
 
59
  result = env.step(Action(action_type=ActionType.SUBMIT_FUNCTION,
60
  params={"function_name": submit_name}))
 
 
 
 
 
 
61
  v = result.reward.value
62
  score = 1.0 if v >= 4.9 else (0.3 if v >= 1.0 else 0.0)
63
  return {"seed": seed, "grader_score": score, "submitted": submit_name,
 
43
  "cumulative_reward": result.observation.cumulative_reward}
44
 
45
 
46
+ def subfunction_t3(env: Task3Environment, seed: int, verbose: bool = False) -> Dict[str, Any]:
47
  """Submits the first partial-credit subfunction if one exists, else 'constructor'."""
48
  r = env.reset(seed=seed)
49
  obs = r.observation
50
  contracts = load_contracts()
51
+ target_contract = {}
52
  for c in contracts:
53
  if c["contract_name"] == obs.contract_name:
54
+ target_contract = c
 
 
55
  break
56
+
57
+ submit_name = "constructor"
58
+ if (target_contract and "call_graph" in target_contract and env.state().target_function in
59
+ target_contract["call_graph"] and target_contract["call_graph"][env.state().target_function]):
60
+ submit_name = target_contract["call_graph"][env.state().target_function][0]
61
+
62
+
63
  result = env.step(Action(action_type=ActionType.SUBMIT_FUNCTION,
64
  params={"function_name": submit_name}))
65
+ if verbose:
66
+ prop = obs.extra.get("property_english", "")[:60]
67
+ print(f" {obs.contract_name}.{env.state().target_function}() \"{prop}\"")
68
+ print(f" Submitting subfunction: {submit_name}")
69
+ print(f" Reward received: {result.reward.value}")
70
+
71
  v = result.reward.value
72
  score = 1.0 if v >= 4.9 else (0.3 if v >= 1.0 else 0.0)
73
  return {"seed": seed, "grader_score": score, "submitted": submit_name,
data/contracts.json CHANGED
@@ -4306,7 +4306,12 @@
4306
  "mitigation": "Add a check to ensure that the scaled amount to burn is greater than zero before performing the burn and transfer, or avoid transfer when the burned amount is zero."
4307
  },
4308
  "property": "The total assets of a user should decrease exactly by the amount of underlying withdrawn. Rounding should not allow a user to receive underlying without a corresponding decrease in AToken balance.",
4309
- "property_specification": "precondition: User has AToken balance B, underlying asset balance of AToken contract is sufficient. operation: burn(user, receiver, amount, index) where amount > 0 but amount.rayDiv(index) == 0 due to rounding. postcondition: User's AToken balance decreases by the scaled amount (which is zero) and underlying amount is transferred, so user's net position decreases by amount of underlying. actual vulnerability: User's AToken balance does not change (burn of zero), but user receives underlying amount, resulting in a net gain of underlying without any reduction in AToken balance, violating the expected invariant that AToken burn should correspond to underlying transfer."
 
 
 
 
 
4310
  },
4311
  {
4312
  "name": "mint",
@@ -5046,7 +5051,12 @@
5046
  "mitigation": "Add a check to ensure that the amount after conversion to ray is greater than zero before minting, or avoid minting when the minted amount would be zero."
5047
  },
5048
  "property": "When a user borrows (mints debt), their debt balance should increase exactly by the borrowed amount (plus accrued interest). Rounding should not allow a user to receive underlying without a corresponding increase in debt.",
5049
- "property_specification": "precondition: User has debt balance D, total supply S. operation: mint(user, amount, rate) where amount > 0 but amount.wadToRay() == 0 due to rounding. postcondition: User's debt balance increases by amount (scaled appropriately). actual vulnerability: User's debt balance does not increase (mint of zero), but the LendingPool would transfer underlying to the user, resulting in a net gain without debt increase, violating the invariant that debt minting should correspond to underlying received."
 
 
 
 
 
5050
  },
5051
  {
5052
  "name": "burn",
@@ -5519,7 +5529,12 @@
5519
  "mitigation": "Add require(owner != address(0), 'INVALID_OWNER');"
5520
  },
5521
  "property": "The contract must have a non-zero owner to allow administrative functions to be callable.",
5522
- "property_specification": "precondition: Owner address can be any address. operation: initialize(owner, ...) where owner == address(0). postcondition: The contract owner is address(0) and administrative functions are permanently locked. actual vulnerability: The owner becomes zero address, violating the invariant that owner must be a valid address capable of executing privileged functions."
 
 
 
 
 
5523
  },
5524
  {
5525
  "name": "deposit",
@@ -5553,7 +5568,12 @@
5553
  "mitigation": "Fixed in PR#82 merged in commit 385b397."
5554
  },
5555
  "property": "After a deposit, the vault should remain in a consistent state where subsequent operations do not revert due to internal accounting mismatches.",
5556
- "property_specification": "precondition: Vault state is consistent, lastVaultBalance equals ATOKEN.balanceOf(vault). operation: deposit(assets) where assets is not a multiple of liquidity index. postcondition: Vault remains consistent, lastVaultBalance == ATOKEN.balanceOf(vault) after deposit. actual vulnerability: lastVaultBalance is incremented by assets, but ATOKEN.balanceOf increases by a different amount due to rounding, leading to inconsistency that causes future accrueYield() calls to revert."
 
 
 
 
 
5557
  },
5558
  {
5559
  "name": "depositATokens",
@@ -5587,7 +5607,12 @@
5587
  "mitigation": "Fixed in PR#80, merged in commit 34ad6e3."
5588
  },
5589
  "property": "Depositing aTokens (which already represent Aave positions) should not be limited by the Aave pool's supply cap because it does not increase net supply to the pool.",
5590
- "property_specification": "precondition: Aave pool has a supply cap, and the total supplied is near the cap. The vault holds aTokens whose underlying value would push the cap over if deposited as underlying. operation: depositATokens(assets) with assets amount that converts to underlying value exceeding remaining cap. postcondition: Deposit should succeed because no new underlying is supplied to Aave. actual vulnerability: The function reverts due to the supply cap check, preventing legitimate aToken deposits."
 
 
 
 
 
5591
  },
5592
  {
5593
  "name": "depositWithSig",
@@ -5870,7 +5895,12 @@
5870
  "mitigation": "Fixed in PR#86 merged in commit 385b397."
5871
  },
5872
  "property": "After any deposit or withdrawal, lastVaultBalance should equal ATOKEN.balanceOf(vault) to ensure accurate fee calculation on future yield.",
5873
- "property_specification": "precondition: lastVaultBalance = ATOKEN.balanceOf(vault) = B. operation: withdraw(assets) where assets is not a multiple of the liquidity index. postcondition: lastVaultBalance' = ATOKEN.balanceOf(vault)' = B - assets_actual. actual vulnerability: lastVaultBalance is decreased by assets (exact), but ATOKEN.balanceOf decreases by a different amount due to rounding, causing a mismatch that leads to fee miscalculation."
 
 
 
 
 
5874
  },
5875
  {
5876
  "name": "withdrawATokens",
@@ -6422,7 +6452,12 @@
6422
  "mitigation": "Fixed in PR#82 merged in commit 385b397."
6423
  },
6424
  "property": "All yield generated by the vault, including gifts, should be subject to fee accrual before fees are withdrawn.",
6425
- "property_specification": "precondition: Vault has lastUpdated = T (current block), and some user gifts aTokens to the vault in the same block after a transaction that set lastUpdated. operation: withdrawFees() called after the gift. postcondition: The gift amount is included in new yield and fees are charged on it. actual vulnerability: getClaimableFees() returns accumulatedFees from before the gift because block.timestamp == lastUpdated, so no new fees are calculated, allowing the gift to bypass fee charges."
 
 
 
 
 
6426
  },
6427
  {
6428
  "name": "claimRewards",
 
4306
  "mitigation": "Add a check to ensure that the scaled amount to burn is greater than zero before performing the burn and transfer, or avoid transfer when the burned amount is zero."
4307
  },
4308
  "property": "The total assets of a user should decrease exactly by the amount of underlying withdrawn. Rounding should not allow a user to receive underlying without a corresponding decrease in AToken balance.",
4309
+ "property_specification": {
4310
+ "precondition": "User has AToken balance B, underlying asset balance of AToken contract is sufficient.",
4311
+ "operation": "burn(user, receiver, amount, index) where amount > 0 but amount.rayDiv(index) == 0 due to rounding.",
4312
+ "postcondition": "User's AToken balance decreases by the scaled amount (which is zero) and underlying amount is transferred, so user's net position decreases by amount of underlying.",
4313
+ "actual": "User's AToken balance does not change (burn of zero), but user receives underlying amount, resulting in a net gain of underlying without any reduction in AToken balance, violating the expected invariant that AToken burn should correspond to underlying transfer."
4314
+ }
4315
  },
4316
  {
4317
  "name": "mint",
 
5051
  "mitigation": "Add a check to ensure that the amount after conversion to ray is greater than zero before minting, or avoid minting when the minted amount would be zero."
5052
  },
5053
  "property": "When a user borrows (mints debt), their debt balance should increase exactly by the borrowed amount (plus accrued interest). Rounding should not allow a user to receive underlying without a corresponding increase in debt.",
5054
+ "property_specification": {
5055
+ "precondition": "User has debt balance D, total supply S",
5056
+ "operation": "mint(user, amount, rate) where amount > 0 but amount.wadToRay() == 0 due to rounding.",
5057
+ "postcondition": "User's debt balance increases by amount (scaled appropriately)",
5058
+ "actual": "User's debt balance does not increase (mint of zero), but the LendingPool would transfer underlying to the user, resulting in a net gain without debt increase, violating the invariant that debt minting should correspond to underlying received."
5059
+ }
5060
  },
5061
  {
5062
  "name": "burn",
 
5529
  "mitigation": "Add require(owner != address(0), 'INVALID_OWNER');"
5530
  },
5531
  "property": "The contract must have a non-zero owner to allow administrative functions to be callable.",
5532
+ "property_specification": {
5533
+ "precondition": "Owner address can be any address.",
5534
+ "operation": "initialize(owner, ...) where owner == address(0).",
5535
+ "postcondition": "The contract owner is address(0) and administrative functions are permanently locked.",
5536
+ "actual": "The owner becomes zero address, violating the invariant that owner must be a valid address capable of executing privileged functions."
5537
+ }
5538
  },
5539
  {
5540
  "name": "deposit",
 
5568
  "mitigation": "Fixed in PR#82 merged in commit 385b397."
5569
  },
5570
  "property": "After a deposit, the vault should remain in a consistent state where subsequent operations do not revert due to internal accounting mismatches.",
5571
+ "property_specification": {
5572
+ "precondition": "Vault state is consistent, lastVaultBalance equals ATOKEN.balanceOf(vault).",
5573
+ "operation": "deposit(assets) where assets is not a multiple of liquidity index.",
5574
+ "postcondition": "Vault remains consistent, lastVaultBalance == ATOKEN.balanceOf(vault) after deposit.",
5575
+ "actual": "lastVaultBalance is incremented by assets, but ATOKEN.balanceOf increases by a different amount due to rounding, leading to inconsistency that causes future accrueYield() calls to revert."
5576
+ }
5577
  },
5578
  {
5579
  "name": "depositATokens",
 
5607
  "mitigation": "Fixed in PR#80, merged in commit 34ad6e3."
5608
  },
5609
  "property": "Depositing aTokens (which already represent Aave positions) should not be limited by the Aave pool's supply cap because it does not increase net supply to the pool.",
5610
+ "property_specification": {
5611
+ "precondition": "Aave pool has a supply cap, and the total supplied is near the cap. The vault holds aTokens whose underlying value would push the cap over if deposited as underlying.",
5612
+ "operation": "depositATokens(assets) with assets amount that converts to underlying value exceeding remaining cap.",
5613
+ "postcondition": "Deposit should succeed because no new underlying is supplied to Aave.",
5614
+ "actual": "The function reverts due to the supply cap check, preventing legitimate aToken deposits."
5615
+ }
5616
  },
5617
  {
5618
  "name": "depositWithSig",
 
5895
  "mitigation": "Fixed in PR#86 merged in commit 385b397."
5896
  },
5897
  "property": "After any deposit or withdrawal, lastVaultBalance should equal ATOKEN.balanceOf(vault) to ensure accurate fee calculation on future yield.",
5898
+ "property_specification": {
5899
+ "precondition": "lastVaultBalance = ATOKEN.balanceOf(vault) = B.",
5900
+ "operation": "withdraw(assets) where assets is not a multiple of the liquidity index.",
5901
+ "postcondition": "lastVaultBalance' = ATOKEN.balanceOf(vault)' = B - assets_actual.",
5902
+ "actual": "lastVaultBalance is decreased by assets (exact), but ATOKEN.balanceOf decreases by a different amount due to rounding, causing a mismatch that leads to fee miscalculation."
5903
+ }
5904
  },
5905
  {
5906
  "name": "withdrawATokens",
 
6452
  "mitigation": "Fixed in PR#82 merged in commit 385b397."
6453
  },
6454
  "property": "All yield generated by the vault, including gifts, should be subject to fee accrual before fees are withdrawn.",
6455
+ "property_specification": {
6456
+ "precondition": "Vault has lastUpdated = T (current block), and some user gifts aTokens to the vault in the same block after a transaction that set lastUpdated.",
6457
+ "operation": "withdrawFees() called after the gift.",
6458
+ "postcondition": "The gift amount is included in new yield and fees are charged on it.",
6459
+ "actual": "getClaimableFees() returns accumulatedFees from before the gift because block.timestamp == lastUpdated, so no new fees are calculated, allowing the gift to bypass fee charges."
6460
+ }
6461
  },
6462
  {
6463
  "name": "claimRewards",
eval.py CHANGED
@@ -182,7 +182,7 @@ def run_task3_eval(n: int, seed_offset: int, verbose: bool) -> Dict[str, Any]:
182
 
183
  # Subfunction (partial credit)
184
  print("\n▶ Subfunction (partial-credit callee if exists, else constructor):")
185
- sub_eps = [subfunction_t3(env, seed_offset + i) for i in range(n)]
186
  sub_avg = _avg(sub_eps)
187
  submitted_sub = list({e.get("submitted", "?") for e in sub_eps})
188
  print(f" Subfunction avg: {sub_avg:.3f} submitted: {submitted_sub}")
 
182
 
183
  # Subfunction (partial credit)
184
  print("\n▶ Subfunction (partial-credit callee if exists, else constructor):")
185
+ sub_eps = [subfunction_t3(env, seed_offset + i, verbose) for i in range(n)]
186
  sub_avg = _avg(sub_eps)
187
  submitted_sub = list({e.get("submitted", "?") for e in sub_eps})
188
  print(f" Subfunction avg: {sub_avg:.3f} submitted: {submitted_sub}")
tasks/task3/environment.py CHANGED
@@ -83,8 +83,6 @@ class Task3Environment(BaseEnv):
83
  self._seen: Set[str] = set()
84
 
85
  # ── OpenEnv interface ─────────────────────────────────────────────────────
86
-
87
- # ! Need to change alot here
88
 
89
  def reset(self, seed: Optional[int] = None) -> ResetResult:
90
  if seed is not None:
@@ -93,14 +91,12 @@ class Task3Environment(BaseEnv):
93
  self._contract, self._target_fn = sample_task3_episode(
94
  self._contracts, self._rng
95
  )
96
- t3 = self._target_fn["task3"]
97
  self._grader = Task3Grader(
98
- target_function=self._target_fn["name"],
99
- partial_credit_functions=t3.get("partial_credit_functions", []), # ! doesn't exists
100
- property_english=t3.get("property_english", ""), # ! doesn't exist
101
  )
102
  self._step_count = 0
103
- self._cum_reward = 0.0
104
  self._done = False
105
  self._submitted = False
106
  self._query_hist = []
@@ -111,7 +107,7 @@ class Task3Environment(BaseEnv):
111
  last_result=(
112
  f"New episode started.\n"
113
  f"Contract : {self._contract['contract_name']}\n\n"
114
- f"Property : {t3['property_english']}\n\n"
115
  f"Find the function in this contract that violates the property above.\n"
116
  f"Use list_functions then get_function_code to investigate.\n"
117
  f"Submit with submit_function, params={{\"function_name\": \"...\"}}.\n"
@@ -154,7 +150,6 @@ class Task3Environment(BaseEnv):
154
  # ── Internal helpers ──────────────────────────────────────────────────────
155
 
156
  def _build_obs(self, last_action: Optional[str], last_result: str) -> Observation:
157
- t3 = self._target_fn.get("task3", {})
158
  return Observation(
159
  task_id=TASK_ID,
160
  contract_name=self._contract.get("contract_name", ""),
@@ -166,7 +161,7 @@ class Task3Environment(BaseEnv):
166
  cumulative_reward=self._cum_reward,
167
  done=self._done,
168
  extra={
169
- "property_english": t3.get("property_english", ""),
170
  "solidity_version": self._contract.get("metadata", {}).get("solidity_version", ""),
171
  "hint": (
172
  "Read the property, then inspect function code to find which one violates it. "
 
83
  self._seen: Set[str] = set()
84
 
85
  # ── OpenEnv interface ─────────────────────────────────────────────────────
 
 
86
 
87
  def reset(self, seed: Optional[int] = None) -> ResetResult:
88
  if seed is not None:
 
91
  self._contract, self._target_fn = sample_task3_episode(
92
  self._contracts, self._rng
93
  )
 
94
  self._grader = Task3Grader(
95
+ target_function=self._target_fn,
96
+ property_specification=self._target_fn.get("property_specification", "")
 
97
  )
98
  self._step_count = 0
99
+ self._cum_reward = 0.0
100
  self._done = False
101
  self._submitted = False
102
  self._query_hist = []
 
107
  last_result=(
108
  f"New episode started.\n"
109
  f"Contract : {self._contract['contract_name']}\n\n"
110
+ f"Property : {self._target_fn.get('property', '')}\n\n"
111
  f"Find the function in this contract that violates the property above.\n"
112
  f"Use list_functions then get_function_code to investigate.\n"
113
  f"Submit with submit_function, params={{\"function_name\": \"...\"}}.\n"
 
150
  # ── Internal helpers ──────────────────────────────────────────────────────
151
 
152
  def _build_obs(self, last_action: Optional[str], last_result: str) -> Observation:
 
153
  return Observation(
154
  task_id=TASK_ID,
155
  contract_name=self._contract.get("contract_name", ""),
 
161
  cumulative_reward=self._cum_reward,
162
  done=self._done,
163
  extra={
164
+ "property_english": self._target_fn.get("property", ""),
165
  "solidity_version": self._contract.get("metadata", {}).get("solidity_version", ""),
166
  "hint": (
167
  "Read the property, then inspect function code to find which one violates it. "
tasks/task3/grader.py CHANGED
@@ -17,7 +17,8 @@ Reward table (ONE submission per episode)
17
  """
18
 
19
  from __future__ import annotations
20
- from typing import Dict, List, Optional
 
21
 
22
 
23
  class Task3Grader:
@@ -29,7 +30,6 @@ class Task3Grader:
29
  target_function : exact name of the rule-breaking function
30
  partial_credit_functions: list of internal functions that get partial credit
31
  (direct callees of the target that are contract functions)
32
- property_english : the English property text (for feedback messages)
33
  """
34
 
35
  SCORE_CORRECT = 1.0
@@ -40,22 +40,16 @@ class Task3Grader:
40
  REWARD_PARTIAL = 1.5
41
  REWARD_WRONG = -1.5
42
 
43
- def __init__(
44
- self,
45
- target_function: str,
46
- partial_credit_functions: List[str],
47
- property_english: str = "",
48
- ) -> None:
49
- self.target_function = target_function.lower()
50
- self.partial_credit_functions = [f.lower() for f in partial_credit_functions]
51
- self.property_english = property_english
52
 
53
  def grade(self, submitted_function: str) -> float:
54
  """Returns deterministic score in {0.0, 0.3, 1.0}."""
55
  norm = submitted_function.strip().lower()
56
- if norm == self.target_function:
57
  return self.SCORE_CORRECT
58
- if norm in self.partial_credit_functions:
59
  return self.SCORE_PARTIAL
60
  return self.SCORE_WRONG
61
 
@@ -72,9 +66,10 @@ class Task3Grader:
72
  score = self.grade(submitted_function)
73
  return score, self.reward_for_score(score)
74
 
75
- def get_canonical_answer(self) -> Dict[str, object]:
76
  """For debugging / logging only — do not expose to the agent."""
77
  return {
78
  "target_function": self.target_function,
79
- "partial_credit_functions": self.partial_credit_functions,
 
80
  }
 
17
  """
18
 
19
  from __future__ import annotations
20
+ import json
21
+ from typing import Dict, Any
22
 
23
 
24
  class Task3Grader:
 
30
  target_function : exact name of the rule-breaking function
31
  partial_credit_functions: list of internal functions that get partial credit
32
  (direct callees of the target that are contract functions)
 
33
  """
34
 
35
  SCORE_CORRECT = 1.0
 
40
  REWARD_PARTIAL = 1.5
41
  REWARD_WRONG = -1.5
42
 
43
+ def __init__(self, target_function: Dict[str, Any], property_specification: Dict | str) -> None:
44
+ self.target_function = target_function
45
+ self.property_specification = property_specification
 
 
 
 
 
 
46
 
47
  def grade(self, submitted_function: str) -> float:
48
  """Returns deterministic score in {0.0, 0.3, 1.0}."""
49
  norm = submitted_function.strip().lower()
50
+ if norm == self.target_function["name"].strip().lower():
51
  return self.SCORE_CORRECT
52
+ if norm in self.target_function.get("code", "").strip().lower():
53
  return self.SCORE_PARTIAL
54
  return self.SCORE_WRONG
55
 
 
66
  score = self.grade(submitted_function)
67
  return score, self.reward_for_score(score)
68
 
69
+ def get_canonical_answer(self) -> Dict[str, Dict | str]:
70
  """For debugging / logging only — do not expose to the agent."""
71
  return {
72
  "target_function": self.target_function,
73
+ "property_specification": json.dumps(self.property_specification)
74
+ if isinstance(self.property_specification, dict) else self.property_specification,
75
  }