Spaces:
Running
Running
ajaxwin commited on
Commit Β·
056cf7b
1
Parent(s): c719864
task3 reviewed
Browse filesproperty_specification added
- README.md +5 -5
- app.py +1 -1
- data/Template.json +15 -12
- data/data_loader.py +6 -7
- demo.py +2 -2
- env/schemas.py +1 -1
- eval.py +1 -1
- openenv.yaml +2 -2
- tasks/task3/actions.py +15 -9
- tasks/task3/environment.py +10 -10
- utils/prompts.py +2 -2
- validate.py +4 -4
README.md
CHANGED
|
@@ -102,7 +102,7 @@ Matching uses **word-set containment + synonym expansion** β words don't need
|
|
| 102 |
| `get_function_code` | `function_name` | β0.10 |
|
| 103 |
| `get_state_variable` | `variable_name` (opt.) | β0.05 |
|
| 104 |
| `get_call_graph` | β | β0.08 |
|
| 105 |
-
| `
|
| 106 |
| `submit_function` | `function_name` | **+5.0 / +1.5 / β1.5**, ONE attempt |
|
| 107 |
|
| 108 |
### Grader (three-tier deterministic)
|
|
@@ -111,7 +111,7 @@ Matching uses **word-set containment + synonym expansion** β words don't need
|
|
| 111 |
- **0.3** β a direct internal subfunction of the target β reward **+1.5**
|
| 112 |
- **0.0** β anything else β reward **β1.5**
|
| 113 |
|
| 114 |
-
`
|
| 115 |
|
| 116 |
---
|
| 117 |
|
|
@@ -125,7 +125,7 @@ All tasks share the same `Observation` structure:
|
|
| 125 |
"contract_name": "SimpleVault",
|
| 126 |
"contract_description": "An ETH vault that allows users to deposit...",
|
| 127 |
"available_actions": ["list_functions", "get_function_metadata", "..."],
|
| 128 |
-
"last_action": "
|
| 129 |
"last_action_result": "Formal property:\nPre: caller != owner...",
|
| 130 |
"step_count": 1,
|
| 131 |
"cumulative_reward": -0.03,
|
|
@@ -217,7 +217,7 @@ r = env.reset(seed=42)
|
|
| 217 |
print(r.observation.extra["property_english"])
|
| 218 |
# "Only the owner should be able to drain the vault..."
|
| 219 |
|
| 220 |
-
s = env.step(Action(action_type=ActionType.
|
| 221 |
s = env.step(Action(action_type=ActionType.SUBMIT_FUNCTION,
|
| 222 |
params={"function_name": "emergencyDrain"}))
|
| 223 |
print(s.reward.value) # +5.0
|
|
@@ -245,7 +245,7 @@ curl -X POST localhost:7860/reset \
|
|
| 245 |
|
| 246 |
curl -X POST localhost:7860/step \
|
| 247 |
-H "Content-Type: application/json" \
|
| 248 |
-
-d '{"action_type":"
|
| 249 |
|
| 250 |
curl -X POST localhost:7860/step \
|
| 251 |
-H "Content-Type: application/json" \
|
|
|
|
| 102 |
| `get_function_code` | `function_name` | β0.10 |
|
| 103 |
| `get_state_variable` | `variable_name` (opt.) | β0.05 |
|
| 104 |
| `get_call_graph` | β | β0.08 |
|
| 105 |
+
| `get_property_specification` | β | **β0.03** (cheapest β read this first!) |
|
| 106 |
| `submit_function` | `function_name` | **+5.0 / +1.5 / β1.5**, ONE attempt |
|
| 107 |
|
| 108 |
### Grader (three-tier deterministic)
|
|
|
|
| 111 |
- **0.3** β a direct internal subfunction of the target β reward **+1.5**
|
| 112 |
- **0.0** β anything else β reward **β1.5**
|
| 113 |
|
| 114 |
+
`get_property_specification` returns the precise pre/post-condition (`rule_broken_specs`). Reading it costs only β0.03 and usually provides enough information to identify the violating function without inspecting all code.
|
| 115 |
|
| 116 |
---
|
| 117 |
|
|
|
|
| 125 |
"contract_name": "SimpleVault",
|
| 126 |
"contract_description": "An ETH vault that allows users to deposit...",
|
| 127 |
"available_actions": ["list_functions", "get_function_metadata", "..."],
|
| 128 |
+
"last_action": "get_property_specification",
|
| 129 |
"last_action_result": "Formal property:\nPre: caller != owner...",
|
| 130 |
"step_count": 1,
|
| 131 |
"cumulative_reward": -0.03,
|
|
|
|
| 217 |
print(r.observation.extra["property_english"])
|
| 218 |
# "Only the owner should be able to drain the vault..."
|
| 219 |
|
| 220 |
+
s = env.step(Action(action_type=ActionType.GET_PROPERTY_SPECIFICATION))
|
| 221 |
s = env.step(Action(action_type=ActionType.SUBMIT_FUNCTION,
|
| 222 |
params={"function_name": "emergencyDrain"}))
|
| 223 |
print(s.reward.value) # +5.0
|
|
|
|
| 245 |
|
| 246 |
curl -X POST localhost:7860/step \
|
| 247 |
-H "Content-Type: application/json" \
|
| 248 |
+
-d '{"action_type":"get_property_specification","params":{}}'
|
| 249 |
|
| 250 |
curl -X POST localhost:7860/step \
|
| 251 |
-H "Content-Type: application/json" \
|
app.py
CHANGED
|
@@ -205,7 +205,7 @@ def action_space(task_id: str = "task1_vuln_detection"):
|
|
| 205 |
{"type": "get_function_code", "params": {"function_name": "string"}, "reward": -0.10, "description": "Read full Solidity source of a function"},
|
| 206 |
{"type": "get_state_variable", "params": {"variable_name": "string (opt)"}, "reward": -0.05, "description": "Get a state variable or list all"},
|
| 207 |
{"type": "get_call_graph", "params": {}, "reward": -0.08, "description": "Get function call graph"},
|
| 208 |
-
{"type": "
|
| 209 |
{"type": "submit_function", "params": {"function_name": "string"}, "reward": "+5.0 / +1.5 / -1.5", "description": "Submit answer. ONE attempt. Ends episode."},
|
| 210 |
],
|
| 211 |
}
|
|
|
|
| 205 |
{"type": "get_function_code", "params": {"function_name": "string"}, "reward": -0.10, "description": "Read full Solidity source of a function"},
|
| 206 |
{"type": "get_state_variable", "params": {"variable_name": "string (opt)"}, "reward": -0.05, "description": "Get a state variable or list all"},
|
| 207 |
{"type": "get_call_graph", "params": {}, "reward": -0.08, "description": "Get function call graph"},
|
| 208 |
+
{"type": "get_property_specification", "params": {}, "reward": -0.03, "description": "Get formal pre/post-condition for the property"},
|
| 209 |
{"type": "submit_function", "params": {"function_name": "string"}, "reward": "+5.0 / +1.5 / -1.5", "description": "Submit answer. ONE attempt. Ends episode."},
|
| 210 |
],
|
| 211 |
}
|
data/Template.json
CHANGED
|
@@ -1,14 +1,12 @@
|
|
| 1 |
{
|
| 2 |
"contract_name": "ExampleContract",
|
| 3 |
"file_name": "ExampleContract.sol",
|
| 4 |
-
|
| 5 |
"metadata": {
|
| 6 |
"license": "MIT",
|
| 7 |
"solidity_version": "0.8.0",
|
| 8 |
"description": "Example contract demonstrating the template structure",
|
| 9 |
"author": "Example Author"
|
| 10 |
},
|
| 11 |
-
|
| 12 |
"state_variables": [
|
| 13 |
{
|
| 14 |
"name": "owner",
|
|
@@ -25,7 +23,6 @@
|
|
| 25 |
"description": "User token balances"
|
| 26 |
}
|
| 27 |
],
|
| 28 |
-
|
| 29 |
"functions": [
|
| 30 |
{
|
| 31 |
"name": "transfer",
|
|
@@ -48,7 +45,9 @@
|
|
| 48 |
],
|
| 49 |
"returns": "bool - true on success",
|
| 50 |
"output_property": "Decreases caller's balance by amount, increases recipient's balance by amount. Emits Transfer event. Reverts if recipient is zero address or caller has insufficient balance.",
|
| 51 |
-
"events": [
|
|
|
|
|
|
|
| 52 |
"vulnerable": false,
|
| 53 |
"vulnerability_details": null,
|
| 54 |
"property": null,
|
|
@@ -79,10 +78,14 @@
|
|
| 79 |
"mitigation": "Use checks-effects-interactions pattern: update balance before external call"
|
| 80 |
},
|
| 81 |
"property": "When a user withdraws x amount of ETH, the user's balance should decrease by x. Due to reentrancy, an attacker can call withdraw recursively before balance is updated, draining more than their balance.",
|
| 82 |
-
"property_specification":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
}
|
| 84 |
],
|
| 85 |
-
|
| 86 |
"structs": [
|
| 87 |
{
|
| 88 |
"name": "MintLocalVars",
|
|
@@ -90,7 +93,6 @@
|
|
| 90 |
"description": "Local variables used in mint function to avoid stack too deep errors"
|
| 91 |
}
|
| 92 |
],
|
| 93 |
-
|
| 94 |
"modifiers": [
|
| 95 |
{
|
| 96 |
"name": "onlyOwner",
|
|
@@ -103,12 +105,10 @@
|
|
| 103 |
"purpose": "Prevents reentrancy attacks by using a mutex lock"
|
| 104 |
}
|
| 105 |
],
|
| 106 |
-
|
| 107 |
"inheritance": [
|
| 108 |
"ERC20",
|
| 109 |
"Ownable"
|
| 110 |
],
|
| 111 |
-
|
| 112 |
"call_graph": {
|
| 113 |
"constructor": [
|
| 114 |
"ERC20.constructor()"
|
|
@@ -120,7 +120,6 @@
|
|
| 120 |
"msg.sender.call()"
|
| 121 |
]
|
| 122 |
},
|
| 123 |
-
|
| 124 |
"audit_issues": [
|
| 125 |
{
|
| 126 |
"function": "withdraw",
|
|
@@ -130,10 +129,14 @@
|
|
| 130 |
"status": "Fixed",
|
| 131 |
"mitigation": "Moved balance update before external call (checks-effects-interactions pattern)",
|
| 132 |
"property": "When a user withdraws x amount, the user's balance should decrease by x. Due to reentrancy, an attacker can withdraw multiple times before balance updates, draining more than their balance.",
|
| 133 |
-
"property_specification":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 134 |
}
|
| 135 |
],
|
| 136 |
-
|
| 137 |
"events": [
|
| 138 |
{
|
| 139 |
"name": "Transfer",
|
|
|
|
| 1 |
{
|
| 2 |
"contract_name": "ExampleContract",
|
| 3 |
"file_name": "ExampleContract.sol",
|
|
|
|
| 4 |
"metadata": {
|
| 5 |
"license": "MIT",
|
| 6 |
"solidity_version": "0.8.0",
|
| 7 |
"description": "Example contract demonstrating the template structure",
|
| 8 |
"author": "Example Author"
|
| 9 |
},
|
|
|
|
| 10 |
"state_variables": [
|
| 11 |
{
|
| 12 |
"name": "owner",
|
|
|
|
| 23 |
"description": "User token balances"
|
| 24 |
}
|
| 25 |
],
|
|
|
|
| 26 |
"functions": [
|
| 27 |
{
|
| 28 |
"name": "transfer",
|
|
|
|
| 45 |
],
|
| 46 |
"returns": "bool - true on success",
|
| 47 |
"output_property": "Decreases caller's balance by amount, increases recipient's balance by amount. Emits Transfer event. Reverts if recipient is zero address or caller has insufficient balance.",
|
| 48 |
+
"events": [
|
| 49 |
+
"Transfer"
|
| 50 |
+
],
|
| 51 |
"vulnerable": false,
|
| 52 |
"vulnerability_details": null,
|
| 53 |
"property": null,
|
|
|
|
| 78 |
"mitigation": "Use checks-effects-interactions pattern: update balance before external call"
|
| 79 |
},
|
| 80 |
"property": "When a user withdraws x amount of ETH, the user's balance should decrease by x. Due to reentrancy, an attacker can call withdraw recursively before balance is updated, draining more than their balance.",
|
| 81 |
+
"property_specification": {
|
| 82 |
+
"Pre-condition": "User balance = B",
|
| 83 |
+
"Operation": "withdraw(amount)",
|
| 84 |
+
"Expected": "User balance = B - amount",
|
| 85 |
+
"Actual": "Reentrant calls allow user balance = B - (n * amount) where n > 1"
|
| 86 |
+
}
|
| 87 |
}
|
| 88 |
],
|
|
|
|
| 89 |
"structs": [
|
| 90 |
{
|
| 91 |
"name": "MintLocalVars",
|
|
|
|
| 93 |
"description": "Local variables used in mint function to avoid stack too deep errors"
|
| 94 |
}
|
| 95 |
],
|
|
|
|
| 96 |
"modifiers": [
|
| 97 |
{
|
| 98 |
"name": "onlyOwner",
|
|
|
|
| 105 |
"purpose": "Prevents reentrancy attacks by using a mutex lock"
|
| 106 |
}
|
| 107 |
],
|
|
|
|
| 108 |
"inheritance": [
|
| 109 |
"ERC20",
|
| 110 |
"Ownable"
|
| 111 |
],
|
|
|
|
| 112 |
"call_graph": {
|
| 113 |
"constructor": [
|
| 114 |
"ERC20.constructor()"
|
|
|
|
| 120 |
"msg.sender.call()"
|
| 121 |
]
|
| 122 |
},
|
|
|
|
| 123 |
"audit_issues": [
|
| 124 |
{
|
| 125 |
"function": "withdraw",
|
|
|
|
| 129 |
"status": "Fixed",
|
| 130 |
"mitigation": "Moved balance update before external call (checks-effects-interactions pattern)",
|
| 131 |
"property": "When a user withdraws x amount, the user's balance should decrease by x. Due to reentrancy, an attacker can withdraw multiple times before balance updates, draining more than their balance.",
|
| 132 |
+
"property_specification": {
|
| 133 |
+
"Pre-condition": "User balance = B",
|
| 134 |
+
"Operation": "withdraw(amount)",
|
| 135 |
+
"Expected": "User balance = B - amount",
|
| 136 |
+
"Actual": "Reentrant calls allow user balance = B - (n * amount) where n > 1"
|
| 137 |
+
}
|
| 138 |
}
|
| 139 |
],
|
|
|
|
| 140 |
"events": [
|
| 141 |
{
|
| 142 |
"name": "Transfer",
|
data/data_loader.py
CHANGED
|
@@ -163,15 +163,14 @@ def get_all_task3_entries(
|
|
| 163 |
contracts: List[Dict[str, Any]],
|
| 164 |
) -> List[Tuple[Dict[str, Any], Dict[str, Any]]]:
|
| 165 |
"""
|
| 166 |
-
Returns (contract, function) pairs where function
|
| 167 |
-
|
| 168 |
"""
|
|
|
|
| 169 |
entries = []
|
| 170 |
-
for contract in
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
if t3.get("property_english"):
|
| 174 |
-
entries.append((contract, fn))
|
| 175 |
return entries
|
| 176 |
|
| 177 |
|
|
|
|
| 163 |
contracts: List[Dict[str, Any]],
|
| 164 |
) -> List[Tuple[Dict[str, Any], Dict[str, Any]]]:
|
| 165 |
"""
|
| 166 |
+
Returns (contract, function) pairs where function is vulnerable
|
| 167 |
+
and has a property field.
|
| 168 |
"""
|
| 169 |
+
vulnerable_entries = get_all_vulnerable_entries(contracts)
|
| 170 |
entries = []
|
| 171 |
+
for contract, fn in vulnerable_entries:
|
| 172 |
+
if fn.get("property", None) is not None:
|
| 173 |
+
entries.append((contract, fn))
|
|
|
|
|
|
|
| 174 |
return entries
|
| 175 |
|
| 176 |
|
demo.py
CHANGED
|
@@ -363,7 +363,7 @@ def run_auto_demo_t2(seed: int = 42, delay: float = 0.9):
|
|
| 363 |
|
| 364 |
DEMO_SCRIPTS_T3 = {
|
| 365 |
42: [
|
| 366 |
-
(ActionType.
|
| 367 |
"Read the formal spec first β cheapest action at -0.03."),
|
| 368 |
(ActionType.LIST_FUNCTIONS, {},
|
| 369 |
"List all functions to survey candidates."),
|
|
@@ -373,7 +373,7 @@ DEMO_SCRIPTS_T3 = {
|
|
| 373 |
"Confident. emergencyDrain violates the access-control property."),
|
| 374 |
],
|
| 375 |
45: [
|
| 376 |
-
(ActionType.
|
| 377 |
"Formal spec: first caller at valid price should win."),
|
| 378 |
(ActionType.LIST_FUNCTIONS, {},
|
| 379 |
"Auction contract β bid() immediately looks suspicious."),
|
|
|
|
| 363 |
|
| 364 |
DEMO_SCRIPTS_T3 = {
|
| 365 |
42: [
|
| 366 |
+
(ActionType.GET_PROPERTY_SPECIFICATION, {},
|
| 367 |
"Read the formal spec first β cheapest action at -0.03."),
|
| 368 |
(ActionType.LIST_FUNCTIONS, {},
|
| 369 |
"List all functions to survey candidates."),
|
|
|
|
| 373 |
"Confident. emergencyDrain violates the access-control property."),
|
| 374 |
],
|
| 375 |
45: [
|
| 376 |
+
(ActionType.GET_PROPERTY_SPECIFICATION, {},
|
| 377 |
"Formal spec: first caller at valid price should win."),
|
| 378 |
(ActionType.LIST_FUNCTIONS, {},
|
| 379 |
"Auction contract β bid() immediately looks suspicious."),
|
env/schemas.py
CHANGED
|
@@ -42,7 +42,7 @@ class ActionType(str, Enum):
|
|
| 42 |
SUBMIT_PROPERTY = "submit_property" # scored 0β5, one attempt
|
| 43 |
|
| 44 |
# ββ Task 3 β Rule Checker ββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 45 |
-
|
| 46 |
GET_FUNCTION_METADATA = "get_function_metadata" # -0.05
|
| 47 |
SUBMIT_FUNCTION = "submit_function" # +5.0 / +1.5 / -1.5, one attempt
|
| 48 |
|
|
|
|
| 42 |
SUBMIT_PROPERTY = "submit_property" # scored 0β5, one attempt
|
| 43 |
|
| 44 |
# ββ Task 3 β Rule Checker ββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 45 |
+
GET_PROPERTY_SPECIFICATION = "get_property_specification" # -0.03
|
| 46 |
GET_FUNCTION_METADATA = "get_function_metadata" # -0.05
|
| 47 |
SUBMIT_FUNCTION = "submit_function" # +5.0 / +1.5 / -1.5, one attempt
|
| 48 |
|
eval.py
CHANGED
|
@@ -151,7 +151,7 @@ def oracle_t3(env: Task3Environment, seed: int, verbose: bool = False) -> Dict[s
|
|
| 151 |
if verbose:
|
| 152 |
prop = obs.extra.get("property_english", "")[:60]
|
| 153 |
print(f" {contract}.{fn_name}() \"{prop}\"")
|
| 154 |
-
env.step(Action(action_type=ActionType.
|
| 155 |
env.step(Action(action_type=ActionType.LIST_FUNCTIONS))
|
| 156 |
result = env.step(Action(action_type=ActionType.SUBMIT_FUNCTION,
|
| 157 |
params={"function_name": fn_name}))
|
|
|
|
| 151 |
if verbose:
|
| 152 |
prop = obs.extra.get("property_english", "")[:60]
|
| 153 |
print(f" {contract}.{fn_name}() \"{prop}\"")
|
| 154 |
+
env.step(Action(action_type=ActionType.GET_PROPERTY_SPECIFICATION))
|
| 155 |
env.step(Action(action_type=ActionType.LIST_FUNCTIONS))
|
| 156 |
result = env.step(Action(action_type=ActionType.SUBMIT_FUNCTION,
|
| 157 |
params={"function_name": fn_name}))
|
openenv.yaml
CHANGED
|
@@ -73,7 +73,7 @@ action_space:
|
|
| 73 |
get_function_natspec: {params: {}, reward: -0.08}
|
| 74 |
get_file_natspec: {params: {}, reward: -0.03}
|
| 75 |
get_related_functions: {params: {}, reward: -0.06}
|
| 76 |
-
|
| 77 |
get_similar_rule: {params: {}, reward: -0.20}
|
| 78 |
submit_property: {params: {property: string}, reward: "0.0-5.0 keyword-weighted, one attempt"}
|
| 79 |
task3:
|
|
@@ -82,7 +82,7 @@ action_space:
|
|
| 82 |
get_function_code: {params: {function_name: string}, reward: -0.10}
|
| 83 |
get_state_variable: {params: {variable_name: "string opt"}, reward: -0.05}
|
| 84 |
get_call_graph: {params: {}, reward: -0.08}
|
| 85 |
-
|
| 86 |
submit_function: {params: {function_name: string}, reward: "+5.0 / +1.5 / -1.5, one attempt"}
|
| 87 |
|
| 88 |
reward:
|
|
|
|
| 73 |
get_function_natspec: {params: {}, reward: -0.08}
|
| 74 |
get_file_natspec: {params: {}, reward: -0.03}
|
| 75 |
get_related_functions: {params: {}, reward: -0.06}
|
| 76 |
+
get_signature: {params: {}, reward: -0.04}
|
| 77 |
get_similar_rule: {params: {}, reward: -0.20}
|
| 78 |
submit_property: {params: {property: string}, reward: "0.0-5.0 keyword-weighted, one attempt"}
|
| 79 |
task3:
|
|
|
|
| 82 |
get_function_code: {params: {function_name: string}, reward: -0.10}
|
| 83 |
get_state_variable: {params: {variable_name: "string opt"}, reward: -0.05}
|
| 84 |
get_call_graph: {params: {}, reward: -0.08}
|
| 85 |
+
get_property_specification: {params: {}, reward: -0.03}
|
| 86 |
submit_function: {params: {function_name: string}, reward: "+5.0 / +1.5 / -1.5, one attempt"}
|
| 87 |
|
| 88 |
reward:
|
tasks/task3/actions.py
CHANGED
|
@@ -1,5 +1,8 @@
|
|
| 1 |
-
"""
|
|
|
|
|
|
|
| 2 |
|
|
|
|
| 3 |
from typing import Any, Dict, Tuple
|
| 4 |
from data.data_loader import (
|
| 5 |
get_function_by_name,
|
|
@@ -105,17 +108,20 @@ def get_call_graph(ctx: Any, qkey: str, params: Dict) -> Tuple[str, Reward]:
|
|
| 105 |
Reward(value=-0.08, reason="get_call_graph cost"),
|
| 106 |
)
|
| 107 |
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
"""Handle GET_FORMALIZED_PROPERTY action."""
|
| 111 |
if ctx._is_repeated(qkey):
|
| 112 |
return "Repeated query.", Reward(value=-0.40, reason="Repeated query")
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
return (
|
| 117 |
-
f"Formal property:\n{
|
| 118 |
-
Reward(value=-0.03, reason="
|
| 119 |
)
|
| 120 |
|
| 121 |
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Task 3: Identify the function that violates a specified property.
|
| 3 |
+
"""
|
| 4 |
|
| 5 |
+
import json
|
| 6 |
from typing import Any, Dict, Tuple
|
| 7 |
from data.data_loader import (
|
| 8 |
get_function_by_name,
|
|
|
|
| 108 |
Reward(value=-0.08, reason="get_call_graph cost"),
|
| 109 |
)
|
| 110 |
|
| 111 |
+
def get_property_specification(ctx: Any, qkey: str, params: Dict) -> Tuple[str, Reward]:
|
| 112 |
+
"""Handle GET_PROPERTY_SPECIFICATION action."""
|
|
|
|
| 113 |
if ctx._is_repeated(qkey):
|
| 114 |
return "Repeated query.", Reward(value=-0.40, reason="Repeated query")
|
| 115 |
+
|
| 116 |
+
rule = ctx._target_fn.get("property_specification", {})
|
| 117 |
+
if not rule:
|
| 118 |
+
rule = "No rule specification available for this property."
|
| 119 |
+
|
| 120 |
+
rule_parsed = json.dumps(rule) if isinstance(rule, dict) else rule
|
| 121 |
+
|
| 122 |
return (
|
| 123 |
+
f"Formal property:\n{rule_parsed}",
|
| 124 |
+
Reward(value=-0.03, reason="get_property_specification cost"),
|
| 125 |
)
|
| 126 |
|
| 127 |
|
tasks/task3/environment.py
CHANGED
|
@@ -22,7 +22,7 @@ Actions & rewards
|
|
| 22 |
get_function_code -0.10 full Solidity source of any function
|
| 23 |
get_state_variables -0.05 list or inspect state variables
|
| 24 |
get_call_graph -0.08 function call graph
|
| 25 |
-
|
| 26 |
submit_function terminal: +5.0 / +1.5 / -1.5 (ONE attempt)
|
| 27 |
repeated_query -0.40
|
| 28 |
|
|
@@ -59,7 +59,7 @@ AVAILABLE_ACTIONS = [
|
|
| 59 |
ActionType.GET_FUNCTION_CODE,
|
| 60 |
ActionType.GET_STATE_VARIABLE,
|
| 61 |
ActionType.GET_CALL_GRAPH,
|
| 62 |
-
ActionType.
|
| 63 |
ActionType.SUBMIT_FUNCTION,
|
| 64 |
]
|
| 65 |
|
|
@@ -113,7 +113,7 @@ class Task3Environment(BaseEnv):
|
|
| 113 |
f"Find the function in this contract that violates the property above.\n"
|
| 114 |
f"Use list_functions then get_function_code to investigate.\n"
|
| 115 |
f"Submit with submit_function, params={{\"function_name\": \"...\"}}.\n"
|
| 116 |
-
f"ONE submission allowed."
|
| 117 |
),
|
| 118 |
)
|
| 119 |
return ResetResult(observation=obs, info={"task_id": TASK_ID})
|
|
@@ -190,13 +190,13 @@ class Task3Environment(BaseEnv):
|
|
| 190 |
|
| 191 |
# Mapping from ActionType to handler function
|
| 192 |
handlers = {
|
| 193 |
-
ActionType.LIST_FUNCTIONS:
|
| 194 |
-
ActionType.GET_FUNCTION_METADATA:
|
| 195 |
-
ActionType.GET_FUNCTION_CODE:
|
| 196 |
-
ActionType.GET_STATE_VARIABLE:
|
| 197 |
-
ActionType.GET_CALL_GRAPH:
|
| 198 |
-
ActionType.
|
| 199 |
-
ActionType.SUBMIT_FUNCTION:
|
| 200 |
}
|
| 201 |
|
| 202 |
handler = handlers.get(at)
|
|
|
|
| 22 |
get_function_code -0.10 full Solidity source of any function
|
| 23 |
get_state_variables -0.05 list or inspect state variables
|
| 24 |
get_call_graph -0.08 function call graph
|
| 25 |
+
get_property_specification -0.03 formal pre/post-condition version of property
|
| 26 |
submit_function terminal: +5.0 / +1.5 / -1.5 (ONE attempt)
|
| 27 |
repeated_query -0.40
|
| 28 |
|
|
|
|
| 59 |
ActionType.GET_FUNCTION_CODE,
|
| 60 |
ActionType.GET_STATE_VARIABLE,
|
| 61 |
ActionType.GET_CALL_GRAPH,
|
| 62 |
+
ActionType.GET_PROPERTY_SPECIFICATION,
|
| 63 |
ActionType.SUBMIT_FUNCTION,
|
| 64 |
]
|
| 65 |
|
|
|
|
| 113 |
f"Find the function in this contract that violates the property above.\n"
|
| 114 |
f"Use list_functions then get_function_code to investigate.\n"
|
| 115 |
f"Submit with submit_function, params={{\"function_name\": \"...\"}}.\n"
|
| 116 |
+
f"Only ONE submission allowed."
|
| 117 |
),
|
| 118 |
)
|
| 119 |
return ResetResult(observation=obs, info={"task_id": TASK_ID})
|
|
|
|
| 190 |
|
| 191 |
# Mapping from ActionType to handler function
|
| 192 |
handlers = {
|
| 193 |
+
ActionType.LIST_FUNCTIONS: actions.list_functions,
|
| 194 |
+
ActionType.GET_FUNCTION_METADATA: actions.get_function_metadata,
|
| 195 |
+
ActionType.GET_FUNCTION_CODE: actions.get_function_code,
|
| 196 |
+
ActionType.GET_STATE_VARIABLE: actions.get_state_variable,
|
| 197 |
+
ActionType.GET_CALL_GRAPH: actions.get_call_graph,
|
| 198 |
+
ActionType.GET_PROPERTY_SPECIFICATION: actions.get_property_specification,
|
| 199 |
+
ActionType.SUBMIT_FUNCTION: actions.submit_function,
|
| 200 |
}
|
| 201 |
|
| 202 |
handler = handlers.get(at)
|
utils/prompts.py
CHANGED
|
@@ -83,7 +83,7 @@ Your task is to find the ONE function that violates this property.
|
|
| 83 |
|
| 84 |
## Actions (respond with JSON only, ONE action per turn):
|
| 85 |
{"action": "list_functions", "params": {}}
|
| 86 |
-
{"action": "
|
| 87 |
{"action": "get_function_metadata", "params": {"function_name": "<n>"}}
|
| 88 |
{"action": "get_function_code", "params": {"function_name": "<n>"}}
|
| 89 |
{"action": "get_state_variable", "params": {"variable_name": "<n>"}}
|
|
@@ -93,7 +93,7 @@ Your task is to find the ONE function that violates this property.
|
|
| 93 |
## Strategy:
|
| 94 |
1. Read the property shown as property_english in the observation.
|
| 95 |
2. list_functions to survey candidates.
|
| 96 |
-
3.
|
| 97 |
4. get_function_code on the 1-2 most suspicious functions.
|
| 98 |
5. submit_function when confident β ONE attempt only.
|
| 99 |
|
|
|
|
| 83 |
|
| 84 |
## Actions (respond with JSON only, ONE action per turn):
|
| 85 |
{"action": "list_functions", "params": {}}
|
| 86 |
+
{"action": "get_property_specification", "params": {}}
|
| 87 |
{"action": "get_function_metadata", "params": {"function_name": "<n>"}}
|
| 88 |
{"action": "get_function_code", "params": {"function_name": "<n>"}}
|
| 89 |
{"action": "get_state_variable", "params": {"variable_name": "<n>"}}
|
|
|
|
| 93 |
## Strategy:
|
| 94 |
1. Read the property shown as property_english in the observation.
|
| 95 |
2. list_functions to survey candidates.
|
| 96 |
+
3. get_property_specification for the precise pre/post-condition (cheap: -0.03).
|
| 97 |
4. get_function_code on the 1-2 most suspicious functions.
|
| 98 |
5. submit_function when confident β ONE attempt only.
|
| 99 |
|
validate.py
CHANGED
|
@@ -42,7 +42,7 @@ def check_pydantic_models():
|
|
| 42 |
from env.schemas import Observation, Action, ActionType, Reward, StepResult, ResetResult
|
| 43 |
obs = Observation(task_id="t", contract_name="C", contract_description="D", available_actions=[])
|
| 44 |
for at in [ActionType.LIST_FUNCTIONS, ActionType.SUBMIT_PROPERTY,
|
| 45 |
-
ActionType.
|
| 46 |
Action(action_type=at)
|
| 47 |
Reward(value=-1.5, reason="test")
|
| 48 |
StepResult(observation=obs, reward=Reward(value=0, reason=""), done=False)
|
|
@@ -90,7 +90,7 @@ def check_t3_env():
|
|
| 90 |
assert "property_english" in r.observation.extra
|
| 91 |
prop = r.observation.extra["property_english"]
|
| 92 |
assert len(prop) > 10, "property_english too short"
|
| 93 |
-
for at in [ActionType.LIST_FUNCTIONS, ActionType.
|
| 94 |
ActionType.GET_CALL_GRAPH, ActionType.GET_STATE_VARIABLE]:
|
| 95 |
s = env.step(Action(action_type=at))
|
| 96 |
assert s.reward.value < 0, f"{at.value} should have negative shaping reward"
|
|
@@ -100,7 +100,7 @@ def check_t3_action_costs():
|
|
| 100 |
from env.schemas import Action, ActionType
|
| 101 |
env = Task3Environment(); env.reset(seed=42)
|
| 102 |
costs = {
|
| 103 |
-
ActionType.
|
| 104 |
ActionType.LIST_FUNCTIONS: -0.05,
|
| 105 |
ActionType.GET_CALL_GRAPH: -0.08,
|
| 106 |
}
|
|
@@ -201,7 +201,7 @@ def check_reward_shaping():
|
|
| 201 |
env = Task3Environment(); env.reset(seed=1)
|
| 202 |
rewards = {env.step(Action(action_type=at)).reward.value
|
| 203 |
for at in [ActionType.LIST_FUNCTIONS,
|
| 204 |
-
ActionType.
|
| 205 |
ActionType.GET_CALL_GRAPH]}
|
| 206 |
assert len(rewards) >= 2
|
| 207 |
|
|
|
|
| 42 |
from env.schemas import Observation, Action, ActionType, Reward, StepResult, ResetResult
|
| 43 |
obs = Observation(task_id="t", contract_name="C", contract_description="D", available_actions=[])
|
| 44 |
for at in [ActionType.LIST_FUNCTIONS, ActionType.SUBMIT_PROPERTY,
|
| 45 |
+
ActionType.GET_PROPERTY_SPECIFICATION, ActionType.SUBMIT_FUNCTION]:
|
| 46 |
Action(action_type=at)
|
| 47 |
Reward(value=-1.5, reason="test")
|
| 48 |
StepResult(observation=obs, reward=Reward(value=0, reason=""), done=False)
|
|
|
|
| 90 |
assert "property_english" in r.observation.extra
|
| 91 |
prop = r.observation.extra["property_english"]
|
| 92 |
assert len(prop) > 10, "property_english too short"
|
| 93 |
+
for at in [ActionType.LIST_FUNCTIONS, ActionType.GET_PROPERTY_SPECIFICATION,
|
| 94 |
ActionType.GET_CALL_GRAPH, ActionType.GET_STATE_VARIABLE]:
|
| 95 |
s = env.step(Action(action_type=at))
|
| 96 |
assert s.reward.value < 0, f"{at.value} should have negative shaping reward"
|
|
|
|
| 100 |
from env.schemas import Action, ActionType
|
| 101 |
env = Task3Environment(); env.reset(seed=42)
|
| 102 |
costs = {
|
| 103 |
+
ActionType.GET_PROPERTY_SPECIFICATION: -0.03,
|
| 104 |
ActionType.LIST_FUNCTIONS: -0.05,
|
| 105 |
ActionType.GET_CALL_GRAPH: -0.08,
|
| 106 |
}
|
|
|
|
| 201 |
env = Task3Environment(); env.reset(seed=1)
|
| 202 |
rewards = {env.step(Action(action_type=at)).reward.value
|
| 203 |
for at in [ActionType.LIST_FUNCTIONS,
|
| 204 |
+
ActionType.GET_PROPERTY_SPECIFICATION,
|
| 205 |
ActionType.GET_CALL_GRAPH]}
|
| 206 |
assert len(rewards) >= 2
|
| 207 |
|