ajaxwin commited on
Commit
056cf7b
Β·
1 Parent(s): c719864

task3 reviewed

Browse files

property_specification added

README.md CHANGED
@@ -102,7 +102,7 @@ Matching uses **word-set containment + synonym expansion** β€” words don't need
102
  | `get_function_code` | `function_name` | βˆ’0.10 |
103
  | `get_state_variable` | `variable_name` (opt.) | βˆ’0.05 |
104
  | `get_call_graph` | β€” | βˆ’0.08 |
105
- | `get_formalized_property` | β€” | **βˆ’0.03** (cheapest β€” read this first!) |
106
  | `submit_function` | `function_name` | **+5.0 / +1.5 / βˆ’1.5**, ONE attempt |
107
 
108
  ### Grader (three-tier deterministic)
@@ -111,7 +111,7 @@ Matching uses **word-set containment + synonym expansion** β€” words don't need
111
  - **0.3** β†’ a direct internal subfunction of the target β†’ reward **+1.5**
112
  - **0.0** β†’ anything else β†’ reward **βˆ’1.5**
113
 
114
- `get_formalized_property` returns the precise pre/post-condition (`rule_broken_specs`). Reading it costs only βˆ’0.03 and usually provides enough information to identify the violating function without inspecting all code.
115
 
116
  ---
117
 
@@ -125,7 +125,7 @@ All tasks share the same `Observation` structure:
125
  "contract_name": "SimpleVault",
126
  "contract_description": "An ETH vault that allows users to deposit...",
127
  "available_actions": ["list_functions", "get_function_metadata", "..."],
128
- "last_action": "get_formalized_property",
129
  "last_action_result": "Formal property:\nPre: caller != owner...",
130
  "step_count": 1,
131
  "cumulative_reward": -0.03,
@@ -217,7 +217,7 @@ r = env.reset(seed=42)
217
  print(r.observation.extra["property_english"])
218
  # "Only the owner should be able to drain the vault..."
219
 
220
- s = env.step(Action(action_type=ActionType.GET_FORMALIZED_PROPERTY))
221
  s = env.step(Action(action_type=ActionType.SUBMIT_FUNCTION,
222
  params={"function_name": "emergencyDrain"}))
223
  print(s.reward.value) # +5.0
@@ -245,7 +245,7 @@ curl -X POST localhost:7860/reset \
245
 
246
  curl -X POST localhost:7860/step \
247
  -H "Content-Type: application/json" \
248
- -d '{"action_type":"get_formalized_property","params":{}}'
249
 
250
  curl -X POST localhost:7860/step \
251
  -H "Content-Type: application/json" \
 
102
  | `get_function_code` | `function_name` | βˆ’0.10 |
103
  | `get_state_variable` | `variable_name` (opt.) | βˆ’0.05 |
104
  | `get_call_graph` | β€” | βˆ’0.08 |
105
+ | `get_property_specification` | β€” | **βˆ’0.03** (cheapest β€” read this first!) |
106
  | `submit_function` | `function_name` | **+5.0 / +1.5 / βˆ’1.5**, ONE attempt |
107
 
108
  ### Grader (three-tier deterministic)
 
111
  - **0.3** β†’ a direct internal subfunction of the target β†’ reward **+1.5**
112
  - **0.0** β†’ anything else β†’ reward **βˆ’1.5**
113
 
114
+ `get_property_specification` returns the precise pre/post-condition (`rule_broken_specs`). Reading it costs only βˆ’0.03 and usually provides enough information to identify the violating function without inspecting all code.
115
 
116
  ---
117
 
 
125
  "contract_name": "SimpleVault",
126
  "contract_description": "An ETH vault that allows users to deposit...",
127
  "available_actions": ["list_functions", "get_function_metadata", "..."],
128
+ "last_action": "get_property_specification",
129
  "last_action_result": "Formal property:\nPre: caller != owner...",
130
  "step_count": 1,
131
  "cumulative_reward": -0.03,
 
217
  print(r.observation.extra["property_english"])
218
  # "Only the owner should be able to drain the vault..."
219
 
220
+ s = env.step(Action(action_type=ActionType.GET_PROPERTY_SPECIFICATION))
221
  s = env.step(Action(action_type=ActionType.SUBMIT_FUNCTION,
222
  params={"function_name": "emergencyDrain"}))
223
  print(s.reward.value) # +5.0
 
245
 
246
  curl -X POST localhost:7860/step \
247
  -H "Content-Type: application/json" \
248
+ -d '{"action_type":"get_property_specification","params":{}}'
249
 
250
  curl -X POST localhost:7860/step \
251
  -H "Content-Type: application/json" \
app.py CHANGED
@@ -205,7 +205,7 @@ def action_space(task_id: str = "task1_vuln_detection"):
205
  {"type": "get_function_code", "params": {"function_name": "string"}, "reward": -0.10, "description": "Read full Solidity source of a function"},
206
  {"type": "get_state_variable", "params": {"variable_name": "string (opt)"}, "reward": -0.05, "description": "Get a state variable or list all"},
207
  {"type": "get_call_graph", "params": {}, "reward": -0.08, "description": "Get function call graph"},
208
- {"type": "get_formalized_property", "params": {}, "reward": -0.03, "description": "Get formal pre/post-condition for the property"},
209
  {"type": "submit_function", "params": {"function_name": "string"}, "reward": "+5.0 / +1.5 / -1.5", "description": "Submit answer. ONE attempt. Ends episode."},
210
  ],
211
  }
 
205
  {"type": "get_function_code", "params": {"function_name": "string"}, "reward": -0.10, "description": "Read full Solidity source of a function"},
206
  {"type": "get_state_variable", "params": {"variable_name": "string (opt)"}, "reward": -0.05, "description": "Get a state variable or list all"},
207
  {"type": "get_call_graph", "params": {}, "reward": -0.08, "description": "Get function call graph"},
208
+ {"type": "get_property_specification", "params": {}, "reward": -0.03, "description": "Get formal pre/post-condition for the property"},
209
  {"type": "submit_function", "params": {"function_name": "string"}, "reward": "+5.0 / +1.5 / -1.5", "description": "Submit answer. ONE attempt. Ends episode."},
210
  ],
211
  }
data/Template.json CHANGED
@@ -1,14 +1,12 @@
1
  {
2
  "contract_name": "ExampleContract",
3
  "file_name": "ExampleContract.sol",
4
-
5
  "metadata": {
6
  "license": "MIT",
7
  "solidity_version": "0.8.0",
8
  "description": "Example contract demonstrating the template structure",
9
  "author": "Example Author"
10
  },
11
-
12
  "state_variables": [
13
  {
14
  "name": "owner",
@@ -25,7 +23,6 @@
25
  "description": "User token balances"
26
  }
27
  ],
28
-
29
  "functions": [
30
  {
31
  "name": "transfer",
@@ -48,7 +45,9 @@
48
  ],
49
  "returns": "bool - true on success",
50
  "output_property": "Decreases caller's balance by amount, increases recipient's balance by amount. Emits Transfer event. Reverts if recipient is zero address or caller has insufficient balance.",
51
- "events": ["Transfer"],
 
 
52
  "vulnerable": false,
53
  "vulnerability_details": null,
54
  "property": null,
@@ -79,10 +78,14 @@
79
  "mitigation": "Use checks-effects-interactions pattern: update balance before external call"
80
  },
81
  "property": "When a user withdraws x amount of ETH, the user's balance should decrease by x. Due to reentrancy, an attacker can call withdraw recursively before balance is updated, draining more than their balance.",
82
- "property_specification": "Pre-condition: User has balance B. Operation: withdraw(amount). Expected post-condition: User balance = B - amount. Actual vulnerability: Reentrant calls allow multiple withdrawals before balance update, resulting in user balance = B - (n * amount) where n > 1, violating the expected post-condition."
 
 
 
 
 
83
  }
84
  ],
85
-
86
  "structs": [
87
  {
88
  "name": "MintLocalVars",
@@ -90,7 +93,6 @@
90
  "description": "Local variables used in mint function to avoid stack too deep errors"
91
  }
92
  ],
93
-
94
  "modifiers": [
95
  {
96
  "name": "onlyOwner",
@@ -103,12 +105,10 @@
103
  "purpose": "Prevents reentrancy attacks by using a mutex lock"
104
  }
105
  ],
106
-
107
  "inheritance": [
108
  "ERC20",
109
  "Ownable"
110
  ],
111
-
112
  "call_graph": {
113
  "constructor": [
114
  "ERC20.constructor()"
@@ -120,7 +120,6 @@
120
  "msg.sender.call()"
121
  ]
122
  },
123
-
124
  "audit_issues": [
125
  {
126
  "function": "withdraw",
@@ -130,10 +129,14 @@
130
  "status": "Fixed",
131
  "mitigation": "Moved balance update before external call (checks-effects-interactions pattern)",
132
  "property": "When a user withdraws x amount, the user's balance should decrease by x. Due to reentrancy, an attacker can withdraw multiple times before balance updates, draining more than their balance.",
133
- "property_specification": "Pre-condition: User balance = B. Operation: withdraw(amount). Expected: User balance = B - amount. Actual: Reentrant calls allow user balance = B - (n * amount) where n > 1."
 
 
 
 
 
134
  }
135
  ],
136
-
137
  "events": [
138
  {
139
  "name": "Transfer",
 
1
  {
2
  "contract_name": "ExampleContract",
3
  "file_name": "ExampleContract.sol",
 
4
  "metadata": {
5
  "license": "MIT",
6
  "solidity_version": "0.8.0",
7
  "description": "Example contract demonstrating the template structure",
8
  "author": "Example Author"
9
  },
 
10
  "state_variables": [
11
  {
12
  "name": "owner",
 
23
  "description": "User token balances"
24
  }
25
  ],
 
26
  "functions": [
27
  {
28
  "name": "transfer",
 
45
  ],
46
  "returns": "bool - true on success",
47
  "output_property": "Decreases caller's balance by amount, increases recipient's balance by amount. Emits Transfer event. Reverts if recipient is zero address or caller has insufficient balance.",
48
+ "events": [
49
+ "Transfer"
50
+ ],
51
  "vulnerable": false,
52
  "vulnerability_details": null,
53
  "property": null,
 
78
  "mitigation": "Use checks-effects-interactions pattern: update balance before external call"
79
  },
80
  "property": "When a user withdraws x amount of ETH, the user's balance should decrease by x. Due to reentrancy, an attacker can call withdraw recursively before balance is updated, draining more than their balance.",
81
+ "property_specification": {
82
+ "Pre-condition": "User balance = B",
83
+ "Operation": "withdraw(amount)",
84
+ "Expected": "User balance = B - amount",
85
+ "Actual": "Reentrant calls allow user balance = B - (n * amount) where n > 1"
86
+ }
87
  }
88
  ],
 
89
  "structs": [
90
  {
91
  "name": "MintLocalVars",
 
93
  "description": "Local variables used in mint function to avoid stack too deep errors"
94
  }
95
  ],
 
96
  "modifiers": [
97
  {
98
  "name": "onlyOwner",
 
105
  "purpose": "Prevents reentrancy attacks by using a mutex lock"
106
  }
107
  ],
 
108
  "inheritance": [
109
  "ERC20",
110
  "Ownable"
111
  ],
 
112
  "call_graph": {
113
  "constructor": [
114
  "ERC20.constructor()"
 
120
  "msg.sender.call()"
121
  ]
122
  },
 
123
  "audit_issues": [
124
  {
125
  "function": "withdraw",
 
129
  "status": "Fixed",
130
  "mitigation": "Moved balance update before external call (checks-effects-interactions pattern)",
131
  "property": "When a user withdraws x amount, the user's balance should decrease by x. Due to reentrancy, an attacker can withdraw multiple times before balance updates, draining more than their balance.",
132
+ "property_specification": {
133
+ "Pre-condition": "User balance = B",
134
+ "Operation": "withdraw(amount)",
135
+ "Expected": "User balance = B - amount",
136
+ "Actual": "Reentrant calls allow user balance = B - (n * amount) where n > 1"
137
+ }
138
  }
139
  ],
 
140
  "events": [
141
  {
142
  "name": "Transfer",
data/data_loader.py CHANGED
@@ -163,15 +163,14 @@ def get_all_task3_entries(
163
  contracts: List[Dict[str, Any]],
164
  ) -> List[Tuple[Dict[str, Any], Dict[str, Any]]]:
165
  """
166
- Returns (contract, function) pairs where function has a task3 field
167
- with a non-empty property_english. These are the episode pool for Task 3.
168
  """
 
169
  entries = []
170
- for contract in contracts:
171
- for fn in contract.get("functions", []):
172
- t3 = fn.get("task3", {})
173
- if t3.get("property_english"):
174
- entries.append((contract, fn))
175
  return entries
176
 
177
 
 
163
  contracts: List[Dict[str, Any]],
164
  ) -> List[Tuple[Dict[str, Any], Dict[str, Any]]]:
165
  """
166
+ Returns (contract, function) pairs where function is vulnerable
167
+ and has a property field.
168
  """
169
+ vulnerable_entries = get_all_vulnerable_entries(contracts)
170
  entries = []
171
+ for contract, fn in vulnerable_entries:
172
+ if fn.get("property", None) is not None:
173
+ entries.append((contract, fn))
 
 
174
  return entries
175
 
176
 
demo.py CHANGED
@@ -363,7 +363,7 @@ def run_auto_demo_t2(seed: int = 42, delay: float = 0.9):
363
 
364
  DEMO_SCRIPTS_T3 = {
365
  42: [
366
- (ActionType.GET_FORMALIZED_PROPERTY, {},
367
  "Read the formal spec first β€” cheapest action at -0.03."),
368
  (ActionType.LIST_FUNCTIONS, {},
369
  "List all functions to survey candidates."),
@@ -373,7 +373,7 @@ DEMO_SCRIPTS_T3 = {
373
  "Confident. emergencyDrain violates the access-control property."),
374
  ],
375
  45: [
376
- (ActionType.GET_FORMALIZED_PROPERTY, {},
377
  "Formal spec: first caller at valid price should win."),
378
  (ActionType.LIST_FUNCTIONS, {},
379
  "Auction contract β€” bid() immediately looks suspicious."),
 
363
 
364
  DEMO_SCRIPTS_T3 = {
365
  42: [
366
+ (ActionType.GET_PROPERTY_SPECIFICATION, {},
367
  "Read the formal spec first β€” cheapest action at -0.03."),
368
  (ActionType.LIST_FUNCTIONS, {},
369
  "List all functions to survey candidates."),
 
373
  "Confident. emergencyDrain violates the access-control property."),
374
  ],
375
  45: [
376
+ (ActionType.GET_PROPERTY_SPECIFICATION, {},
377
  "Formal spec: first caller at valid price should win."),
378
  (ActionType.LIST_FUNCTIONS, {},
379
  "Auction contract β€” bid() immediately looks suspicious."),
env/schemas.py CHANGED
@@ -42,7 +42,7 @@ class ActionType(str, Enum):
42
  SUBMIT_PROPERTY = "submit_property" # scored 0–5, one attempt
43
 
44
  # ── Task 3 – Rule Checker ────────────────────────────────────────────────
45
- GET_FORMALIZED_PROPERTY = "get_formalized_property" # -0.03
46
  GET_FUNCTION_METADATA = "get_function_metadata" # -0.05
47
  SUBMIT_FUNCTION = "submit_function" # +5.0 / +1.5 / -1.5, one attempt
48
 
 
42
  SUBMIT_PROPERTY = "submit_property" # scored 0–5, one attempt
43
 
44
  # ── Task 3 – Rule Checker ────────────────────────────────────────────────
45
+ GET_PROPERTY_SPECIFICATION = "get_property_specification" # -0.03
46
  GET_FUNCTION_METADATA = "get_function_metadata" # -0.05
47
  SUBMIT_FUNCTION = "submit_function" # +5.0 / +1.5 / -1.5, one attempt
48
 
eval.py CHANGED
@@ -151,7 +151,7 @@ def oracle_t3(env: Task3Environment, seed: int, verbose: bool = False) -> Dict[s
151
  if verbose:
152
  prop = obs.extra.get("property_english", "")[:60]
153
  print(f" {contract}.{fn_name}() \"{prop}\"")
154
- env.step(Action(action_type=ActionType.GET_FORMALIZED_PROPERTY))
155
  env.step(Action(action_type=ActionType.LIST_FUNCTIONS))
156
  result = env.step(Action(action_type=ActionType.SUBMIT_FUNCTION,
157
  params={"function_name": fn_name}))
 
151
  if verbose:
152
  prop = obs.extra.get("property_english", "")[:60]
153
  print(f" {contract}.{fn_name}() \"{prop}\"")
154
+ env.step(Action(action_type=ActionType.GET_PROPERTY_SPECIFICATION))
155
  env.step(Action(action_type=ActionType.LIST_FUNCTIONS))
156
  result = env.step(Action(action_type=ActionType.SUBMIT_FUNCTION,
157
  params={"function_name": fn_name}))
openenv.yaml CHANGED
@@ -73,7 +73,7 @@ action_space:
73
  get_function_natspec: {params: {}, reward: -0.08}
74
  get_file_natspec: {params: {}, reward: -0.03}
75
  get_related_functions: {params: {}, reward: -0.06}
76
- get_io: {params: {}, reward: -0.04}
77
  get_similar_rule: {params: {}, reward: -0.20}
78
  submit_property: {params: {property: string}, reward: "0.0-5.0 keyword-weighted, one attempt"}
79
  task3:
@@ -82,7 +82,7 @@ action_space:
82
  get_function_code: {params: {function_name: string}, reward: -0.10}
83
  get_state_variable: {params: {variable_name: "string opt"}, reward: -0.05}
84
  get_call_graph: {params: {}, reward: -0.08}
85
- get_formalized_property: {params: {}, reward: -0.03}
86
  submit_function: {params: {function_name: string}, reward: "+5.0 / +1.5 / -1.5, one attempt"}
87
 
88
  reward:
 
73
  get_function_natspec: {params: {}, reward: -0.08}
74
  get_file_natspec: {params: {}, reward: -0.03}
75
  get_related_functions: {params: {}, reward: -0.06}
76
+ get_signature: {params: {}, reward: -0.04}
77
  get_similar_rule: {params: {}, reward: -0.20}
78
  submit_property: {params: {property: string}, reward: "0.0-5.0 keyword-weighted, one attempt"}
79
  task3:
 
82
  get_function_code: {params: {function_name: string}, reward: -0.10}
83
  get_state_variable: {params: {variable_name: "string opt"}, reward: -0.05}
84
  get_call_graph: {params: {}, reward: -0.08}
85
+ get_property_specification: {params: {}, reward: -0.03}
86
  submit_function: {params: {function_name: string}, reward: "+5.0 / +1.5 / -1.5, one attempt"}
87
 
88
  reward:
tasks/task3/actions.py CHANGED
@@ -1,5 +1,8 @@
1
- """Task 3: Identify the function that violates a specified property."""
 
 
2
 
 
3
  from typing import Any, Dict, Tuple
4
  from data.data_loader import (
5
  get_function_by_name,
@@ -105,17 +108,20 @@ def get_call_graph(ctx: Any, qkey: str, params: Dict) -> Tuple[str, Reward]:
105
  Reward(value=-0.08, reason="get_call_graph cost"),
106
  )
107
 
108
- # TODO: Need to change this, property_formal doesn't exists
109
- def get_formalized_property(ctx: Any, qkey: str, params: Dict) -> Tuple[str, Reward]:
110
- """Handle GET_FORMALIZED_PROPERTY action."""
111
  if ctx._is_repeated(qkey):
112
  return "Repeated query.", Reward(value=-0.40, reason="Repeated query")
113
- formal = ctx._target_fn.get("task3", {}).get("property_formal", "")
114
- if not formal:
115
- formal = "No formal specification available for this property."
 
 
 
 
116
  return (
117
- f"Formal property:\n{formal}",
118
- Reward(value=-0.03, reason="get_formalized_property cost"),
119
  )
120
 
121
 
 
1
+ """
2
+ Task 3: Identify the function that violates a specified property.
3
+ """
4
 
5
+ import json
6
  from typing import Any, Dict, Tuple
7
  from data.data_loader import (
8
  get_function_by_name,
 
108
  Reward(value=-0.08, reason="get_call_graph cost"),
109
  )
110
 
111
+ def get_property_specification(ctx: Any, qkey: str, params: Dict) -> Tuple[str, Reward]:
112
+ """Handle GET_PROPERTY_SPECIFICATION action."""
 
113
  if ctx._is_repeated(qkey):
114
  return "Repeated query.", Reward(value=-0.40, reason="Repeated query")
115
+
116
+ rule = ctx._target_fn.get("property_specification", {})
117
+ if not rule:
118
+ rule = "No rule specification available for this property."
119
+
120
+ rule_parsed = json.dumps(rule) if isinstance(rule, dict) else rule
121
+
122
  return (
123
+ f"Formal property:\n{rule_parsed}",
124
+ Reward(value=-0.03, reason="get_property_specification cost"),
125
  )
126
 
127
 
tasks/task3/environment.py CHANGED
@@ -22,7 +22,7 @@ Actions & rewards
22
  get_function_code -0.10 full Solidity source of any function
23
  get_state_variables -0.05 list or inspect state variables
24
  get_call_graph -0.08 function call graph
25
- get_formalized_property -0.03 formal pre/post-condition version of property
26
  submit_function terminal: +5.0 / +1.5 / -1.5 (ONE attempt)
27
  repeated_query -0.40
28
 
@@ -59,7 +59,7 @@ AVAILABLE_ACTIONS = [
59
  ActionType.GET_FUNCTION_CODE,
60
  ActionType.GET_STATE_VARIABLE,
61
  ActionType.GET_CALL_GRAPH,
62
- ActionType.GET_FORMALIZED_PROPERTY,
63
  ActionType.SUBMIT_FUNCTION,
64
  ]
65
 
@@ -113,7 +113,7 @@ class Task3Environment(BaseEnv):
113
  f"Find the function in this contract that violates the property above.\n"
114
  f"Use list_functions then get_function_code to investigate.\n"
115
  f"Submit with submit_function, params={{\"function_name\": \"...\"}}.\n"
116
- f"ONE submission allowed."
117
  ),
118
  )
119
  return ResetResult(observation=obs, info={"task_id": TASK_ID})
@@ -190,13 +190,13 @@ class Task3Environment(BaseEnv):
190
 
191
  # Mapping from ActionType to handler function
192
  handlers = {
193
- ActionType.LIST_FUNCTIONS: actions.list_functions,
194
- ActionType.GET_FUNCTION_METADATA: actions.get_function_metadata,
195
- ActionType.GET_FUNCTION_CODE: actions.get_function_code,
196
- ActionType.GET_STATE_VARIABLE: actions.get_state_variable,
197
- ActionType.GET_CALL_GRAPH: actions.get_call_graph,
198
- ActionType.GET_FORMALIZED_PROPERTY: actions.get_formalized_property,
199
- ActionType.SUBMIT_FUNCTION: actions.submit_function,
200
  }
201
 
202
  handler = handlers.get(at)
 
22
  get_function_code -0.10 full Solidity source of any function
23
  get_state_variables -0.05 list or inspect state variables
24
  get_call_graph -0.08 function call graph
25
+ get_property_specification -0.03 formal pre/post-condition version of property
26
  submit_function terminal: +5.0 / +1.5 / -1.5 (ONE attempt)
27
  repeated_query -0.40
28
 
 
59
  ActionType.GET_FUNCTION_CODE,
60
  ActionType.GET_STATE_VARIABLE,
61
  ActionType.GET_CALL_GRAPH,
62
+ ActionType.GET_PROPERTY_SPECIFICATION,
63
  ActionType.SUBMIT_FUNCTION,
64
  ]
65
 
 
113
  f"Find the function in this contract that violates the property above.\n"
114
  f"Use list_functions then get_function_code to investigate.\n"
115
  f"Submit with submit_function, params={{\"function_name\": \"...\"}}.\n"
116
+ f"Only ONE submission allowed."
117
  ),
118
  )
119
  return ResetResult(observation=obs, info={"task_id": TASK_ID})
 
190
 
191
  # Mapping from ActionType to handler function
192
  handlers = {
193
+ ActionType.LIST_FUNCTIONS: actions.list_functions,
194
+ ActionType.GET_FUNCTION_METADATA: actions.get_function_metadata,
195
+ ActionType.GET_FUNCTION_CODE: actions.get_function_code,
196
+ ActionType.GET_STATE_VARIABLE: actions.get_state_variable,
197
+ ActionType.GET_CALL_GRAPH: actions.get_call_graph,
198
+ ActionType.GET_PROPERTY_SPECIFICATION: actions.get_property_specification,
199
+ ActionType.SUBMIT_FUNCTION: actions.submit_function,
200
  }
201
 
202
  handler = handlers.get(at)
utils/prompts.py CHANGED
@@ -83,7 +83,7 @@ Your task is to find the ONE function that violates this property.
83
 
84
  ## Actions (respond with JSON only, ONE action per turn):
85
  {"action": "list_functions", "params": {}}
86
- {"action": "get_formalized_property", "params": {}}
87
  {"action": "get_function_metadata", "params": {"function_name": "<n>"}}
88
  {"action": "get_function_code", "params": {"function_name": "<n>"}}
89
  {"action": "get_state_variable", "params": {"variable_name": "<n>"}}
@@ -93,7 +93,7 @@ Your task is to find the ONE function that violates this property.
93
  ## Strategy:
94
  1. Read the property shown as property_english in the observation.
95
  2. list_functions to survey candidates.
96
- 3. get_formalized_property for the precise pre/post-condition (cheap: -0.03).
97
  4. get_function_code on the 1-2 most suspicious functions.
98
  5. submit_function when confident β€” ONE attempt only.
99
 
 
83
 
84
  ## Actions (respond with JSON only, ONE action per turn):
85
  {"action": "list_functions", "params": {}}
86
+ {"action": "get_property_specification", "params": {}}
87
  {"action": "get_function_metadata", "params": {"function_name": "<n>"}}
88
  {"action": "get_function_code", "params": {"function_name": "<n>"}}
89
  {"action": "get_state_variable", "params": {"variable_name": "<n>"}}
 
93
  ## Strategy:
94
  1. Read the property shown as property_english in the observation.
95
  2. list_functions to survey candidates.
96
+ 3. get_property_specification for the precise pre/post-condition (cheap: -0.03).
97
  4. get_function_code on the 1-2 most suspicious functions.
98
  5. submit_function when confident β€” ONE attempt only.
99
 
validate.py CHANGED
@@ -42,7 +42,7 @@ def check_pydantic_models():
42
  from env.schemas import Observation, Action, ActionType, Reward, StepResult, ResetResult
43
  obs = Observation(task_id="t", contract_name="C", contract_description="D", available_actions=[])
44
  for at in [ActionType.LIST_FUNCTIONS, ActionType.SUBMIT_PROPERTY,
45
- ActionType.GET_FORMALIZED_PROPERTY, ActionType.SUBMIT_FUNCTION]:
46
  Action(action_type=at)
47
  Reward(value=-1.5, reason="test")
48
  StepResult(observation=obs, reward=Reward(value=0, reason=""), done=False)
@@ -90,7 +90,7 @@ def check_t3_env():
90
  assert "property_english" in r.observation.extra
91
  prop = r.observation.extra["property_english"]
92
  assert len(prop) > 10, "property_english too short"
93
- for at in [ActionType.LIST_FUNCTIONS, ActionType.GET_FORMALIZED_PROPERTY,
94
  ActionType.GET_CALL_GRAPH, ActionType.GET_STATE_VARIABLE]:
95
  s = env.step(Action(action_type=at))
96
  assert s.reward.value < 0, f"{at.value} should have negative shaping reward"
@@ -100,7 +100,7 @@ def check_t3_action_costs():
100
  from env.schemas import Action, ActionType
101
  env = Task3Environment(); env.reset(seed=42)
102
  costs = {
103
- ActionType.GET_FORMALIZED_PROPERTY: -0.03,
104
  ActionType.LIST_FUNCTIONS: -0.05,
105
  ActionType.GET_CALL_GRAPH: -0.08,
106
  }
@@ -201,7 +201,7 @@ def check_reward_shaping():
201
  env = Task3Environment(); env.reset(seed=1)
202
  rewards = {env.step(Action(action_type=at)).reward.value
203
  for at in [ActionType.LIST_FUNCTIONS,
204
- ActionType.GET_FORMALIZED_PROPERTY,
205
  ActionType.GET_CALL_GRAPH]}
206
  assert len(rewards) >= 2
207
 
 
42
  from env.schemas import Observation, Action, ActionType, Reward, StepResult, ResetResult
43
  obs = Observation(task_id="t", contract_name="C", contract_description="D", available_actions=[])
44
  for at in [ActionType.LIST_FUNCTIONS, ActionType.SUBMIT_PROPERTY,
45
+ ActionType.GET_PROPERTY_SPECIFICATION, ActionType.SUBMIT_FUNCTION]:
46
  Action(action_type=at)
47
  Reward(value=-1.5, reason="test")
48
  StepResult(observation=obs, reward=Reward(value=0, reason=""), done=False)
 
90
  assert "property_english" in r.observation.extra
91
  prop = r.observation.extra["property_english"]
92
  assert len(prop) > 10, "property_english too short"
93
+ for at in [ActionType.LIST_FUNCTIONS, ActionType.GET_PROPERTY_SPECIFICATION,
94
  ActionType.GET_CALL_GRAPH, ActionType.GET_STATE_VARIABLE]:
95
  s = env.step(Action(action_type=at))
96
  assert s.reward.value < 0, f"{at.value} should have negative shaping reward"
 
100
  from env.schemas import Action, ActionType
101
  env = Task3Environment(); env.reset(seed=42)
102
  costs = {
103
+ ActionType.GET_PROPERTY_SPECIFICATION: -0.03,
104
  ActionType.LIST_FUNCTIONS: -0.05,
105
  ActionType.GET_CALL_GRAPH: -0.08,
106
  }
 
201
  env = Task3Environment(); env.reset(seed=1)
202
  rewards = {env.step(Action(action_type=at)).reward.value
203
  for at in [ActionType.LIST_FUNCTIONS,
204
+ ActionType.GET_PROPERTY_SPECIFICATION,
205
  ActionType.GET_CALL_GRAPH]}
206
  assert len(rewards) >= 2
207