# SmartContractAudit / openenv.yaml
# ajaxwin
# refactor: Update task configurations and grading logic for improved scoring and consistency
# dccaaac
# Environment identity and packaging metadata.
name: smart-contract-audit-env
# Kept quoted so the version is always read as a string.
version: '1.2.0'
# Folded scalar: the wrapped lines below are joined into one paragraph.
description: >
  Reinforcement learning environment for smart contract security analysis.
  Agents interact with real-world Solidity contract data from Certora-audited
  projects, practising three real audit tasks: vulnerability detection,
  property discovery, and rule checking.
author: 'Codex47'
license: MIT
# Task catalogue: one sequence entry per audit task.
# Every entry carries the same keys: id, name, difficulty, status,
# description, max_steps, reward_range, grader, and grader_score_range.
# NOTE(review): action_space lists negative per-step rewards, so reward_range
# presumably describes the terminal score only — confirm.
tasks:
  - id: task1_vuln_detection
    name: Targeted Vulnerability Detection
    difficulty: medium
    status: active
    description: >
      Given a Solidity contract (4-6 functions), identify the single vulnerable
      function and describe its vulnerability type in 2-3 words.
    max_steps: 40
    reward_range: [0, 1]
    grader: tasks/task1/grader.py
    # Written as floats, matching the other task entries.
    grader_score_range: [0.0, 1.0]

  - id: task2_property_discovery
    name: Property Discovery
    difficulty: hard
    status: active
    description: >
      Given a single Solidity function with known properties, discover the
      correct natural-language postcondition describing its correct behaviour.
    max_steps: 30
    reward_range: [0, 1]
    grader: tasks/task2/grader.py
    grader_score_range: [0.0, 1.0]

  - id: task3_rule_checker
    name: Rule Checker
    difficulty: easy
    status: active
    description: >
      Given a natural-language property and a Solidity contract, identify the
      function that violates that property. Partial credit for internal subfunctions.
    max_steps: 20
    reward_range: [0, 1]
    grader: tasks/task3/grader.py
    grader_score_range: [0.0, 1.0]
# Schema of the observation object: one entry under `properties` per field,
# each declaring the field's type.
observation_space:
  type: object
  properties:
    task_id:
      type: string
    contract_name:
      type: string
    contract_description:
      type: string
    available_actions:
      type: array
      items:
        type: string
    # The two last_action* fields are the only ones declared nullable.
    last_action:
      type: string
      nullable: true
    last_action_result:
      type: string
      nullable: true
    step_count:
      type: integer
    cumulative_reward:
      type: number
    done:
      type: boolean
    extra:
      type: object
# Actions available to the agent, grouped by task. Each action lists its
# parameters (`params`, `{}` when it takes none) and its listed reward.
action_space:
  # General actions applicable across all tasks
  general:
    unknown:
      reward: 0.0  # UNKNOWN action cost
    # NOTE(review): reward.all_tasks_shared.repeated_query is -0.40 while this
    # entry is -0.22 — confirm which value applies.
    repeated:
      reward: -0.22  # REPEATED action cost
    resubmit:
      reward: 0.0  # RESUBMIT action cost

  task1:
    list_functions:
      params: {}
      reward: -0.04
    get_function_code:
      params:
        function_name: string
      reward: -0.14
    get_function_summary:
      params:
        function_name: string
      reward: -0.07
    get_file_metadata:
      params: {}
      reward: -0.02
    get_state_variable:
      params:
        variable_name: 'string opt'
      reward: -0.06
    get_call_graph:
      params: {}
      reward: -0.08
    submit:
      params:
        function_name: string
        vulnerability_type: string
      reward: 0.0  # terminal reward handled by grader

  task2:
    get_function_code:
      params: {}
      reward: -0.14
    get_function_natspec:
      params: {}
      reward: -0.08
    # NOTE(review): the next four rewards are positive while the other query
    # actions in this file carry negative rewards — confirm the signs.
    get_file_natspec:
      params: {}
      reward: 0.05
    get_related_functions:
      params: {}
      reward: 0.07
    get_signature:
      params: {}
      reward: 0.04
    get_similar_rule:
      params: {}
      reward: 0.15
    submit_property:
      params:
        property: string
      reward: 0.0  # terminal reward handled by grader

  task3:
    list_functions:
      params: {}
      reward: -0.04
    # NOTE(review): positive reward, unlike the neighbouring query actions —
    # confirm the sign.
    get_function_metadata:
      params:
        function_name: string
      reward: 0.04
    get_function_code:
      params:
        function_name: string
      reward: -0.14
    get_state_variable:
      params:
        variable_name: 'string opt'
      reward: -0.06
    get_call_graph:
      params: {}
      reward: -0.08
    # NOTE(review): positive reward here as well — confirm the sign.
    get_property_specification:  # replaces get_formalized_property
      params: {}
      reward: 0.02
    submit_function:
      params:
        function_name: string
      reward: 0.0  # terminal reward handled by grader
# Reward configuration: shaped per-step adjustments plus a terminal score
# range for each task.
reward:
  type: shaped

  all_tasks_shared:
    # NOTE(review): action_space.general.repeated is -0.22 while this value
    # is -0.40 — confirm which one applies.
    repeated_query: -0.40

  # NOTE(review): "correct"/"wrong" presumably refer to whether the queried
  # function is the vulnerable one — verify against the task1 environment.
  task1_shaping:
    get_function_code_correct: +0.05
    get_function_code_wrong: -0.10
    get_function_summary_correct: +0.03
    get_function_summary_wrong: -0.05

  # Terminal score bounds; identical for all three tasks.
  task1_terminal:
    range: [0.0, 1.0]
  task2_terminal:
    range: [0.0, 1.0]
  task3_terminal:
    range: [0.0, 1.0]
# Dataset summary: origin, storage format, and item counts.
data:
  source: 'Certora audited DeFi projects'
  format: JSON
  # Counts as declared in this manifest.
  num_contracts: 4
  num_vulnerable_functions: 8
  num_property_functions: 11
  num_task3_episodes: 8
# Entry points for driving the environment.
interface:
  # HTTP routes, written as "<METHOD> <path>".
  http:
    reset: 'POST /reset'
    step: 'POST /step'
    state: 'GET /state'
    tasks: 'GET /tasks'
    health: 'GET /health'
    action_space: 'GET /action_space?task_id=<id>'
    observation_space: 'GET /observation_space'
  # Python call signatures, written as "call -> result type".
  python:
    reset: 'env.reset(seed=None) -> ResetResult'
    step: 'env.step(action) -> StepResult'
    state: 'env.state() -> StateResult'