---
# Environment manifest for smart-contract-audit-env.
#
# Sections, in order: package metadata, task catalogue, observation space,
# action space (with per-action step rewards), reward shaping, dataset
# summary, and the HTTP / Python interface.

# --- Package metadata -------------------------------------------------------
name: smart-contract-audit-env
# Quoted so the version stays a string rather than being read as a number.
version: "1.2.0"
description: >
  Reinforcement learning environment for smart contract security analysis.
  Agents interact with real-world Solidity contract data from
  Certora-audited projects, practising three real audit tasks:
  vulnerability detection, property discovery, and rule checking.
author: "Codex47"
license: MIT

# --- Task catalogue ---------------------------------------------------------
# One entry per task. `grader` is a path to the task's grader script.
# NOTE(review): `reward_range` is [0, 1] for every task while the per-action
# rewards under `action_space` include negative values — confirm that
# `reward_range` is meant to describe the terminal/grader score only.
# NOTE(review): `grader_score_range` is written with integers for task1 and
# floats for task2/task3 — confirm whether the consumer cares about the type.
tasks:
  - id: task1_vuln_detection
    name: Targeted Vulnerability Detection
    difficulty: medium
    status: active
    description: >
      Given a Solidity contract (4-6 functions), identify the single
      vulnerable function and describe its vulnerability type in 2-3 words.
    max_steps: 40
    reward_range: [0, 1]
    grader: tasks/task1/grader.py
    grader_score_range: [0, 1]

  - id: task2_property_discovery
    name: Property Discovery
    difficulty: hard
    status: active
    description: >
      Given a single Solidity function with known properties, discover the
      correct natural-language postcondition describing its correct
      behaviour.
    max_steps: 30
    reward_range: [0, 1]
    grader: tasks/task2/grader.py
    grader_score_range: [0.0, 1.0]

  - id: task3_rule_checker
    name: Rule Checker
    difficulty: easy
    status: active
    description: >
      Given a natural-language property and a Solidity contract, identify
      the function that violates that property. Partial credit for internal
      subfunctions.
    max_steps: 20
    reward_range: [0, 1]
    grader: tasks/task3/grader.py
    grader_score_range: [0.0, 1.0]

# --- Observation space ------------------------------------------------------
# Schema-style description of the observation object; each property lists
# its type, and two of them are additionally marked nullable.
observation_space:
  type: object
  properties:
    task_id: {type: string}
    contract_name: {type: string}
    contract_description: {type: string}
    available_actions:
      type: array
      items: {type: string}
    last_action: {type: string, nullable: true}
    last_action_result: {type: string, nullable: true}
    step_count: {type: integer}
    cumulative_reward: {type: number}
    done: {type: boolean}
    extra: {type: object}

# --- Action space -----------------------------------------------------------
# Actions are grouped by task. Each action declares its `params` and the
# `reward` attached to taking it; submit-style actions carry 0.0 here because
# their terminal reward is handled by the grader.
action_space:
  # General actions applicable across all tasks
  # NOTE(review): `repeated` is -0.22 here, but
  # `reward.all_tasks_shared.repeated_query` is -0.40 — confirm which value
  # the environment actually applies and align the two.
  general:
    unknown: {reward: 0.0}  # UNKNOWN action cost
    repeated: {reward: -0.22}  # REPEATED action cost
    resubmit: {reward: 0.0}  # RESUBMIT action cost

  task1:
    list_functions:
      params: {}
      reward: -0.04
    get_function_code:
      params: {function_name: string}
      reward: -0.14
    get_function_summary:
      params: {function_name: string}
      reward: -0.07
    get_file_metadata:
      params: {}
      reward: -0.02
    get_state_variable:
      params: {variable_name: "string opt"}
      reward: -0.06
    get_call_graph:
      params: {}
      reward: -0.08
    submit:
      params: {function_name: string, vulnerability_type: string}
      reward: 0.0  # terminal reward handled by grader

  # NOTE(review): get_file_natspec, get_related_functions, get_signature and
  # get_similar_rule have positive rewards while the other query actions in
  # this file are negative — confirm the signs are intentional.
  task2:
    get_function_code:
      params: {}
      reward: -0.14
    get_function_natspec:
      params: {}
      reward: -0.08
    get_file_natspec:
      params: {}
      reward: 0.05
    get_related_functions:
      params: {}
      reward: 0.07
    get_signature:
      params: {}
      reward: 0.04
    get_similar_rule:
      params: {}
      reward: 0.15
    submit_property:
      params: {property: string}
      reward: 0.0  # terminal reward handled by grader

  # NOTE(review): get_function_metadata and get_property_specification have
  # positive rewards while the other query actions in this task are
  # negative — confirm the signs are intentional.
  task3:
    list_functions:
      params: {}
      reward: -0.04
    get_function_metadata:
      params: {function_name: string}
      reward: 0.04
    get_function_code:
      params: {function_name: string}
      reward: -0.14
    get_state_variable:
      params: {variable_name: "string opt"}
      reward: -0.06
    get_call_graph:
      params: {}
      reward: -0.08
    get_property_specification:  # replaces get_formalized_property
      params: {}
      reward: 0.02
    submit_function:
      params: {function_name: string}
      reward: 0.0  # terminal reward handled by grader

# --- Reward shaping ---------------------------------------------------------
reward:
  type: shaped
  # NOTE(review): -0.40 here differs from `action_space.general.repeated`
  # (-0.22) — confirm which one is authoritative.
  all_tasks_shared:
    repeated_query: -0.40
  # Task 1 adjustments keyed by action and a correct/wrong suffix.
  # NOTE(review): presumably "correct" means the queried function is the
  # vulnerable one — verify against the environment implementation.
  task1_shaping:
    get_function_code_correct: +0.05
    get_function_code_wrong: -0.10
    get_function_summary_correct: +0.03
    get_function_summary_wrong: -0.05
  task1_terminal:
    range: [0.0, 1.0]
  task2_terminal:
    range: [0.0, 1.0]
  task3_terminal:
    range: [0.0, 1.0]

# --- Dataset summary --------------------------------------------------------
data:
  source: "Certora audited DeFi projects"
  format: JSON
  num_contracts: 4
  num_vulnerable_functions: 8
  num_property_functions: 11
  num_task3_episodes: 8

# --- Interface --------------------------------------------------------------
# HTTP routes and the equivalent Python call signatures.
interface:
  http:
    reset: "POST /reset"
    step: "POST /step"
    state: "GET /state"
    tasks: "GET /tasks"
    health: "GET /health"
    action_space: "GET /action_space?task_id="
    observation_space: "GET /observation_space"
  python:
    reset: "env.reset(seed=None) -> ResetResult"
    step: "env.step(action) -> StepResult"
    state: "env.state() -> StateResult"