Spaces:

Codex47
/

SmartContractAudit

Running

ajaxwin

refactor: Update task configurations and grading logic for improved scoring and consistency

dccaaac 5 days ago

4.91 kB

	# Environment manifest: identity and licensing metadata.
	name: smart-contract-audit-env
	# Kept quoted so the version is always read as a string.
	version: "1.2.0"
	# Folded scalar: the indented lines below are joined into one paragraph.
	description: >
	  Reinforcement learning environment for smart contract security analysis.
	  Agents interact with real-world Solidity contract data from Certora-audited
	  projects, practising three real audit tasks: vulnerability detection,
	  property discovery, and rule checking.
	author: "Codex47"
	license: MIT

	# Task catalogue: one sequence entry per audit task. Every entry carries the
	# same keys: id, name, difficulty, status, description, max_steps,
	# reward_range, grader (a grader script path) and grader_score_range.
	tasks:
	  - id: task1_vuln_detection
	    name: Targeted Vulnerability Detection
	    difficulty: medium
	    status: active
	    description: >
	      Given a Solidity contract (4-6 functions), identify the single vulnerable
	      function and describe its vulnerability type in 2-3 words.
	    max_steps: 40
	    reward_range: [0, 1]
	    grader: tasks/task1/grader.py
	    # Written as floats to match the other two tasks' grader_score_range.
	    grader_score_range: [0.0, 1.0]

	  - id: task2_property_discovery
	    name: Property Discovery
	    difficulty: hard
	    status: active
	    description: >
	      Given a single Solidity function with known properties, discover the
	      correct natural-language postcondition describing its correct behaviour.
	    max_steps: 30
	    reward_range: [0, 1]
	    grader: tasks/task2/grader.py
	    grader_score_range: [0.0, 1.0]

	  - id: task3_rule_checker
	    name: Rule Checker
	    difficulty: easy
	    status: active
	    description: >
	      Given a natural-language property and a Solidity contract, identify the
	      function that violates that property. Partial credit for internal subfunctions.
	    max_steps: 20
	    reward_range: [0, 1]
	    grader: tasks/task3/grader.py
	    grader_score_range: [0.0, 1.0]

	# Schema of the observation object. Each entry under `properties` declares
	# the field's type; `nullable: true` marks the two fields that may be null.
	observation_space:
	  type: object
	  properties:
	    task_id: {type: string}
	    contract_name: {type: string}
	    contract_description: {type: string}
	    available_actions: {type: array, items: {type: string}}
	    last_action: {type: string, nullable: true}
	    last_action_result: {type: string, nullable: true}
	    step_count: {type: integer}
	    cumulative_reward: {type: number}
	    done: {type: boolean}
	    extra: {type: object}

	# Per-task action catalogue. Each action lists its `params` (empty mapping
	# when it takes none) and the per-step `reward` recorded for it here.
	# The submit* actions carry 0.0 because the terminal reward comes from the
	# task's grader.
	action_space:
	  # General actions applicable across all tasks
	  general:
	    unknown: {reward: 0.0}  # UNKNOWN action cost
	    # NOTE(review): reward.all_tasks_shared.repeated_query says -0.40 while
	    # this entry says -0.22 — confirm which value is authoritative.
	    repeated: {reward: -0.22}  # REPEATED action cost
	    resubmit: {reward: 0.0}  # RESUBMIT action cost

	  task1:
	    list_functions:
	      params: {}
	      reward: -0.04
	    get_function_code:
	      params: {function_name: string}
	      reward: -0.14
	    get_function_summary:
	      params: {function_name: string}
	      reward: -0.07
	    get_file_metadata:
	      params: {}
	      reward: -0.02
	    get_state_variable:
	      params: {variable_name: "string opt"}
	      reward: -0.06
	    get_call_graph:
	      params: {}
	      reward: -0.08
	    submit:
	      params: {function_name: string, vulnerability_type: string}
	      reward: 0.0  # terminal reward handled by grader

	  # NOTE(review): four of the query actions below carry positive rewards
	  # while the other query actions in this file are negative — confirm the
	  # signs are intended.
	  task2:
	    get_function_code:
	      params: {}
	      reward: -0.14
	    get_function_natspec:
	      params: {}
	      reward: -0.08
	    get_file_natspec:
	      params: {}
	      reward: 0.05
	    get_related_functions:
	      params: {}
	      reward: 0.07
	    get_signature:
	      params: {}
	      reward: 0.04
	    get_similar_rule:
	      params: {}
	      reward: 0.15
	    submit_property:
	      params: {property: string}
	      reward: 0.0  # terminal reward handled by grader

	  # NOTE(review): get_function_metadata and get_property_specification are
	  # positive here, unlike the other task3 query actions — confirm the signs.
	  task3:
	    list_functions:
	      params: {}
	      reward: -0.04
	    get_function_metadata:
	      params: {function_name: string}
	      reward: 0.04
	    get_function_code:
	      params: {function_name: string}
	      reward: -0.14
	    get_state_variable:
	      params: {variable_name: "string opt"}
	      reward: -0.06
	    get_call_graph:
	      params: {}
	      reward: -0.08
	    get_property_specification:  # replaces get_formalized_property
	      params: {}
	      reward: 0.02
	    submit_function:
	      params: {function_name: string}
	      reward: 0.0  # terminal reward handled by grader

	# Reward configuration: a shared penalty, task1 shaping adjustments, and the
	# terminal score range for each task.
	reward:
	  type: shaped
	  all_tasks_shared:
	    # NOTE(review): action_space.general.repeated lists -0.22 for the
	    # repeated-action cost — confirm which value is authoritative.
	    repeated_query: -0.40
	  # Adjustments keyed by action name plus a _correct/_wrong suffix.
	  # NOTE(review): how these combine with the action_space.task1 rewards is
	  # not stated in this file — confirm against the environment code.
	  task1_shaping:
	    get_function_code_correct: +0.05
	    get_function_code_wrong: -0.10
	    get_function_summary_correct: +0.03
	    get_function_summary_wrong: -0.05
	  task1_terminal:
	    range: [0.0, 1.0]
	  task2_terminal:
	    range: [0.0, 1.0]
	  task3_terminal:
	    range: [0.0, 1.0]

	# Dataset summary: provenance, storage format and item counts.
	data:
	  source: "Certora audited DeFi projects"
	  format: JSON
	  num_contracts: 4
	  num_vulnerable_functions: 8
	  num_property_functions: 11
	  num_task3_episodes: 8

	# Entry points for driving the environment: HTTP routes and the equivalent
	# Python calls. Values are quoted because they contain spaces and symbols.
	interface:
	  http:
	    reset: "POST /reset"
	    step: "POST /step"
	    state: "GET /state"
	    tasks: "GET /tasks"
	    health: "GET /health"
	    # <id> is a placeholder for one of the ids listed under `tasks`.
	    action_space: "GET /action_space?task_id=<id>"
	    observation_space: "GET /observation_space"
	  python:
	    reset: "env.reset(seed=None) -> ResetResult"
	    step: "env.step(action) -> StepResult"
	    state: "env.state() -> StateResult"