---
# Top-level metadata identifying this environment definition.
name: smart-contract-audit-env
# Quoted so the version is read as a string rather than a number.
version: '1.2.0'
description: >
  Reinforcement learning environment for smart contract security
  analysis. Agents interact with real-world Solidity contract data from
  Certora-audited projects, practising three real audit tasks:
  vulnerability detection, property discovery, and rule checking.
author: 'Codex47'
license: MIT

# Task catalogue: one entry per task, each with its own step budget, reward
# range and grader script path.
# NOTE(review): reward_range is [0, 1] for every task, while action_space in
# this file attaches negative rewards to individual steps — presumably the
# range describes only the grader's terminal score; confirm.
tasks:
  - id: task1_vuln_detection
    name: Targeted Vulnerability Detection
    difficulty: medium
    status: active
    description: >
      Given a Solidity contract (4-6 functions), identify the single vulnerable
      function and describe its vulnerability type in 2-3 words.
    max_steps: 40
    reward_range: [0, 1]
    grader: tasks/task1/grader.py
    # Written as floats so this key has the same type as in task2 and task3.
    grader_score_range: [0.0, 1.0]

  - id: task2_property_discovery
    name: Property Discovery
    difficulty: hard
    status: active
    description: >
      Given a single Solidity function with known properties, discover the
      correct natural-language postcondition describing its correct behaviour.
    max_steps: 30
    reward_range: [0, 1]
    grader: tasks/task2/grader.py
    grader_score_range: [0.0, 1.0]

  - id: task3_rule_checker
    name: Rule Checker
    difficulty: easy
    status: active
    description: >
      Given a natural-language property and a Solidity contract, identify the
      function that violates that property. Partial credit for internal subfunctions.
    max_steps: 20
    reward_range: [0, 1]
    grader: tasks/task3/grader.py
    grader_score_range: [0.0, 1.0]

# Shape of an observation: an object whose fields are declared below with
# their types. Fields marked `nullable: true` may also be null.
observation_space:
  type: object
  properties:
    task_id:
      type: string
    contract_name:
      type: string
    contract_description:
      type: string
    available_actions:
      type: array
      items:
        type: string
    last_action:
      type: string
      nullable: true
    last_action_result:
      type: string
      nullable: true
    step_count:
      type: integer
    cumulative_reward:
      type: number
    done:
      type: boolean
    extra:
      type: object

# Action catalogue, grouped by task. Every action declares the parameters it
# takes (`params`, empty when it takes none) and the reward attached to it.
action_space:
  # General actions applicable across all tasks.
  # NOTE(review): `repeated` is -0.22 here, whereas reward.all_tasks_shared in
  # this file lists repeated_query as -0.40 — confirm which value is in effect.
  general:
    unknown:
      reward: 0.0  # UNKNOWN action cost
    repeated:
      reward: -0.22  # REPEATED action cost
    resubmit:
      reward: 0.0  # RESUBMIT action cost

  # Task 1 actions.
  task1:
    list_functions:
      params: {}
      reward: -0.04
    get_function_code:
      params:
        function_name: string
      reward: -0.14
    get_function_summary:
      params:
        function_name: string
      reward: -0.07
    get_file_metadata:
      params: {}
      reward: -0.02
    get_state_variable:
      params:
        variable_name: 'string opt'
      reward: -0.06
    get_call_graph:
      params: {}
      reward: -0.08
    submit:
      params:
        function_name: string
        vulnerability_type: string
      # Terminal reward is handled by the grader.
      reward: 0.0

  # Task 2 actions; none of the query actions take parameters.
  task2:
    get_function_code:
      params: {}
      reward: -0.14
    get_function_natspec:
      params: {}
      reward: -0.08
    # NOTE(review): the next four actions carry positive rewards, unlike the
    # negative values on the two query actions above — confirm the signs.
    get_file_natspec:
      params: {}
      reward: 0.05
    get_related_functions:
      params: {}
      reward: 0.07
    get_signature:
      params: {}
      reward: 0.04
    get_similar_rule:
      params: {}
      reward: 0.15
    submit_property:
      params:
        property: string
      # Terminal reward is handled by the grader.
      reward: 0.0

  # Task 3 actions.
  # NOTE(review): get_function_metadata and get_property_specification carry
  # positive rewards while the other query actions here are negative — confirm.
  task3:
    list_functions:
      params: {}
      reward: -0.04
    get_function_metadata:
      params:
        function_name: string
      reward: 0.04
    get_function_code:
      params:
        function_name: string
      reward: -0.14
    get_state_variable:
      params:
        variable_name: 'string opt'
      reward: -0.06
    get_call_graph:
      params: {}
      reward: -0.08
    # Replaces get_formalized_property.
    get_property_specification:
      params: {}
      reward: 0.02
    submit_function:
      params:
        function_name: string
      # Terminal reward is handled by the grader.
      reward: 0.0

# Reward configuration: a shared penalty, task 1 shaping terms, and the range
# of each task's terminal reward.
reward:
  type: shaped
  # NOTE(review): action_space.general.repeated in this file is -0.22, which
  # differs from repeated_query below — confirm which value is in effect.
  all_tasks_shared:
    repeated_query: -0.40
  # Task 1 adjustments, split by a correct/wrong outcome of the named action.
  task1_shaping:
    get_function_code_correct: +0.05
    get_function_code_wrong: -0.10
    get_function_summary_correct: +0.03
    get_function_summary_wrong: -0.05
  # Terminal reward ranges, one per task; each is a two-element list.
  task1_terminal:
    range:
      - 0.0
      - 1.0
  task2_terminal:
    range:
      - 0.0
      - 1.0
  task3_terminal:
    range:
      - 0.0
      - 1.0

# Dataset description: origin, storage format and item counts.
data:
  source: 'Certora audited DeFi projects'
  format: JSON
  # Counts of contracts and of per-task items in the dataset.
  num_contracts: 4
  num_vulnerable_functions: 8
  num_property_functions: 11
  num_task3_episodes: 8

# Entry points of the environment, listed for HTTP and for Python.
interface:
  # HTTP endpoints, each written as '<METHOD> <path>'.
  http:
    reset: 'POST /reset'
    step: 'POST /step'
    state: 'GET /state'
    tasks: 'GET /tasks'
    health: 'GET /health'
    action_space: 'GET /action_space?task_id=<id>'
    observation_space: 'GET /observation_space'
  # Python calls, each written as 'call -> result type'.
  python:
    reset: 'env.reset(seed=None) -> ResetResult'
    step: 'env.step(action)     -> StepResult'
    state: 'env.state()          -> StateResult'