# Environment manifest header: identity, version, and server entrypoint.
name: tool_use_env
description: Evaluate AI agents on reliable tool usage under uncertainty

# Quoted so the value stays the string "1.0". A bare 1.0 is loaded as a
# float, and a later bump to 1.10 would silently collapse to 1.1.
version: "1.0"

# Presumably a "module.path:attribute" reference to the server application
# object -- confirm the exact format against the loader that reads this file.
entrypoint: server.app:app

# Schema of an action: an object with one declared property.
actions:
  type: object
  properties:
    # String field restricted to the three values listed in the enum.
    action_type:
      type: string
      enum: [use_calculator, use_search, answer_directly]

# Schema of an observation: an object with three declared string properties.
observations:
  type: object
  properties:
    query: {type: string}
    # The only property marked nullable.
    tool_output: {type: string, nullable: true}
    message: {type: string}

# Declared reward bounds, presumably [min, max] -- confirm with the consumer.
# NOTE(review): metadata lists an efficiency_penalty feature; confirm that
# penalized rewards still stay inside this range (never below 0.0).
reward_range: [0.0, 1.0]

# Descriptive metadata for the environment.
metadata:
  # Difficulty tiers listed for this environment.
  difficulty_levels: [easy, medium, hard]
  # Feature tags listed for this environment.
  features:
    - tool_selection
    - partial_rewards
    - decision_making
    - efficiency_penalty