# Hosting-page residue captured alongside this file (not part of the config):
#   Spaces: Sleeping
#   File size: 651 Bytes
# Environment manifest for `tool_use_env`.
#
# NOTE(review): this file was recovered from a flattened copy in which all
# indentation had been lost and a viewer line-number gutter was fused into
# the first line. The nesting below is reconstructed from the key names;
# verify it against the original revision before relying on it.
# "d9175ae" preceded the gutter -- presumably the short commit hash of the
# captured revision; confirm.
---
name: tool_use_env
description: Evaluate AI agents on reliable tool usage under uncertainty
# Quoted so the value stays the string "1.0" instead of loading as a float.
version: "1.0"
# Written as `module.path:attribute`; the consumer resolves it.
entrypoint: server.app:app

# Schema of an action: an object with a single `action_type` string
# restricted to the three values listed under `enum`.
actions:
  type: object
  properties:
    action_type:
      type: string
      enum:
        - use_calculator
        - use_search
        - answer_directly

# Schema of an observation: an object with three string fields, of which
# only `tool_output` is marked nullable.
observations:
  type: object
  properties:
    query:
      type: string
    tool_output:
      type: string
      nullable: true
    message:
      type: string

# Two-element list; presumably [min, max] of the reward -- confirm.
reward_range: [0.0, 1.0]

metadata:
  difficulty_levels:
    - easy
    - medium
    - hard
  # TODO(review): `features` is placed under `metadata` by inference; the
  # flattened copy does not show whether it was a top-level key instead.
  features:
    - tool_selection
    - partial_rewards
    - decision_making
    - efficiency_penalty