Spaces:
Sleeping
Sleeping
name: tool_use_env
description: Evaluate AI agents on reliable tool usage under uncertainty
version: 1.0
entrypoint: server.app:app
actions:
  type: object
  properties:
    action_type:
      type: string
      enum:
        - use_calculator
        - use_search
        - answer_directly
observations:
  type: object
  properties:
    query:
      type: string
    tool_output:
      type: string
      nullable: true
    message:
      type: string
reward_range: [0.0, 1.0]
metadata:
  difficulty_levels:
    - easy
    - medium
    - hard
  features:
    - tool_selection
    - partial_rewards
    - decision_making
    - efficiency_penalty