Spaces:

Clove25
/

tool-use-openenv

Sleeping

Upload 41 files

d9175ae verified about 1 month ago

651 Bytes

	name: tool_use_env
	description: Evaluate AI agents on reliable tool usage under uncertainty

	version: 1.0

	entrypoint: server.app:app

	actions:
	type: object
	properties:
	action_type:
	type: string
	enum:
	- use_calculator
	- use_search
	- answer_directly

	observations:
	type: object
	properties:
	query:
	type: string
	tool_output:
	type: string
	nullable: true
	message:
	type: string

	reward_range: [0.0, 1.0]

	metadata:
	difficulty_levels:
	- easy
	- medium
	- hard

	features:
	- tool_selection
	- partial_rewards
	- decision_making
	- efficiency_penalty