Spaces:

Chirag0123
/

codebase-nav-env

Sleeping

v2.0 — agent reliability & evaluation layer

a5c1fa0 12 days ago

1.47 kB

	name: codebase-nav-env
	version: "1.0.0"
	description: >
	An RL environment where an LLM agent navigates an unfamiliar Python codebase,
	finds bugs, and implements features by reading files and running tests.
	Graded by actual pytest execution — fully deterministic.

	author: your-hf-username
	license: MIT

	tasks:
	- id: task1
	name: "Single-file bug repair"
	description: "Find and fix bugs in a Python module so all tests pass."
	difficulty: easy
	max_steps: 20
	reward_range: [0.0, 1.0]

	- id: task2
	name: "Cross-module interface bug"
	description: "Fix a type mismatch between two modules and add a regression test."
	difficulty: medium
	max_steps: 25
	reward_range: [0.0, 1.0]

	- id: task3
	name: "Feature implementation from spec"
	description: "Read FEATURE_SPEC.md and implement the feature across multiple files."
	difficulty: hard
	max_steps: 30
	reward_range: [0.0, 1.0]

	action_space:
	type: text
	schema:
	action_type: string
	path: string (optional)
	content: string (optional)
	query: string (optional)

	observation_space:
	type: structured
	fields:
	- repo_tree: list of file paths
	- task_description: string
	- failing_tests: list of test names
	- files_read: list of paths read so far
	- last_action_result: string
	- steps_remaining: integer
	- current_task: string

	endpoints:
	reset: POST /reset
	step: POST /step
	state: GET /state
	health: GET /health