"""
SWEbench-IN Environment Implementation for OpenEnv server.
Wraps the SWEbench-IN environment logic into the OpenEnv
Environment interface (reset/step/state).
Dockerless: No container management, uses local temp directories.
"""
import os
import random
from dataclasses import dataclass, field
from uuid import uuid4

from openenv.core.env_server.interfaces import Environment
from openenv.core.env_server.types import State

from models import SWEbenchINAction, SWEbenchINObservation
from rewards import compute_reward
from simulator import Simulator
from tasks import TASKS


@dataclass
class EnvState:
"""Internal environment state tracking."""
task_id: int = 0
step_count: int = 0
tests_passing_ratio: float = 0.0
server_running: bool = False
files_correct: bool = False
action_history: list = field(default_factory=list)
    reply_texts: list = field(default_factory=list)


class SWEbenchINEnvironment(Environment):
"""
OpenEnv-compliant SWEbench-IN environment (Dockerless).
Trains an LLM agent to fix broken Linux systems while managing
stakeholder communication simultaneously. Uses local temp directories
instead of Docker containers.
"""
    SUPPORTS_CONCURRENT_SESSIONS: bool = True

def __init__(self):
"""Initialize the SWEbench-IN environment."""
self._state = State(episode_id=str(uuid4()), step_count=0)
self._env_state = EnvState()
self._simulator = Simulator()
self._current_task = None
        self._max_steps = 15  # placeholder; reset() overrides this with the task's max_actions
        self._done = False
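        # Note: reset() is expected before the first step(); per-task setup
        # (setup_task and the temp directories) happens in reset(), not here.
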
def reset(self) -> SWEbenchINObservation:
"""Reset the environment to a new episode."""
# Sample a random task
task_id = random.choice(list(TASKS.keys()))
self._current_task = TASKS[task_id]
self._done = False
self._max_steps = self._current_task.max_actions
self._state = State(episode_id=str(uuid4()), step_count=0)
self._env_state = EnvState(task_id=task_id)
self._simulator.setup_task(task_id)
obs_text = self._simulator.get_initial_observation(task_id)
return SWEbenchINObservation(
text=obs_text,
reward=0.0,
done=False,
step_count=0,
max_steps=self._max_steps,
tests_passing_ratio=0.0,
server_running=False,
        )

def step(self, action: SWEbenchINAction) -> SWEbenchINObservation:
"""Execute a step in the environment."""
        if self._done:
            return SWEbenchINObservation(
                text="Episode is done. Call reset() to start a new episode.",
                reward=0.0,
                done=True,
                step_count=self._state.step_count,
                max_steps=self._max_steps,
                tests_passing_ratio=self._env_state.tests_passing_ratio,
                server_running=self._env_state.server_running,
            )
action_type = action.type
action_args = action.args
        # Snapshot the scalar fields before the action; action_history is
        # passed to compute_reward separately.
state_before = EnvState(
task_id=self._env_state.task_id,
step_count=self._env_state.step_count,
tests_passing_ratio=self._env_state.tests_passing_ratio,
server_running=self._env_state.server_running,
files_correct=self._env_state.files_correct,
)
# Dispatch action
obs_text = self._dispatch_action(action_type, action_args)
# Update state
self._env_state.action_history.append(f"{action_type}: {action_args}")
self._env_state.step_count += 1
self._state.step_count += 1
self._update_measurements()
# Check done
if action_type == "close_case" or self._env_state.step_count >= self._max_steps:
self._done = True
# Compute reward
reward_breakdown = compute_reward(
container_id=None,
action_history=self._env_state.action_history,
state_before=state_before,
state_after=self._env_state,
output_dir=self._simulator.output_dir,
task_id=self._env_state.task_id,
work_dir=self._simulator.work_dir,
)
return SWEbenchINObservation(
text=obs_text,
reward=reward_breakdown.total,
done=self._done,
step_count=self._env_state.step_count,
max_steps=self._max_steps,
tests_passing_ratio=self._env_state.tests_passing_ratio,
server_running=self._env_state.server_running,
reward_breakdown={
"technical": reward_breakdown.technical,
"boundaries": reward_breakdown.boundaries,
"communication": reward_breakdown.communication,
"leave_protection": reward_breakdown.leave_protection,
"shaping": reward_breakdown.shaping,
},
        )

def state(self) -> State:
"""Get the current environment state."""
        return self._state

# --- Internal helpers ---
VALID_ACTIONS = {
"run_command", "read_file", "write_file", "run_tests",
"check_server", "reply_slack", "reply_email", "reply_hr", "close_case",
}
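
    # Example action payloads (illustrative; the exact field names come from
    # models.SWEbenchINAction):
    #   SWEbenchINAction(type="run_command", args="systemctl status nginx")
    #   SWEbenchINAction(type="write_file", args="app/config.py|DEBUG = False")
    #   SWEbenchINAction(type="reply_slack", args="Investigating the outage now.")
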
def _dispatch_action(self, action_type: str, action_args: str) -> str:
"""Dispatch an action to the simulator."""
if action_type not in self.VALID_ACTIONS:
return f"ERROR: Unknown action '{action_type}'"
if action_type == "run_command":
return self._simulator.run_bash(action_args)
elif action_type == "read_file":
return self._simulator.read_file(action_args)
elif action_type == "write_file":
if "|" in action_args:
path, content = action_args.split("|", 1)
return self._simulator.write_file(path.strip(), content)
return "ERROR: write_file args must be 'path|content'"
elif action_type == "run_tests":
result = self._simulator.run_pytest()
return f"Passed: {result['passed']}, Failed: {result['failed']}, Ratio: {result['ratio']:.0%}\n{result['output']}"
elif action_type == "check_server":
result = self._simulator.curl_server()
return f"Status: {result['status_code']}, Success: {result['success']}"
elif action_type in ("reply_slack", "reply_email", "reply_hr"):
recipient = action_type.replace("reply_", "").upper()
self._env_state.reply_texts.append(f"[{recipient}]: {action_args}")
return self._simulator.write_reply(recipient, action_args)
elif action_type == "close_case":
return "Case closed. Episode ending."
return "ERROR: dispatch failed"
def _update_measurements(self):
"""Update state measurements from live environment."""
server_result = self._simulator.curl_server()
self._env_state.server_running = server_result["success"]
test_result = self._simulator.run_pytest()
self._env_state.tests_passing_ratio = test_result["ratio"]
reply_path = os.path.join(self._simulator.output_dir, "reply.txt")
self._env_state.files_correct = (
os.path.exists(reply_path) and os.path.getsize(reply_path) > 0
)
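

if __name__ == "__main__":
    # Minimal local smoke test (illustrative; assumes the sibling modules
    # models/tasks/simulator/rewards are importable from this directory).
    env = SWEbenchINEnvironment()
    obs = env.reset()
    print(obs.text[:200])
    obs = env.step(SWEbenchINAction(type="run_tests", args=""))
    print("reward:", obs.reward, "done:", obs.done)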