Spaces:

YUS200619
/

swebench-ind

Sleeping

App Files Files Community

swebench-ind / app.py

YUS200619

feat: Complete Dockerless migration - update environment, rewards, app, and server wrapper

83ea4bd 13 days ago

raw

history blame contribute delete

2.89 kB

	"""
	app.py — Gradio HF Space entry point for SWEbench-IN.

	Provides a web UI for interacting with the SWEbench-IN environment:
	- Reset to any of the 5 tasks
	- Take individual actions (run_command, read_file, write_file, etc.)
	- View environment state, rewards, and step results
	"""

	import gradio as gr
	from environment import SWEbenchINEnvironment

	# Single environment instance created at import time; both UI callbacks
	# (run_episode and reset_env) operate on this same object.
	env = SWEbenchINEnvironment()


	def run_episode(task_id: int, action_type: str, action_args: str):
	    """Apply one action to the shared environment and render the outcome.

	    Args:
	        task_id: Value of the task slider. The UI passes it in as an input,
	            but this function does not read it.
	        action_type: Action name, forwarded under the ``"type"`` key.
	        action_args: Free-form argument string, forwarded under ``"args"``.

	    Returns:
	        A multi-line text report containing the observation, reward, done
	        flag, step counter, test pass ratio, server status and the
	        per-component reward breakdown.
	    """
	    step_obs, step_reward, finished, step_info = env.step(
	        {"type": action_type, "args": action_args}
	    )
	    snapshot = env.state()

	    # One " name: value" line per reward component; empty if none reported.
	    component_lines = [
	        f" {name}: {value:.3f}"
	        for name, value in step_info.get("reward_breakdown", {}).items()
	    ]
	    breakdown_text = "\n".join(component_lines)

	    # Dict observations carry their message under "text"; when that key is
	    # missing, fall back to the dict's string form. Anything else is shown
	    # as-is.
	    if isinstance(step_obs, dict):
	        shown_obs = step_obs.get("text", str(step_obs))
	    else:
	        shown_obs = step_obs

	    # "?" stands in for counters the environment did not report.
	    step_no = step_info.get('step_count', '?')
	    step_cap = step_info.get('max_steps', '?')

	    return (
	        f"Observation:\n{shown_obs}\n\n"
	        f"Reward: {step_reward:.3f}\n"
	        f"Done: {finished}\n"
	        f"Step: {step_no}/{step_cap}\n"
	        f"Tests passing: {snapshot.tests_passing_ratio:.0%}\n"
	        f"Server running: {snapshot.server_running}\n\n"
	        f"Reward Breakdown:\n{breakdown_text}"
	    )


	def reset_env(task_id: int):
	    """Start a new episode on the selected task and describe it.

	    Args:
	        task_id: Task number from the UI; coerced with ``int()`` before use.

	    Returns:
	        A confirmation message followed by the initial observation text.
	    """
	    task_number = int(task_id)
	    first_obs = env.reset(task_id=task_number)

	    # Dict observations carry their message under "text"; when that key is
	    # missing, fall back to the dict's string form.
	    if isinstance(first_obs, dict):
	        first_obs = first_obs.get("text", str(first_obs))

	    return f"Episode reset. Task {task_number} loaded.\n\nInitial observation:\n{first_obs}"


	# Page layout: a reset section (task slider + button + output box) followed
	# by a step section (action picker + args box + button + output box).
	# NOTE(review): which components sit inside each Row is assumed here —
	# confirm against the intended layout.
	with gr.Blocks(title="SWEbench-IN") as demo:
	    gr.Markdown("# SWEbench-IN — Indian SWE Linux Agent")
	    gr.Markdown(
	        "An RL environment that trains an LLM to fix broken Linux systems "
	        "while managing stakeholder communication simultaneously."
	    )

	    # --- Reset section -------------------------------------------------
	    with gr.Row():
	        task_selector = gr.Slider(
	            minimum=1,
	            maximum=5,
	            value=1,
	            step=1,
	            label="Task ID",
	        )
	        reset_btn = gr.Button("Reset Environment")
	    reset_output = gr.Textbox(label="Environment State", lines=10)
	    reset_btn.click(
	        fn=reset_env,
	        inputs=[task_selector],
	        outputs=[reset_output],
	    )

	    # --- Step section --------------------------------------------------
	    gr.Markdown("## Take an Action")
	    with gr.Row():
	        action_type = gr.Dropdown(
	            choices=[
	                "run_command",
	                "read_file",
	                "write_file",
	                "run_tests",
	                "check_server",
	                "reply_slack",
	                "reply_email",
	                "reply_hr",
	                "close_case",
	            ],
	            label="Action Type",
	        )
	        action_args = gr.Textbox(label="Action Args (command, path, content, etc.)")
	    step_btn = gr.Button("Step")
	    step_output = gr.Textbox(label="Step Result", lines=12)
	    # The task slider is passed along as the first input even though the
	    # step callback only uses the action type and args.
	    step_btn.click(
	        fn=run_episode,
	        inputs=[task_selector, action_type, action_args],
	        outputs=[step_output],
	    )

	# Serve on all network interfaces, port 7860.
	demo.launch(server_name="0.0.0.0", server_port=7860)