# Source: swebench-ind / app.py (Hugging Face Space)
# Last commit by YUS200619 (83ea4bd):
#   feat: Complete Dockerless migration - update environment, rewards, app, and server wrapper
"""
app.py — Gradio HF Space entry point for SWEbench-IN.
Provides a web UI for interacting with the SWEbench-IN environment:
- Reset to any of the 5 tasks
- Take individual actions (run_command, read_file, write_file, etc.)
- View environment state, rewards, and step results
"""
import gradio as gr
from environment import SWEbenchINEnvironment
# Single environment instance created at import time; both UI callbacks below
# (run_episode and reset_env) operate on this same object.
# NOTE(review): presumably this means every browser session of the app shares
# one episode — confirm that is intended.
env = SWEbenchINEnvironment()
def run_episode(task_id: int, action_type: str, action_args: str):
    """Apply one action to the module-level environment and report the result.

    Args:
        task_id: Value of the task slider. It is accepted only because the UI
            wires the slider into this callback; it is not read here, so the
            step is applied to whatever episode ``env`` currently holds.
        action_type: Action name chosen in the dropdown. May be ``None`` or
            empty when nothing is selected.
        action_args: Free-form argument text for the action (command, path,
            content, ...), passed through unchanged.

    Returns:
        A multi-line string with the observation, reward, done flag, step
        counter, test pass ratio, server status and reward breakdown. When no
        action type is selected, a short prompt is returned instead and the
        environment is not stepped.
    """
    # Without an action type there is nothing meaningful to send to the
    # environment; return early so an accidental click does not reach env.step.
    if not action_type:
        return "No action type selected. Choose an action type, then press Step."

    action = {"type": action_type, "args": action_args}
    obs, reward, done, info = env.step(action)
    state = env.state()

    # `or {}` also covers a "reward_breakdown" key that is present but None.
    breakdown = info.get("reward_breakdown") or {}
    breakdown_str = "\n".join(
        f" {k}: {_format_breakdown_value(v)}" for k, v in breakdown.items()
    )

    # Dict observations carry their display text under "text"; anything else
    # is shown as-is.
    obs_text = obs.get("text", str(obs)) if isinstance(obs, dict) else obs

    return (
        f"Observation:\n{obs_text}\n\n"
        f"Reward: {reward:.3f}\n"
        f"Done: {done}\n"
        f"Step: {info.get('step_count', '?')}/{info.get('max_steps', '?')}\n"
        f"Tests passing: {state.tests_passing_ratio:.0%}\n"
        f"Server running: {state.server_running}\n\n"
        f"Reward Breakdown:\n{breakdown_str}"
    )


def _format_breakdown_value(value):
    """Render one reward-breakdown entry, using 3 decimals when it is numeric."""
    try:
        return f"{value:.3f}"
    except (TypeError, ValueError):
        # Non-numeric entries (e.g. strings or nested structures) are shown
        # verbatim instead of aborting the whole step report.
        return str(value)
def reset_env(task_id: int):
    """Start a fresh episode on the requested task and describe its first observation.

    Args:
        task_id: Task number from the UI slider; coerced to ``int`` before it
            is handed to the environment.

    Returns:
        A message confirming the reset, followed by the initial observation
        text.
    """
    task_number = int(task_id)
    initial = env.reset(task_id=task_number)

    # Dict observations carry their display text under "text"; anything else
    # is shown as-is.
    if isinstance(initial, dict):
        initial_text = initial.get("text", str(initial))
    else:
        initial_text = initial

    confirmation = f"Episode reset. Task {task_number} loaded."
    return f"{confirmation}\n\nInitial observation:\n{initial_text}"
# Web UI: a reset panel (task slider + button) and a step panel (action type,
# action args, button), each with a textbox showing the callback's result.
# NOTE(review): the nesting under each gr.Row() was reconstructed from the
# statement order — confirm it matches the intended layout.
with gr.Blocks(title="SWEbench-IN") as demo:
    gr.Markdown(value="# SWEbench-IN — Indian SWE Linux Agent")
    gr.Markdown(
        value=(
            "An RL environment that trains an LLM to fix broken Linux systems "
            "while managing stakeholder communication simultaneously."
        )
    )

    # --- Reset panel ---
    with gr.Row():
        task_selector = gr.Slider(
            minimum=1,
            maximum=5,
            value=1,
            step=1,
            label="Task ID",
        )
        reset_btn = gr.Button(value="Reset Environment")
    reset_output = gr.Textbox(label="Environment State", lines=10)
    reset_btn.click(
        fn=reset_env,
        inputs=[task_selector],
        outputs=[reset_output],
    )

    # --- Step panel ---
    gr.Markdown(value="## Take an Action")
    with gr.Row():
        action_type = gr.Dropdown(
            choices=[
                "run_command",
                "read_file",
                "write_file",
                "run_tests",
                "check_server",
                "reply_slack",
                "reply_email",
                "reply_hr",
                "close_case",
            ],
            label="Action Type",
        )
        action_args = gr.Textbox(
            label="Action Args (command, path, content, etc.)"
        )
    step_btn = gr.Button(value="Step")
    step_output = gr.Textbox(label="Step Result", lines=12)
    # The slider value is passed along because run_episode takes task_id as
    # its first parameter.
    step_btn.click(
        fn=run_episode,
        inputs=[task_selector, action_type, action_args],
        outputs=[step_output],
    )

# Serve on all interfaces, port 7860.
demo.launch(server_name="0.0.0.0", server_port=7860)