"""
app.py — Gradio HF Space entry point for SWEbench-IN.

Provides a web UI for interacting with the SWEbench-IN environment:
- Reset to any of the 5 tasks
- Take individual actions (run_command, read_file, write_file, etc.)
- View environment state, rewards, and step results
"""

import gradio as gr

from environment import SWEbenchINEnvironment

# One environment instance is created at import time and reused by every
# callback below.
env = SWEbenchINEnvironment()


def _observation_text(obs):
    """Return the display text for an observation.

    Dict observations are rendered via their ``"text"`` entry, falling back
    to ``str(obs)`` when that key is absent; any other observation is
    returned unchanged.
    """
    if isinstance(obs, dict):
        return obs.get("text", str(obs))
    return obs


def run_episode(task_id: int, action_type: str, action_args: str):
    """Single step interface for Gradio demo.

    Args:
        task_id: Current value of the task slider. It is wired in as an
            input by the UI but is not used here; the step is applied to
            whatever episode ``env`` currently holds.
        action_type: Action name chosen in the dropdown.
        action_args: Free-form argument string for the action.

    Returns:
        A formatted multi-line report containing the observation, reward,
        done flag, step counter, state summary and reward breakdown, or a
        short notice when no action type was selected.
    """
    # The dropdown can deliver an empty selection (e.g. nothing chosen yet or
    # the selection was cleared); don't forward an action without a type.
    if not action_type:
        return "No action type selected. Choose an action type before stepping."

    action = {"type": action_type, "args": action_args}
    obs, reward, done, info = env.step(action)
    state = env.state()

    # One line per reward component; empty when the env reports no breakdown.
    breakdown = info.get("reward_breakdown", {})
    breakdown_str = "\n".join(
        f" {k}: {v:.3f}" for k, v in breakdown.items()
    )

    obs_text = _observation_text(obs)

    return (
        f"Observation:\n{obs_text}\n\n"
        f"Reward: {reward:.3f}\n"
        f"Done: {done}\n"
        f"Step: {info.get('step_count', '?')}/{info.get('max_steps', '?')}\n"
        f"Tests passing: {state.tests_passing_ratio:.0%}\n"
        f"Server running: {state.server_running}\n\n"
        f"Reward Breakdown:\n{breakdown_str}"
    )


def reset_env(task_id: int):
    """Reset environment to a specific task.

    Args:
        task_id: Task number from the slider; coerced with ``int()`` before
            being passed to ``env.reset``.

    Returns:
        A message confirming the reset followed by the initial observation.
    """
    task = int(task_id)
    obs = env.reset(task_id=task)
    obs_text = _observation_text(obs)
    return f"Episode reset. Task {task} loaded.\n\nInitial observation:\n{obs_text}"


with gr.Blocks(title="SWEbench-IN") as demo:
    gr.Markdown("# SWEbench-IN — Indian SWE Linux Agent")
    gr.Markdown(
        "An RL environment that trains an LLM to fix broken Linux systems "
        "while managing stakeholder communication simultaneously."
    )

    # Task selection and reset.
    with gr.Row():
        task_selector = gr.Slider(1, 5, value=1, step=1, label="Task ID")
        reset_btn = gr.Button("Reset Environment")

    reset_output = gr.Textbox(label="Environment State", lines=10)
    reset_btn.click(reset_env, inputs=[task_selector], outputs=[reset_output])

    # Single-step action controls.
    gr.Markdown("## Take an Action")
    with gr.Row():
        action_type = gr.Dropdown(
            choices=[
                "run_command",
                "read_file",
                "write_file",
                "run_tests",
                "check_server",
                "reply_slack",
                "reply_email",
                "reply_hr",
                "close_case",
            ],
            label="Action Type",
        )
        action_args = gr.Textbox(label="Action Args (command, path, content, etc.)")

    step_btn = gr.Button("Step")
    step_output = gr.Textbox(label="Step Result", lines=12)
    step_btn.click(
        run_episode,
        inputs=[task_selector, action_type, action_args],
        outputs=[step_output],
    )

# Listen on all interfaces, port 7860.
demo.launch(server_name="0.0.0.0", server_port=7860)