Spaces:
Sleeping
Sleeping
"""
app.py — Gradio HF Space entry point for SWEbench-IN.

Provides a web UI for interacting with the SWEbench-IN environment:
- Reset to any of the 5 tasks
- Take individual actions (run_command, read_file, write_file, etc.)
- View environment state, rewards, and step results
"""
| import gradio as gr | |
| from environment import SWEbenchINEnvironment | |
# One environment instance is created at import time; both UI callbacks
# (reset_env and run_episode) operate on this same object.
env = SWEbenchINEnvironment()
def run_episode(task_id: int, action_type: str, action_args: str):
    """Apply one action to the module-level environment and describe the result.

    Args:
        task_id: Value of the task slider. It is not used here: the step is
            applied to whatever episode the module-level ``env`` currently
            holds.
        action_type: Action name chosen in the dropdown. May be ``None`` or
            empty when nothing has been selected.
        action_args: Free-form argument string, forwarded to the environment
            unchanged.

    Returns:
        A multi-line text report containing the observation, reward, done
        flag, step counter, test/server status and the reward breakdown, or
        a short prompt when no action type was selected.
    """
    # The dropdown is created without an explicit value, so depending on the
    # Gradio version this callback can receive None. Ask for a selection
    # instead of stepping the environment with a meaningless action.
    if not action_type:
        return "No action type selected. Choose an action type and press Step."

    # Parse action
    action = {"type": action_type, "args": action_args}
    obs, reward, done, info = env.step(action)
    state = env.state()

    # Format reward breakdown, one "name: value" line per component.
    breakdown = info.get("reward_breakdown", {})
    breakdown_str = "\n".join(
        f" {k}: {v:.3f}" for k, v in breakdown.items()
    )

    # Dict observations contribute their "text" entry (falling back to the
    # dict's string form); any other observation is shown as-is.
    obs_text = obs.get("text", str(obs)) if isinstance(obs, dict) else obs

    return (
        f"Observation:\n{obs_text}\n\n"
        f"Reward: {reward:.3f}\n"
        f"Done: {done}\n"
        f"Step: {info.get('step_count', '?')}/{info.get('max_steps', '?')}\n"
        f"Tests passing: {state.tests_passing_ratio:.0%}\n"
        f"Server running: {state.server_running}\n\n"
        f"Reward Breakdown:\n{breakdown_str}"
    )
def reset_env(task_id: int):
    """Start a new episode for the chosen task and report its first observation.

    Args:
        task_id: Task number coming from the UI slider; converted with
            ``int()`` before being passed to the environment.

    Returns:
        A message naming the loaded task, followed by the initial
        observation text.
    """
    task_number = int(task_id)
    first_obs = env.reset(task_id=task_number)

    # Dict observations contribute their "text" entry (falling back to the
    # dict's string form); any other observation is shown as-is.
    if isinstance(first_obs, dict):
        shown = first_obs.get("text", str(first_obs))
    else:
        shown = first_obs

    return f"Episode reset. Task {task_number} loaded.\n\nInitial observation:\n{shown}"
# Build the demo page.
# NOTE(review): the nesting under each gr.Row() below was reconstructed
# because the original indentation was not preserved — confirm against the
# intended layout.
with gr.Blocks(title="SWEbench-IN") as demo:
    gr.Markdown(value="# SWEbench-IN — Indian SWE Linux Agent")
    gr.Markdown(
        value=(
            "An RL environment that trains an LLM to fix broken Linux systems "
            "while managing stakeholder communication simultaneously."
        )
    )

    # --- Reset controls: choose a task, then load it into the environment ---
    with gr.Row():
        task_selector = gr.Slider(minimum=1, maximum=5, value=1, step=1, label="Task ID")
        reset_btn = gr.Button(value="Reset Environment")
    reset_output = gr.Textbox(label="Environment State", lines=10)
    reset_btn.click(fn=reset_env, inputs=[task_selector], outputs=[reset_output])

    # --- Step controls: choose an action type and its arguments, then step ---
    gr.Markdown(value="## Take an Action")
    with gr.Row():
        action_type = gr.Dropdown(
            label="Action Type",
            choices=[
                "run_command",
                "read_file",
                "write_file",
                "run_tests",
                "check_server",
                "reply_slack",
                "reply_email",
                "reply_hr",
                "close_case",
            ],
        )
        action_args = gr.Textbox(label="Action Args (command, path, content, etc.)")
    step_btn = gr.Button(value="Step")
    step_output = gr.Textbox(label="Step Result", lines=12)
    step_btn.click(
        fn=run_episode,
        inputs=[task_selector, action_type, action_args],
        outputs=[step_output],
    )

# Start the web server on host 0.0.0.0, port 7860.
demo.launch(server_name="0.0.0.0", server_port=7860)