Spaces:

YUS200619
/

swebench-ind

Sleeping

File size: 2,888 Bytes

fdce872
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83ea4bd
 
 
fdce872
83ea4bd
fdce872
 
 
 
 
 
 
 
 
 
 
 
83ea4bd
 
 
fdce872

"""
app.py — Gradio HF Space entry point for SWEbench-IN.

Provides a web UI for interacting with the SWEbench-IN environment:
- Reset to any of the 5 tasks
- Take individual actions (run_command, read_file, write_file, etc.)
- View environment state, rewards, and step results
"""

import gradio as gr
from environment import SWEbenchINEnvironment

# Single module-level environment instance used by both UI callbacks
# (reset_env and run_episode) defined in this file.
# NOTE(review): being a module global, this is presumably shared by every
# browser session connected to the app — confirm that is intended.
env = SWEbenchINEnvironment()


def run_episode(task_id: int, action_type: str, action_args: str):
    """Apply one action to the shared environment and describe the outcome.

    Args:
        task_id: Accepted to match the UI wiring; not used by this function.
        action_type: Stored under the ``"type"`` key of the action dict.
        action_args: Stored under the ``"args"`` key of the action dict.

    Returns:
        A multi-line string with the observation, reward, done flag, step
        counter, test pass ratio, server status and the reward breakdown.
    """
    step_result = env.step({"type": action_type, "args": action_args})
    obs, reward, done, info = step_result
    state = env.state()

    # One indented "name: value" line per reward component.
    breakdown_lines = []
    for component, value in info.get("reward_breakdown", {}).items():
        breakdown_lines.append(f"  {component}: {value:.3f}")
    breakdown_str = "\n".join(breakdown_lines)

    # Dict observations expose their text under "text"; anything else is
    # shown as-is.
    if isinstance(obs, dict):
        obs_text = obs.get("text", str(obs))
    else:
        obs_text = obs

    report_parts = [
        f"Observation:\n{obs_text}\n\n",
        f"Reward: {reward:.3f}\n",
        f"Done: {done}\n",
        f"Step: {info.get('step_count', '?')}/{info.get('max_steps', '?')}\n",
        f"Tests passing: {state.tests_passing_ratio:.0%}\n",
        f"Server running: {state.server_running}\n\n",
        f"Reward Breakdown:\n{breakdown_str}",
    ]
    return "".join(report_parts)


def reset_env(task_id: int):
    """Reset the shared environment to the given task and report the result.

    Args:
        task_id: Task identifier; coerced with ``int()`` before use.

    Returns:
        A message naming the loaded task followed by the initial observation.
    """
    task_number = int(task_id)
    initial_obs = env.reset(task_id=task_number)

    # Dict observations expose their text under "text"; anything else is
    # shown as-is.
    if isinstance(initial_obs, dict):
        obs_text = initial_obs.get("text", str(initial_obs))
    else:
        obs_text = initial_obs

    return f"Episode reset. Task {task_number} loaded.\n\nInitial observation:\n{obs_text}"


# Build the web UI. Components are declared in the order they are created
# inside the Blocks context, so statement order here is significant.
with gr.Blocks(title="SWEbench-IN") as demo:
    gr.Markdown("# SWEbench-IN — Indian SWE Linux Agent")
    gr.Markdown(
        "An RL environment that trains an LLM to fix broken Linux systems "
        "while managing stakeholder communication simultaneously."
    )

    # --- Reset controls: task picker and reset button share one row. ---
    with gr.Row():
        # Integer-stepped slider over task IDs 1 through 5, defaulting to 1.
        task_selector = gr.Slider(1, 5, value=1, step=1, label="Task ID")
        reset_btn = gr.Button("Reset Environment")
    reset_output = gr.Textbox(label="Environment State", lines=10)
    # Clicking reset calls reset_env with the slider value and shows its
    # returned message in reset_output.
    reset_btn.click(reset_env, inputs=[task_selector], outputs=[reset_output])

    # --- Step controls: action type and free-form args share one row. ---
    gr.Markdown("## Take an Action")
    with gr.Row():
        action_type = gr.Dropdown(
            choices=["run_command", "read_file", "write_file", "run_tests",
                     "check_server", "reply_slack", "reply_email", "reply_hr",
                     "close_case"],
            label="Action Type"
        )
        action_args = gr.Textbox(label="Action Args (command, path, content, etc.)")
    step_btn = gr.Button("Step")
    step_output = gr.Textbox(label="Step Result", lines=12)
    # Clicking step calls run_episode; the slider value is passed as its
    # first argument (task_id) alongside the chosen action type and args.
    step_btn.click(run_episode,
                   inputs=[task_selector, action_type, action_args],
                   outputs=[step_output])

# Start the server at import/run time, listening on 0.0.0.0 port 7860.
demo.launch(server_name="0.0.0.0", server_port=7860)