Spaces:
Sleeping
Sleeping
| """ | |
| simulator.py — Dockerless simulator for SWEbench-IN. | |
| Replaces all Docker container operations with: | |
| - A per-episode temp directory (virtual filesystem) | |
| - Local subprocess execution (run with work_dir as the current directory; not a real sandbox) | |
| - pytest run in a local child process (subprocess) | |
| - Local Flask server started as a child process | |
| - requests to localhost for server health checks | |
| """ | |
| import ast | |
| import os | |
| import re | |
| import sys | |
| import time | |
| import shutil | |
| import socket | |
| import tempfile | |
| import subprocess | |
| import threading | |
| import requests as http_requests | |
| from tasks import TASKS | |
class Simulator:
    """Dockerless executor for the SWEbench-IN environment.

    Each episode lives in its own temporary directory (``work_dir``) that
    holds the task's source files, tests, message files and logs. Shell
    commands, pytest and the task's ``app.py`` server are run as local child
    processes with ``work_dir`` as their current directory.

    The action handlers (``run_bash``, ``read_file``, ``write_file``,
    ``write_reply``) report failures as ``"ERROR: ..."`` strings instead of
    raising, so their result can always be passed back as an observation.
    """

    def __init__(self, container_id: str = None):
        # container_id kept for API compatibility — ignored
        self.work_dir: str = None  # set by setup_task()
        self.output_dir: str = None  # <work_dir>/output, set by setup_task()
        self.reply_log: list[str] = []  # formatted replies written this episode
        self._server_proc: subprocess.Popen = None  # running app.py, if any
        self._zombie_sock: socket.socket = None  # socket occupying the port
        self._server_port: int = 8080

    # ------------------------------------------------------------------
    # Task setup / reset
    # ------------------------------------------------------------------
    def setup_task(self, task_id: int) -> str:
        """Reset to a fresh temp directory with the broken task files.

        Stops any server / port blocker left over from the previous episode,
        deletes the previous working directory and populates a new one with
        ``app.py`` (plus ``utils.py`` when the task has a second source file),
        ``tests/test_app.py``, ``messages/*.txt`` and ``logs/error.log``.
        For tasks 4 and 5 the server port is additionally occupied.

        Returns a short status string naming the new working directory.
        """
        # Looked up first so an unknown task_id fails before the previous
        # episode is torn down.
        task = TASKS[task_id]

        self._kill_server()
        self._kill_zombie()

        # Fresh working directory each episode
        if self.work_dir and os.path.exists(self.work_dir):
            shutil.rmtree(self.work_dir, ignore_errors=True)
        self.work_dir = tempfile.mkdtemp(prefix=f"swebench_task{task_id}_")
        self.output_dir = os.path.join(self.work_dir, "output")
        self.reply_log = []
        self._make_dirs()

        # Broken source files
        self._write(os.path.join(self.work_dir, "app.py"), task.broken_app_code)
        if task.broken_app_code_2:
            self._write(
                os.path.join(self.work_dir, "utils.py"), task.broken_app_code_2
            )

        # Tests
        self._write(
            os.path.join(self.work_dir, "tests", "test_app.py"),
            task.test_code,
        )

        # Message files (always created; empty when the task has no message)
        messages = (
            ("slack.txt", task.slack_message),
            ("email.txt", task.email_message),
            ("hr.txt", task.hr_message),
        )
        for fname, content in messages:
            self._write(
                os.path.join(self.work_dir, "messages", fname),
                content or "",
            )

        # Error log
        self._write(
            os.path.join(self.work_dir, "logs", "error.log"),
            f"Task {task_id}: {task.description}",
        )

        # Task-specific breakage
        if task_id in (4, 5):
            # Simulate zombie process blocking port 8080
            self._start_zombie()

        return f"Task {task_id} ready in {self.work_dir}"

    def get_initial_observation(self, task_id: int) -> str:
        """Build the first observation text for ``task_id``.

        Concatenates (separated by blank lines) the error log if it exists,
        whichever of the Slack / email / HR messages the task defines, and a
        footer with the task name, description and action budget.
        """
        task = TASKS[task_id]
        parts = []

        log_path = os.path.join(self.work_dir, "logs", "error.log")
        if os.path.exists(log_path):
            with open(log_path, encoding="utf-8") as f:
                parts.append(f"=== ERROR LOG ===\n{f.read()}")

        if task.slack_message:
            parts.append(
                f"=== SLACK MESSAGE (from Manager) ===\n{task.slack_message}"
            )
        if task.email_message:
            parts.append(f"=== EMAIL (from Client) ===\n{task.email_message}")
        if task.hr_message:
            parts.append(f"=== HR MESSAGE ===\n{task.hr_message}")

        parts.append(f"\n--- Task: {task.name} ---")
        parts.append(f"Description: {task.description}")
        parts.append(f"Max actions: {task.max_actions}")
        return "\n\n".join(parts)

    # ------------------------------------------------------------------
    # Action handlers
    # ------------------------------------------------------------------
    def run_bash(self, command: str) -> str:
        """Execute a shell command inside work_dir (no Docker).

        A few command shapes are intercepted and simulated, checked in this
        order: blocked patterns, ``pip install flask``, process-kill commands
        (release the simulated port blocker) and server start commands
        (delegated to ``_start_server``). Anything else runs through the
        shell with a 10 second timeout.

        Returns the combined stdout + stderr, ``"(no output)"`` when both are
        empty, or a ``"BLOCKED: ..."`` / ``"ERROR: ..."`` string.
        """
        for pattern in ("sudo", "rm -rf /", "chmod 777 /"):
            if pattern in command:
                return f"BLOCKED: '{pattern}' is forbidden."

        # pip install flask — simulate as no-op (flask is available on HF Spaces)
        if re.search(r"pip\s+install\s+flask", command):
            return "Requirement already satisfied: flask"

        # Kill zombie process (tasks 4 & 5).
        # NOTE(review): this is a substring match, so any command that merely
        # contains "kill" takes this branch.
        if any(k in command for k in ("pkill", "fuser -k", "kill")):
            self._kill_zombie()
            return f"OK: Port {self._server_port} cleared."

        # Start Flask server
        if re.search(r"python.*app\.py", command) or "flask run" in command:
            return self._start_server()

        # General command — run locally in work_dir
        try:
            result = subprocess.run(
                command,
                shell=True,
                cwd=self.work_dir,
                capture_output=True,
                text=True,
                timeout=10,
                env={**os.environ, "PYTHONPATH": self.work_dir},
            )
        except subprocess.TimeoutExpired:
            return "ERROR: Command timed out after 10 seconds."
        except Exception as e:  # boundary: always answer with a string
            return f"ERROR: {e}"

        output = (result.stdout + result.stderr).strip()
        return output or "(no output)"

    def read_file(self, path: str) -> str:
        """Read a file from work_dir. Accepts /home/user2/... or relative paths.

        Returns the file's text, or an ``"ERROR: ..."`` string when the file
        is missing, unreadable, or the path points outside ``work_dir``.
        """
        try:
            with open(self._resolve(path), encoding="utf-8") as f:
                return f.read()
        except FileNotFoundError:
            return f"ERROR: File not found: {path}"
        except Exception as e:  # boundary: always answer with a string
            return f"ERROR: {e}"

    def write_file(self, path: str, content: str) -> str:
        """Write content to a file in work_dir.

        Missing parent directories are created. Returns ``"OK: Written to
        <path>"`` or an ``"ERROR: ..."`` string (including when the path
        points outside ``work_dir``).
        """
        try:
            self._write(self._resolve(path), content)
        except Exception as e:  # boundary: always answer with a string
            return f"ERROR: {e}"
        return f"OK: Written to {path}"

    def run_pytest(self) -> dict:
        """Run pytest in work_dir and return pass/fail counts.

        Returns a dict with keys ``passed``, ``failed`` (failures plus
        collection/runtime errors), ``ratio`` (passed / total, 0.0 when
        nothing ran) and ``output`` (combined stdout + stderr). The counts
        are parsed from pytest's textual summary.
        """
        try:
            result = subprocess.run(
                [sys.executable, "-m", "pytest", "tests/", "--tb=short", "-q"],
                cwd=self.work_dir,
                capture_output=True,
                text=True,
                timeout=30,
                env={**os.environ, "PYTHONPATH": self.work_dir},
            )
        except subprocess.TimeoutExpired:
            return {
                "passed": 0,
                "failed": 0,
                "ratio": 0.0,
                "output": "ERROR: pytest timed out.",
            }
        except Exception as e:  # boundary: always answer with a dict
            return {"passed": 0, "failed": 0, "ratio": 0.0, "output": f"ERROR: {e}"}

        output = result.stdout + result.stderr

        def count(pattern: str) -> int:
            # First "<N> <word>" occurrence in the output, or 0 when absent.
            m = re.search(pattern, output)
            return int(m.group(1)) if m else 0

        passed = count(r"(\d+) passed")
        failed = count(r"(\d+) failed")
        errors = count(r"(\d+) error")  # matches both "error" and "errors"
        total = passed + failed + errors
        return {
            "passed": passed,
            "failed": failed + errors,
            "ratio": passed / total if total > 0 else 0.0,
            "output": output,
        }

    def curl_server(self) -> dict:
        """Check if the Flask server is up at localhost:8080.

        Issues a GET to ``http://localhost:<port>`` with a 3 second timeout.
        Returns ``{"status_code": int, "success": bool}``; ``success`` is
        True only for HTTP 200, and any failure to connect yields
        ``{"status_code": 0, "success": False}``.
        """
        try:
            r = http_requests.get(
                f"http://localhost:{self._server_port}", timeout=3
            )
        except Exception:
            # Best-effort health check: connection refused, timeout, etc.
            return {"status_code": 0, "success": False}
        return {"status_code": r.status_code, "success": r.status_code == 200}

    def write_reply(self, recipient: str, content: str) -> str:
        """Append a reply to output/reply.txt.

        The line is stored as ``[RECIPIENT]: content`` and also appended to
        ``reply_log``. Returns ``"OK: Reply sent to RECIPIENT"`` or an
        ``"ERROR: ..."`` string.
        """
        formatted = f"[{recipient.upper()}]: {content}\n"
        reply_path = os.path.join(self.output_dir, "reply.txt")
        try:
            os.makedirs(self.output_dir, exist_ok=True)
            with open(reply_path, "a", encoding="utf-8") as f:
                f.write(formatted)
        except Exception as e:  # boundary: always answer with a string
            return f"ERROR: {e}"
        self.reply_log.append(formatted)
        return f"OK: Reply sent to {recipient.upper()}"

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------
    def _make_dirs(self):
        """Create the standard sub-directories of work_dir."""
        for sub in ("tests", "logs", "messages", "output"):
            os.makedirs(os.path.join(self.work_dir, sub), exist_ok=True)

    @staticmethod
    def _write(path: str, content: str):
        """Write ``content`` to ``path`` as UTF-8, creating parent directories.

        Declared static because it takes no ``self`` but is always invoked as
        ``self._write(path, content)``.
        """
        parent = os.path.dirname(path)
        if parent:  # os.makedirs("") raises for a bare filename
            os.makedirs(parent, exist_ok=True)
        with open(path, "w", encoding="utf-8") as f:
            f.write(content)

    def _resolve(self, path: str) -> str:
        """Translate /home/user2/... or bare relative path to work_dir path.

        Leading slashes are dropped, so absolute-looking paths are treated as
        relative to ``work_dir``.

        Raises:
            ValueError: if the path (after resolving ``..`` and symlinks)
                would land outside ``work_dir``.
        """
        relative = path.replace("/home/user2/", "").lstrip("/")
        root = os.path.realpath(self.work_dir)
        target = os.path.realpath(os.path.join(root, relative))
        if os.path.commonpath([root, target]) != root:
            raise ValueError(f"Path escapes the working directory: {path}")
        return os.path.join(self.work_dir, relative)

    def _start_server(self) -> str:
        """Launch app.py as a child process on port 8080.

        Refuses to start when ``app.py`` is missing, does not parse, or the
        port is already taken. Otherwise polls for up to ~4 seconds until the
        server answers HTTP 200, and returns a status string.
        """
        self._kill_server()
        port = self._server_port

        app_path = os.path.join(self.work_dir, "app.py")
        if not os.path.exists(app_path):
            return "ERROR: app.py not found."

        # Syntax check before launching
        try:
            with open(app_path, encoding="utf-8") as f:
                ast.parse(f.read())
        except (SyntaxError, ValueError) as e:
            # ValueError: older Pythons raise it for source with null bytes.
            return f"ERROR: Syntax error in app.py — {e}"

        # Check if zombie is blocking the port
        if self._port_in_use(port):
            return (
                f"ERROR: Port {port} is already in use. "
                "Kill the blocking process first."
            )

        try:
            self._server_proc = subprocess.Popen(
                [sys.executable, "app.py"],
                cwd=self.work_dir,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                env={**os.environ, "PYTHONPATH": self.work_dir},
            )
        except Exception as e:
            return f"ERROR: Could not start server — {e}"

        # Wait up to 4 s for server to accept connections
        for _ in range(8):
            time.sleep(0.5)
            if self._server_proc.poll() is not None:
                return "ERROR: Server crashed on startup."
            if not self._port_in_use(port):
                continue
            if self.curl_server()["success"]:
                return f"OK: Server started on port {port}."

        # Server started but hasn't responded yet — return optimistic message
        if self._server_proc.poll() is None:
            return "OK: Server process started (may need a moment to be ready)."
        return "ERROR: Server failed to start."

    def _start_zombie(self):
        """Block port 8080 with a socket to simulate a zombie process."""
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
            sock.bind(("0.0.0.0", self._server_port))
            sock.listen(1)
        except OSError:
            # Port already in use — fine, but do not leak the unused socket.
            sock.close()
            self._zombie_sock = None
        else:
            self._zombie_sock = sock

    def _kill_zombie(self):
        """Close the port-blocking socket, if one is open."""
        if self._zombie_sock:
            try:
                self._zombie_sock.close()
            except OSError:
                pass
            self._zombie_sock = None
            time.sleep(0.3)  # Brief pause for OS to release the port

    def _kill_server(self):
        """Stop the app.py child process, if one is running.

        Sends terminate first; if the process has not exited after 3 seconds
        it is killed. The child is waited on in both cases so it is reaped.
        """
        proc = self._server_proc
        if not proc:
            return
        self._server_proc = None
        try:
            proc.terminate()
            proc.wait(timeout=3)
        except subprocess.TimeoutExpired:
            try:
                proc.kill()
                proc.wait(timeout=3)
            except (OSError, subprocess.TimeoutExpired):
                pass
        except OSError:
            pass

    @staticmethod
    def _port_in_use(port: int) -> bool:
        """Return True when a TCP connection to localhost:``port`` succeeds.

        Declared static because it takes no ``self`` but is always invoked as
        ``self._port_in_use(port)``.
        """
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            s.settimeout(1.0)  # never hang the caller on an unresponsive port
            return s.connect_ex(("localhost", port)) == 0