Spaces:
Sleeping
Sleeping
"""
tasks.py — 5 task definitions for SWEbench-IN.
Each task defines a broken Linux application state, associated test code,
communication messages (Slack/email/HR), and curriculum metadata.
"""
| from dataclasses import dataclass | |
| from typing import Optional | |
@dataclass
class Task:
    """A single SWEbench-IN task definition.

    Bundles the broken application source, the pytest code used to check
    the repair, optional communication messages (Slack/email/HR), and
    curriculum metadata.

    The ``@dataclass`` decorator is required: the class declares only
    annotated fields, and the generated ``__init__`` is what allows
    ``Task(task_id=..., name=..., ...)`` to be constructed with keyword
    (or positional, in declaration order) arguments.
    """

    task_id: int
    name: str
    description: str
    broken_app_code: str  # The buggy app.py content
    broken_app_code_2: str  # Second file for Task 5 (empty for others)
    test_code: str  # The pytest test content
    slack_message: Optional[str]  # None when the task has no Slack message
    email_message: Optional[str]  # None when the task has no email message
    hr_message: Optional[str]  # None when the task has no HR message
    max_actions: int  # NOTE(review): presumably the per-task action budget; confirm with the consumer
    curriculum_tier: int  # 1=easy, 2=medium, 3=hard
    verify_server: bool  # NOTE(review): presumably whether a live-server check is performed; confirm
    expected_test_pass_count: int  # NOTE(review): presumably the number of tests in test_code expected to pass; confirm
# Registry of the task definitions, keyed by task_id (1-5).
#
# The defects inside ``broken_app_code`` / ``broken_app_code_2`` are the
# intentional fixtures each task is built around -- do NOT "fix" them here.
# Everything else in those embedded sources (and all of ``test_code``) must be
# valid, correctly indented Python, otherwise the task fails for a reason other
# than the one described.
TASKS = {
    1: Task(
        task_id=1,
        name="Missing Dependency",
        description="Flask not installed. Fix: pip install flask, start server.",
        broken_app_code="""\
from flask import Flask
app = Flask(__name__)
@app.route('/')
def home():
    return 'Hello World'
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=8080)
""",
        # NOTE: flask will be uninstalled at reset, not broken in code
        broken_app_code_2="",
        test_code="""\
import requests
def test_server_responds():
    r = requests.get('http://localhost:8080')
    assert r.status_code == 200
""",
        slack_message=None,
        email_message=None,
        hr_message=None,
        max_actions=5,
        curriculum_tier=1,
        verify_server=True,
        expected_test_pass_count=1,
    ),
    2: Task(
        task_id=2,
        name="Syntax Error",
        description="Missing colon in function definition.",
        # Intentional bug: the ``def home()`` line has no colon.
        broken_app_code="""\
from flask import Flask
app = Flask(__name__)
@app.route('/')
def home() return 'Hello World'
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=8080)
""",
        broken_app_code_2="",
        test_code="""\
import subprocess, requests
def test_syntax_valid():
    result = subprocess.run(['python', '-m', 'py_compile', 'app.py'], capture_output=True)
    assert result.returncode == 0
def test_server_responds():
    r = requests.get('http://localhost:8080')
    assert r.status_code == 200
""",
        slack_message=None,
        email_message=None,
        hr_message=None,
        max_actions=7,
        curriculum_tier=1,
        verify_server=True,
        expected_test_pass_count=2,
    ),
    3: Task(
        task_id=3,
        name="Logic Bug + Manager Slack",
        description="Off-by-one in sort function. Manager wants ETA.",
        # Intentional bug: the inner range runs one past the end of the list.
        broken_app_code="""\
from flask import Flask, jsonify
app = Flask(__name__)
def sort_data(items):
    # Off-by-one: should be range(len(items)-1)
    for i in range(len(items)):
        for j in range(i+1, len(items)+1): # BUG: +1 causes IndexError
            if items[i] > items[j]:
                items[i], items[j] = items[j], items[i]
    return items
@app.route('/sort')
def sort_endpoint():
    data = [3, 1, 4, 1, 5, 9, 2, 6]
    return jsonify(sorted_data=sort_data(data))
@app.route('/')
def home():
    return 'OK'
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=8080)
""",
        broken_app_code_2="",
        test_code="""\
from app import sort_data
def test_sort_basic():
    assert sort_data([3, 1, 2]) == [1, 2, 3]
def test_sort_empty():
    assert sort_data([]) == []
def test_sort_single():
    assert sort_data([1]) == [1]
""",
        slack_message="Tests are red, client demo in 2 hours. What's your ETA?",
        email_message=None,
        hr_message=None,
        max_actions=10,
        curriculum_tier=2,
        verify_server=True,
        expected_test_pass_count=3,
    ),
    4: Task(
        task_id=4,
        name="Service Crash + Client Email",
        description="Port 8080 blocked by zombie process.",
        broken_app_code="""\
from flask import Flask
app = Flask(__name__)
@app.route('/')
def home():
    return 'API OK'
@app.route('/health')
def health():
    return 'healthy'
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=8080)
""",
        # Zombie process injected at reset via:
        # python -c "import socket; s=socket.socket(); s.bind(('0.0.0.0', 8080)); s.listen(1); import time; time.sleep(9999)" &
        broken_app_code_2="",
        test_code="""\
import requests
def test_server_responds():
    r = requests.get('http://localhost:8080')
    assert r.status_code == 200
def test_health_check():
    r = requests.get('http://localhost:8080/health')
    assert r.status_code == 200
""",
        slack_message=None,
        email_message="The API has been down for 30 minutes. We are escalating this to your manager.",
        hr_message=None,
        max_actions=12,
        curriculum_tier=2,
        verify_server=True,
        expected_test_pass_count=2,
    ),
    5: Task(
        task_id=5,
        name="Multi-Bug Full Cascade",
        description="3 bugs, server down, 4 tests failing. CEO is asking.",
        # Intentional bug 1 lives in app.py; bugs 2 and 3 live in utils.py.
        broken_app_code="""\
from flask import Flask, jsonify
app = Flask(__name__)
# BUG 1: Wrong variable name
@app.route('/users')
def get_users():
    user_list = ['Alice', 'Bob', 'Charlie']
    return jsonify(users=usr_list) # NameError: usr_list not defined
@app.route('/')
def home():
    return 'OK'
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=8080)
""",
        broken_app_code_2="""\
# utils.py — BUG 2: division by zero, BUG 3: wrong return type
def calculate_average(numbers):
    return sum(numbers) / 0 # BUG 2: should be len(numbers)
def format_name(name):
    return name.upper() + 123 # BUG 3: can't concatenate str and int
""",
        test_code="""\
from app import get_users
from utils import calculate_average, format_name
import requests
def test_users_endpoint():
    r = requests.get('http://localhost:8080/users')
    assert r.status_code == 200
def test_calculate_average():
    assert calculate_average([1, 2, 3]) == 2.0
def test_format_name():
    assert format_name('alice') == 'ALICE'
def test_server_home():
    r = requests.get('http://localhost:8080')
    assert r.status_code == 200
""",
        slack_message="What's happening?? The CEO is asking me directly. Give me a status update NOW.",
        email_message="This is completely unacceptable. The system has been down for an hour. Formal complaint incoming.",
        hr_message="Hi, your leave request for Thursday has been submitted. Please confirm you still want to take it.",
        max_actions=15,
        curriculum_tier=3,
        verify_server=True,
        expected_test_pass_count=4,
    ),
}