# Source: swebench-ind / tasks.py -- Hugging Face Hub page header (YUS200619, "Upload folder using huggingface_hub", commit fdce872, verified).
"""
tasks.py — 5 task definitions for SWEbench-IN.
Each task defines a broken Linux application state, associated test code,
communication messages (Slack/email/HR), and curriculum metadata.
"""
from dataclasses import dataclass
from typing import Optional
@dataclass
class Task:
    """A single SWEbench-IN task definition.

    Bundles the broken application source, the pytest source used to check
    the repair, the optional Slack / email / HR messages that accompany the
    task, and the curriculum metadata.

    Field order is part of the interface: instances may be built
    positionally, so new fields must not be inserted in the middle.
    """

    # Numeric identifier; the TASKS registry in this module uses it as the key.
    task_id: int
    # Short title and one-line summary of the failure.
    name: str
    description: str

    # The buggy app.py content.
    broken_app_code: str
    # Second source file, used by Task 5; empty string for the other tasks.
    broken_app_code_2: str
    # The pytest test content.
    test_code: str

    # Communication messages; None when the task carries no message of
    # that kind.
    slack_message: Optional[str]
    email_message: Optional[str]
    hr_message: Optional[str]

    # NOTE(review): presumably the per-episode action budget -- confirm
    # against the environment code that reads it.
    max_actions: int
    # Difficulty tier: 1 = easy, 2 = medium, 3 = hard.
    curriculum_tier: int
    # NOTE(review): presumably tells the environment to also check that the
    # server is up -- confirm against the consumer.
    verify_server: bool
    # Number of tests expected to pass; in the TASKS entries it equals the
    # number of test functions in test_code.
    expected_test_pass_count: int
# Registry of the five SWEbench-IN tasks, keyed by task_id.
#
# The bugs inside ``broken_app_code`` / ``broken_app_code_2`` are intentional:
# they are the defects each task asks to be repaired, so do not "correct"
# them here. Everything else in the embedded sources must be valid Python,
# which is why the embedded code keeps its normal indentation.
#
# Embedded tests pass ``timeout=5`` to every ``requests.get``: requests has no
# default timeout, and a port held by a process that listens but never answers
# (see the Task 4 note below) would otherwise block the test run indefinitely.
# Task 2's compile check uses ``sys.executable`` so it runs under the same
# interpreter as pytest instead of whatever ``python`` resolves to on PATH.
TASKS = {
    1: Task(
        task_id=1,
        name="Missing Dependency",
        description="Flask not installed. Fix: pip install flask, start server.",
        broken_app_code="""\
from flask import Flask
app = Flask(__name__)
@app.route('/')
def home():
    return 'Hello World'
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=8080)
""",
        # NOTE: flask will be uninstalled at reset, not broken in code
        broken_app_code_2="",
        test_code="""\
import requests
def test_server_responds():
    r = requests.get('http://localhost:8080', timeout=5)
    assert r.status_code == 200
""",
        slack_message=None,
        email_message=None,
        hr_message=None,
        max_actions=5,
        curriculum_tier=1,
        verify_server=True,
        expected_test_pass_count=1,
    ),
    2: Task(
        task_id=2,
        name="Syntax Error",
        description="Missing colon in function definition.",
        # Intentional bug: the colon after ``def home()`` is missing.
        broken_app_code="""\
from flask import Flask
app = Flask(__name__)
@app.route('/')
def home() return 'Hello World'
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=8080)
""",
        broken_app_code_2="",
        test_code="""\
import subprocess
import sys
import requests
def test_syntax_valid():
    result = subprocess.run([sys.executable, '-m', 'py_compile', 'app.py'], capture_output=True)
    assert result.returncode == 0
def test_server_responds():
    r = requests.get('http://localhost:8080', timeout=5)
    assert r.status_code == 200
""",
        slack_message=None,
        email_message=None,
        hr_message=None,
        max_actions=7,
        curriculum_tier=1,
        verify_server=True,
        expected_test_pass_count=2,
    ),
    3: Task(
        task_id=3,
        name="Logic Bug + Manager Slack",
        description="Off-by-one in sort function. Manager wants ETA.",
        # Intentional bug: the inner range runs one past the end of the list.
        broken_app_code="""\
from flask import Flask, jsonify
app = Flask(__name__)
def sort_data(items):
    # Off-by-one: should be range(len(items)-1)
    for i in range(len(items)):
        for j in range(i+1, len(items)+1): # BUG: +1 causes IndexError
            if items[i] > items[j]:
                items[i], items[j] = items[j], items[i]
    return items
@app.route('/sort')
def sort_endpoint():
    data = [3, 1, 4, 1, 5, 9, 2, 6]
    return jsonify(sorted_data=sort_data(data))
@app.route('/')
def home():
    return 'OK'
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=8080)
""",
        broken_app_code_2="",
        test_code="""\
from app import sort_data
def test_sort_basic():
    assert sort_data([3, 1, 2]) == [1, 2, 3]
def test_sort_empty():
    assert sort_data([]) == []
def test_sort_single():
    assert sort_data([1]) == [1]
""",
        slack_message="Tests are red, client demo in 2 hours. What's your ETA?",
        email_message=None,
        hr_message=None,
        max_actions=10,
        curriculum_tier=2,
        verify_server=True,
        expected_test_pass_count=3,
    ),
    4: Task(
        task_id=4,
        name="Service Crash + Client Email",
        description="Port 8080 blocked by zombie process.",
        # The app itself is correct; the fault is the occupied port.
        broken_app_code="""\
from flask import Flask
app = Flask(__name__)
@app.route('/')
def home():
    return 'API OK'
@app.route('/health')
def health():
    return 'healthy'
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=8080)
""",
        # Zombie process injected at reset via:
        # python -c "import socket; s=socket.socket(); s.bind(('0.0.0.0', 8080)); s.listen(1); import time; time.sleep(9999)" &
        # That process listens but never accepts or replies, so the tests
        # below rely on their timeout to fail fast while it still holds the port.
        broken_app_code_2="",
        test_code="""\
import requests
def test_server_responds():
    r = requests.get('http://localhost:8080', timeout=5)
    assert r.status_code == 200
def test_health_check():
    r = requests.get('http://localhost:8080/health', timeout=5)
    assert r.status_code == 200
""",
        slack_message=None,
        email_message="The API has been down for 30 minutes. We are escalating this to your manager.",
        hr_message=None,
        max_actions=12,
        curriculum_tier=2,
        verify_server=True,
        expected_test_pass_count=2,
    ),
    5: Task(
        task_id=5,
        name="Multi-Bug Full Cascade",
        description="3 bugs, server down, 4 tests failing. CEO is asking.",
        # Intentional bug 1 lives in app.py; bugs 2 and 3 live in utils.py.
        broken_app_code="""\
from flask import Flask, jsonify
app = Flask(__name__)
# BUG 1: Wrong variable name
@app.route('/users')
def get_users():
    user_list = ['Alice', 'Bob', 'Charlie']
    return jsonify(users=usr_list) # NameError: usr_list not defined
@app.route('/')
def home():
    return 'OK'
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=8080)
""",
        broken_app_code_2="""\
# utils.py — BUG 2: division by zero, BUG 3: wrong return type
def calculate_average(numbers):
    return sum(numbers) / 0 # BUG 2: should be len(numbers)
def format_name(name):
    return name.upper() + 123 # BUG 3: can't concatenate str and int
""",
        test_code="""\
from app import get_users
from utils import calculate_average, format_name
import requests
def test_users_endpoint():
    r = requests.get('http://localhost:8080/users', timeout=5)
    assert r.status_code == 200
def test_calculate_average():
    assert calculate_average([1, 2, 3]) == 2.0
def test_format_name():
    assert format_name('alice') == 'ALICE'
def test_server_home():
    r = requests.get('http://localhost:8080', timeout=5)
    assert r.status_code == 200
""",
        slack_message="What's happening?? The CEO is asking me directly. Give me a status update NOW.",
        email_message="This is completely unacceptable. The system has been down for an hour. Formal complaint incoming.",
        hr_message="Hi, your leave request for Thursday has been submitted. Please confirm you still want to take it.",
        max_actions=15,
        curriculum_tier=3,
        verify_server=True,
        expected_test_pass_count=4,
    ),
}