"""
Task definitions: Easy → Medium → Hard
Each task has: code snippet, context, ground truth bugs, and grading rubric.
"""

import copy

# Registry of all review tasks, keyed by task id.
#
# Every entry carries the same top-level fields:
#   task_id / task_name / difficulty / language / max_steps -- summary metadata
#       (these are exactly the fields exposed by list_tasks()).
#   context       -- natural-language description of what the snippet should do.
#   code_snippet  -- the buggy source shown to the reviewer.
#   ground_truth  -- the expected findings. For the single-bug tasks this is a
#       flat dict (issue_type, bug_line, keywords, fix_keywords, description);
#       for "hard_security" it is a dict holding a "vulnerabilities" list with
#       one entry per issue. bug_line counts lines of code_snippet from 1.
#       NOTE(review): the keyword lists are presumably matched against the
#       reviewer's answer by a grader defined elsewhere -- confirm there.
TASKS = {
    "easy_syntax": {
        "task_id": "easy_syntax",
        "task_name": "Python Syntax Error Detection",
        "difficulty": "easy",
        "language": "python",
        "max_steps": 5,
        "context": (
            "This Python function is supposed to calculate a discounted price. "
            "It should raise a ValueError if discount exceeds 100%, otherwise "
            "return the price after applying the discount. Find and fix any errors."
        ),
        "code_snippet": """\
def calculate_discount(price, discount_percent):
    if discount_percent > 100
        raise ValueError("Discount cannot exceed 100%")
    discount = price * (discount_percent / 100)
    return price - discount

result = calculate_discount(200, 15)
print(result)
""",
        "ground_truth": {
            "issue_type": "syntax_error",
            "bug_line": 2,
            "keywords": ["colon", "syntax", "if statement", "missing :", ":"],
            "fix_keywords": ["if discount_percent > 100:"],
            "description": "Missing colon at end of if statement on line 2",
        },
    },
    "medium_logic": {
        "task_id": "medium_logic",
        "task_name": "Logic Bug: Off-by-One in Palindrome Check",
        "difficulty": "medium",
        "language": "python",
        "max_steps": 8,
        "context": (
            "This function checks whether a given string is a palindrome "
            "(reads the same forwards and backwards, ignoring spaces and case). "
            "It passes some basic tests but fails on others. Find the logic bug and fix it."
        ),
        "code_snippet": """\
def is_palindrome(s: str) -> bool:
    s = s.lower().replace(" ", "")
    for i in range(len(s) // 2):
        if s[i] != s[-i]:  # Compare from both ends
            return False
    return True

# Expected: True for "racecar", "A man a plan a canal Panama"
# Expected: False for "hello", "world"
print(is_palindrome("racecar"))  # Should be True
print(is_palindrome("hello"))  # Should be False
print(is_palindrome("A man a plan a canal Panama"))  # Should be True
""",
        "ground_truth": {
            "issue_type": "logic_bug",
            "bug_line": 4,
            "keywords": [
                "off-by-one", "index", "-i", "-(i+1)", "s[-i]",
                "s[0]", "zero", "first character", "always equal"
            ],
            "fix_keywords": ["s[-(i+1)]", "s[-i-1]", "-(i+1)"],
            "description": (
                "s[-i] when i=0 evaluates to s[0] (the first character), "
                "so it always equals s[i] at i=0. Should be s[-(i+1)]."
            ),
            # Inputs are already lower-cased with spaces removed where relevant.
            "test_cases": [
                {"input": "racecar", "expected": True},
                {"input": "hello", "expected": False},
                {"input": "amanaplanacanalpanama", "expected": True},
                {"input": "abba", "expected": True},
                {"input": "abc", "expected": False},
            ],
        },
    },
    "hard_security": {
        "task_id": "hard_security",
        "task_name": "Security Vulnerability: SQL Injection & Path Traversal",
        "difficulty": "hard",
        "language": "python",
        "max_steps": 10,
        "context": (
            "This is a user authentication module for a web application. "
            "It handles login and serves user-uploaded profile documents. "
            "Perform a thorough security review — identify ALL vulnerabilities "
            "and provide a fixed, secure version of the code."
        ),
        "code_snippet": """\
import sqlite3
import os

def authenticate_user(username: str, password: str) -> bool:
    \"\"\"Authenticate user against the database.\"\"\"
    conn = sqlite3.connect("users.db")
    cursor = conn.cursor()
    # Build query with user input directly
    query = (
        "SELECT * FROM users WHERE username='"
        + username
        + "' AND password='"
        + password
        + "'"
    )
    cursor.execute(query)
    result = cursor.fetchone()
    conn.close()
    return result is not None

def get_user_document(username: str, filename: str) -> str:
    \"\"\"Return contents of a user's uploaded document.\"\"\"
    base_dir = "/app/user_docs"
    filepath = os.path.join(base_dir, username, filename)
    with open(filepath, "r") as f:
        return f.read()

def hash_password(password: str) -> str:
    \"\"\"Hash password before storage.\"\"\"
    import hashlib
    return hashlib.md5(password.encode()).hexdigest()
""",
        "ground_truth": {
            # Unlike the single-bug tasks, this task expects several findings,
            # one list entry per vulnerability.
            "vulnerabilities": [
                {
                    "issue_type": "security_vulnerability",
                    "name": "SQL Injection",
                    "keywords": [
                        "sql injection", "injection", "parameterized",
                        "prepared statement", "user input", "string concatenation",
                        "sanitize", "placeholder", "?"
                    ],
                    "fix_keywords": [
                        "?", "parameterized", "cursor.execute(query, (username",
                        "execute(query, "
                    ],
                    "severity": "critical",
                },
                {
                    "issue_type": "security_vulnerability",
                    "name": "Path Traversal",
                    "keywords": [
                        "path traversal", "directory traversal", "../",
                        "os.path.abspath", "startswith", "realpath",
                        "sanitize", "filename", "escape"
                    ],
                    "fix_keywords": [
                        "abspath", "realpath", "startswith", "normpath"
                    ],
                    "severity": "high",
                },
                {
                    "issue_type": "security_vulnerability",
                    "name": "Weak Password Hashing (MD5)",
                    "keywords": [
                        "md5", "weak", "hash", "bcrypt", "argon2",
                        "sha256", "pbkdf2", "salt", "password hashing"
                    ],
                    "fix_keywords": [
                        "bcrypt", "argon2", "pbkdf2", "hashlib.sha256",
                        "passlib", "werkzeug"
                    ],
                    "severity": "high",
                },
            ],
        },
    },
}


def get_task(task_id: str) -> dict:
    """Return the full definition of a single task.

    Args:
        task_id: Key into ``TASKS`` (for example ``"easy_syntax"``).

    Returns:
        A deep copy of the task definition. A copy is returned, rather than
        the stored dict itself, so that a caller mutating the result (or any
        nested list/dict such as ``ground_truth``) cannot corrupt the shared
        module-level ``TASKS`` table for later callers.

    Raises:
        ValueError: If ``task_id`` is not a key of ``TASKS``; the message
            lists the available task ids.
    """
    if task_id not in TASKS:
        raise ValueError(f"Unknown task: {task_id}. Available: {list(TASKS.keys())}")
    return copy.deepcopy(TASKS[task_id])


def list_tasks() -> list:
    """Return a summary of every registered task.

    Returns:
        A list with one freshly built dict per entry of ``TASKS``, in the
        table's insertion order, containing only the metadata fields
        ``task_id``, ``task_name``, ``difficulty``, ``language`` and
        ``max_steps`` (the snippet, context and ground truth are omitted).
    """
    return [
        {
            "task_id": t["task_id"],
            "task_name": t["task_name"],
            "difficulty": t["difficulty"],
            "language": t["language"],
            "max_steps": t["max_steps"],
        }
        for t in TASKS.values()
    ]