Spaces:

SimranShaikh
/

code-review-env

Sleeping

App Files Files Community

code-review-env / environment /tasks.py

SimranShaikh

commit

220b4a7 verified 6 days ago

raw

history blame contribute delete

7 kB

	"""
	Task definitions: Easy → Medium → Hard
	Each task has: code snippet, context, ground truth bugs, and grading rubric.
	"""

	# Registry of code-review tasks keyed by task id, listed easy -> medium -> hard.
	#
	# Every task carries: task_id, task_name, difficulty, language, max_steps,
	# context (prose description of what the code is meant to do), code_snippet
	# (the deliberately flawed code under review) and ground_truth (the expected
	# findings).
	#
	# ground_truth comes in two shapes:
	#   * single-bug tasks: issue_type, bug_line (1-indexed line within
	#     code_snippet), keywords, fix_keywords, description and, optionally,
	#     test_cases;
	#   * multi-issue tasks: a "vulnerabilities" list whose entries each hold
	#     issue_type, name, keywords, fix_keywords and severity.
	#
	# NOTE(review): the grader is not part of this module; "keywords" presumably
	# score the issue identification and "fix_keywords" the proposed fix via text
	# matching -- confirm against the grader before editing the lists.
	TASKS = {
	    "easy_syntax": {
	        "task_id": "easy_syntax",
	        "task_name": "Python Syntax Error Detection",
	        "difficulty": "easy",
	        "language": "python",
	        "max_steps": 5,
	        "context": (
	            "This Python function is supposed to calculate a discounted price. "
	            "It should raise a ValueError if discount exceeds 100%, otherwise "
	            "return the price after applying the discount. Find and fix any errors."
	        ),
	        # The snippet must stay correctly indented so that the missing colon on
	        # its line 2 is the ONLY syntax error, as ground_truth states.
	        "code_snippet": """\
	def calculate_discount(price, discount_percent):
	    if discount_percent > 100
	        raise ValueError("Discount cannot exceed 100%")
	    discount = price * (discount_percent / 100)
	    return price - discount

	result = calculate_discount(200, 15)
	print(result)
	""",
	        "ground_truth": {
	            "issue_type": "syntax_error",
	            "bug_line": 2,
	            # NOTE(review): the bare ":" keyword occurs in almost any prose or
	            # code; if the grader does substring matching it is satisfied
	            # trivially -- confirm whether it should be dropped.
	            "keywords": ["colon", "syntax", "if statement", "missing :", ":"],
	            "fix_keywords": ["if discount_percent > 100:"],
	            "description": "Missing colon at end of if statement on line 2",
	        },
	    },

	    "medium_logic": {
	        "task_id": "medium_logic",
	        "task_name": "Logic Bug: Off-by-One in Palindrome Check",
	        "difficulty": "medium",
	        "language": "python",
	        "max_steps": 8,
	        "context": (
	            "This function checks whether a given string is a palindrome "
	            "(reads the same forwards and backwards, ignoring spaces and case). "
	            "It passes some basic tests but fails on others. Find the logic bug and fix it."
	        ),
	        # Syntactically valid on purpose: the only defect is the s[-i] index on
	        # line 4 of the snippet.
	        "code_snippet": """\
	def is_palindrome(s: str) -> bool:
	    s = s.lower().replace(" ", "")
	    for i in range(len(s) // 2):
	        if s[i] != s[-i]: # Compare from both ends
	            return False
	    return True

	# Expected: True for "racecar", "A man a plan a canal Panama"
	# Expected: False for "hello", "world"
	print(is_palindrome("racecar")) # Should be True
	print(is_palindrome("hello")) # Should be False
	print(is_palindrome("A man a plan a canal Panama")) # Should be True
	""",
	        "ground_truth": {
	            "issue_type": "logic_bug",
	            "bug_line": 4,
	            "keywords": [
	                "off-by-one", "index", "-i", "-(i+1)", "s[-i]",
	                "s[0]", "zero", "first character", "always equal",
	            ],
	            "fix_keywords": ["s[-(i+1)]", "s[-i-1]", "-(i+1)"],
	            "description": (
	                "s[-i] when i=0 evaluates to s[0] (the first character), "
	                "so it always equals s[i] at i=0. Should be s[-(i+1)]."
	            ),
	            # Inputs are already lower-cased and space-free, with the result a
	            # corrected is_palindrome should return for each.
	            "test_cases": [
	                {"input": "racecar", "expected": True},
	                {"input": "hello", "expected": False},
	                {"input": "amanaplanacanalpanama", "expected": True},
	                {"input": "abba", "expected": True},
	                {"input": "abc", "expected": False},
	            ],
	        },
	    },

	    "hard_security": {
	        "task_id": "hard_security",
	        "task_name": "Security Vulnerability: SQL Injection & Path Traversal",
	        "difficulty": "hard",
	        "language": "python",
	        "max_steps": 10,
	        "context": (
	            "This is a user authentication module for a web application. "
	            "It handles login and serves user-uploaded profile documents. "
	            "Perform a thorough security review — identify ALL vulnerabilities "
	            "and provide a fixed, secure version of the code."
	        ),
	        # Syntactically valid module containing three planted vulnerabilities:
	        # string-built SQL, an unchecked file path and MD5 password hashing.
	        "code_snippet": """\
	import sqlite3
	import os


	def authenticate_user(username: str, password: str) -> bool:
	    \"\"\"Authenticate user against the database.\"\"\"
	    conn = sqlite3.connect("users.db")
	    cursor = conn.cursor()
	    # Build query with user input directly
	    query = (
	        "SELECT * FROM users WHERE username='"
	        + username
	        + "' AND password='"
	        + password
	        + "'"
	    )
	    cursor.execute(query)
	    result = cursor.fetchone()
	    conn.close()
	    return result is not None


	def get_user_document(username: str, filename: str) -> str:
	    \"\"\"Return contents of a user's uploaded document.\"\"\"
	    base_dir = "/app/user_docs"
	    filepath = os.path.join(base_dir, username, filename)
	    with open(filepath, "r") as f:
	        return f.read()


	def hash_password(password: str) -> str:
	    \"\"\"Hash password before storage.\"\"\"
	    import hashlib
	    return hashlib.md5(password.encode()).hexdigest()
	""",
	        "ground_truth": {
	            "vulnerabilities": [
	                {
	                    "issue_type": "security_vulnerability",
	                    "name": "SQL Injection",
	                    # NOTE(review): the bare "?" entries (here and in
	                    # fix_keywords) match any question mark in a review; if
	                    # the grader does substring matching they are satisfied
	                    # trivially -- confirm whether they should be dropped.
	                    "keywords": [
	                        "sql injection", "injection", "parameterized",
	                        "prepared statement", "user input", "string concatenation",
	                        "sanitize", "placeholder", "?",
	                    ],
	                    "fix_keywords": [
	                        "?", "parameterized", "cursor.execute(query, (username",
	                        "execute(query, ",
	                    ],
	                    "severity": "critical",
	                },
	                {
	                    "issue_type": "security_vulnerability",
	                    "name": "Path Traversal",
	                    "keywords": [
	                        "path traversal", "directory traversal", "../",
	                        "os.path.abspath", "startswith", "realpath",
	                        "sanitize", "filename", "escape",
	                    ],
	                    "fix_keywords": [
	                        "abspath", "realpath", "startswith", "normpath",
	                    ],
	                    "severity": "high",
	                },
	                {
	                    "issue_type": "security_vulnerability",
	                    "name": "Weak Password Hashing (MD5)",
	                    "keywords": [
	                        "md5", "weak", "hash", "bcrypt", "argon2",
	                        "sha256", "pbkdf2", "salt", "password hashing",
	                    ],
	                    # Only dedicated password-hashing schemes count as a fix.
	                    # A plain hashlib.sha256 digest is still fast and unsalted,
	                    # so it is intentionally NOT accepted here; scrypt (also
	                    # available as hashlib.scrypt) takes its place.
	                    "fix_keywords": [
	                        "bcrypt", "argon2", "pbkdf2", "scrypt",
	                        "passlib", "werkzeug",
	                    ],
	                    "severity": "high",
	                },
	            ],
	        },
	    },
	}


	def get_task(task_id: str) -> dict:
	    """Look up a single task definition by its identifier.

	    Args:
	        task_id: Key into the module-level ``TASKS`` registry.

	    Returns:
	        The task dictionary stored under ``task_id``. This is the registry's
	        own object, not a copy.

	    Raises:
	        ValueError: If ``task_id`` is not a registered task; the message
	            lists the available task ids.
	    """
	    task = TASKS.get(task_id)
	    if task is not None:
	        return task
	    raise ValueError(f"Unknown task: {task_id}. Available: {list(TASKS.keys())}")


	def list_tasks() -> list:
	    """Summarise every registered task.

	    Returns:
	        One dict per entry in ``TASKS``, in the registry's iteration order,
	        holding only the ``task_id``, ``task_name``, ``difficulty``,
	        ``language`` and ``max_steps`` fields. The code snippet, context and
	        ground truth are left out.
	    """
	    summary_fields = ("task_id", "task_name", "difficulty", "language", "max_steps")
	    summaries = []
	    for task in TASKS.values():
	        summaries.append({field: task[field] for field in summary_fields})
	    return summaries