Spaces:
Build error
Build error
| """Deterministic task bank for Python code review and repair benchmark.""" | |
| from __future__ import annotations | |
| from dataclasses import dataclass, field | |
| from typing import Dict, List, Optional | |
| from models import Difficulty, TaskDescriptor, TaskKind | |
@dataclass
class TaskSpec:
    """Complete task specification with grading criteria.

    Holds both the fields exposed through :meth:`to_descriptor` and the
    grading-side fields (reference solution, hidden tests, benchmark and style
    settings) that the descriptor leaves out.

    The ``@dataclass`` decorator is required: it generates the keyword
    ``__init__`` used to build the task constants in this module and turns
    ``field(default_factory=list)`` into a fresh list per instance.
    """

    # --- Fields copied into the public descriptor ---
    task_id: str
    title: str
    difficulty: Difficulty
    task_kind: TaskKind
    task_description: str
    starter_code: str

    # --- Grading-side fields (not passed to the descriptor) ---
    reference_code: str
    visible_tests: List[str]  # this one IS exposed, as a copy
    hidden_tests: List[str]

    max_steps: int = 10  # exposed in the descriptor; presumably a step budget

    # Benchmark settings. In this module the entrypoint is a function name and
    # the builder is source text defining an input factory; how they are
    # consumed is decided by the grader, not here.
    benchmark_entrypoint: Optional[str] = None
    benchmark_builder: Optional[str] = None
    benchmark_repeats: int = 1
    benchmark_timeout_s: float = 2.0

    # Style/quality settings; presumably read by the grader -- confirm there.
    style_max_line_length: int = 88
    expected_quality_markers: List[str] = field(default_factory=list)

    def to_descriptor(self) -> TaskDescriptor:
        """Convert to public task descriptor.

        Returns:
            A ``TaskDescriptor`` carrying the ID, title, difficulty, kind,
            description, starter code, visible tests, and step limit. The
            reference code, hidden tests, and benchmark/style settings are
            not passed along.
        """
        return TaskDescriptor(
            task_id=self.task_id,
            title=self.title,
            difficulty=self.difficulty,
            task_kind=self.task_kind,
            task_description=self.task_description,
            starter_code=self.starter_code,
            # Copy so changes to the descriptor's list never reach this spec.
            visible_tests=list(self.visible_tests),
            max_steps=self.max_steps,
        )
| # ============================================================================ | |
| # TASK 1: EASY - Syntax Fixing | |
| # ============================================================================ | |
# Broken submission shown to the agent: the ``.lower(`` call is missing its
# closing parenthesis, so the snippet does not compile as written.
_SYNTAX_FIX_STARTER = """def normalize_username(raw_name: str) -> str:
    cleaned = raw_name.strip().lower(
    if not cleaned:
        return "anonymous"
    return cleaned.replace(" ", "_")
"""

# Repaired version: identical except for the restored ``)``.
_SYNTAX_FIX_REFERENCE = """def normalize_username(raw_name: str) -> str:
    cleaned = raw_name.strip().lower()
    if not cleaned:
        return "anonymous"
    return cleaned.replace(" ", "_")
"""

TASK_SYNTAX_FIX = TaskSpec(
    task_id="syntax-fix-easy",
    title="Fix a syntax-broken username normalizer",
    difficulty="easy",
    task_kind="syntax_fix",
    task_description=(
        "You are reviewing a utility function before merge. "
        "The submitted patch left the function with syntax errors. "
        "Repair the code so it compiles and preserves the intended behavior of "
        "trimming, lowercasing, and replacing spaces with underscores."
    ),
    starter_code=_SYNTAX_FIX_STARTER,
    reference_code=_SYNTAX_FIX_REFERENCE,
    # Each test is a Python expression string that should evaluate truthy.
    visible_tests=[
        "normalize_username(' Alice Smith ') == 'alice_smith'",
        "normalize_username(' ') == 'anonymous'",
        "normalize_username('Bob') == 'bob'",
    ],
    hidden_tests=[
        "normalize_username(' HELLO WORLD ') == 'hello_world'",
        "normalize_username('') == 'anonymous'",
    ],
    max_steps=8,
)
| # ============================================================================ | |
| # TASK 2: MEDIUM - Bug Fixing with Tests | |
| # ============================================================================ | |
# Import, signature, docstring, validation, and discount arithmetic shared
# verbatim by the starter and reference snippets; only the final ``return``
# line differs between the two.
_INVOICE_SHARED_SOURCE = '''from typing import Iterable
def calculate_invoice_total(line_items: Iterable[int], discount_percent: int) -> int:
    """Calculate invoice total with discount applied.
    Args:
        line_items: List of item prices in cents.
        discount_percent: Discount as integer 0-100.
    Returns:
        Final invoice total in cents after discount.
    Raises:
        ValueError: If discount_percent is outside 0-100 range.
    """
    if discount_percent < 0 or discount_percent > 100:
        raise ValueError("discount_percent must be between 0 and 100")
    subtotal = sum(line_items)
    discounted_total = subtotal - (subtotal * discount_percent // 100)
'''

TASK_BUG_FIX = TaskSpec(
    task_id="bug-fix-medium",
    title="Repair invoice discount calculation logic",
    difficulty="medium",
    task_kind="bug_fix",
    task_description=(
        "A billing helper function is returning the wrong amount after applying discounts. "
        "The function signature is correct, but the calculation logic is broken. "
        "Inspect the implementation, run visible tests, and fix the bug so all tests pass. "
        "Do not change the function signature or validation logic."
    ),
    # The planted defect: the discounted value is computed but never returned.
    starter_code=(
        _INVOICE_SHARED_SOURCE
        + "    return subtotal # BUG: returning subtotal instead of discounted_total\n"
    ),
    reference_code=_INVOICE_SHARED_SOURCE + "    return discounted_total\n",
    visible_tests=[
        # 0% discount: total equals the plain sum.
        "calculate_invoice_total([1000, 2000], 0) == 3000",
        # Half off a 3000-cent subtotal.
        "calculate_invoice_total([1000, 2000], 50) == 1500",
        # Single item at 10% off.
        "calculate_invoice_total([1000], 10) == 900",
        # No line items at all.
        "calculate_invoice_total([], 0) == 0",
    ],
    hidden_tests=[
        # Quarter off a 600-cent subtotal.
        "calculate_invoice_total([100, 200, 300], 25) == 450",
        # Near-total discount.
        "calculate_invoice_total([5000], 99) == 50",
    ],
    max_steps=10,
)
| # ============================================================================ | |
| # TASK 3: HARD - Optimization & Code Quality | |
| # ============================================================================ | |
# Deliberately slow starter: it rescans the full event list once per distinct
# user to count that user's events.
_ACTIVITY_STARTER = '''from typing import Iterable
def summarize_user_activity(events: Iterable[dict]) -> list[tuple[str, int]]:
    """Aggregate user activity counts."""
    ordered_users = []
    for event in events:
        user_id = event["user_id"]
        if user_id not in ordered_users:
            ordered_users.append(user_id)
    summary = []
    for user_id in ordered_users:
        count = 0
        for event in events:
            if event["user_id"] == user_id:
                count += 1
        summary.append((user_id, count))
    return sorted(summary, key=lambda item: (-item[1], item[0]))
'''

# Reference solution: a single counting pass followed by the same sort key
# (count descending, then user ID ascending).
_ACTIVITY_REFERENCE = '''from collections import Counter
from typing import Iterable
def summarize_user_activity(events: Iterable[dict]) -> list[tuple[str, int]]:
    """Aggregate user activity counts in one pass."""
    counts = Counter(event["user_id"] for event in events)
    return sorted(counts.items(), key=lambda item: (-item[1], item[0]))
'''

# Source text of the benchmark input factory: 6000 events spread over 400
# distinct user IDs. Note there is no trailing newline.
_ACTIVITY_BENCHMARK_BUILDER = (
    "def build_benchmark_events():\n"
    '    return [{"user_id": f"user_{index % 400}"} for index in range(6000)]'
)

TASK_OPTIMIZATION = TaskSpec(
    task_id="optimization-hard",
    title="Optimize inefficient user activity summarization",
    difficulty="hard",
    task_kind="optimization",
    task_description=(
        "Code review found that `summarize_user_activity` is inefficient for large event streams. "
        "The current implementation repeatedly scans the full event list for every user, making it O(n**2). "
        "Refactor it to aggregate counts in one pass while preserving the sorted output contract. "
        "Style and code quality also matter: use idiomatic Python, proper types, and clear logic. "
        "All tests must pass, and the optimized version should be measurably faster."
    ),
    starter_code=_ACTIVITY_STARTER,
    reference_code=_ACTIVITY_REFERENCE,
    visible_tests=[
        "summarize_user_activity([{'user_id': 'alice'}, {'user_id': 'bob'}, {'user_id': 'alice'}]) == [('alice', 2), ('bob', 1)]",
        "summarize_user_activity([{'user_id': 'z'}, {'user_id': 'a'}]) == [('a', 1), ('z', 1)]",
        "summarize_user_activity([]) == []",
        "summarize_user_activity([{'user_id': 'solo'}]) == [('solo', 1)]",
    ],
    hidden_tests=[
        "summarize_user_activity([{'user_id': 'u2'}, {'user_id': 'u1'}, {'user_id': 'u2'}, {'user_id': 'u2'}, {'user_id': 'u1'}]) == [('u2', 3), ('u1', 2)]",
    ],
    max_steps=10,
    benchmark_entrypoint="summarize_user_activity",
    benchmark_builder=_ACTIVITY_BENCHMARK_BUILDER,
    benchmark_repeats=3,
    benchmark_timeout_s=1.0,
    style_max_line_length=88,
    expected_quality_markers=["Counter", "sorted"],
)
| # ============================================================================ | |
| # Task Bank Registry | |
| # ============================================================================ | |
# Registry of every task, keyed by each spec's own ``task_id``. Insertion
# order is easy -> medium -> hard.
TASKS: Dict[str, TaskSpec] = {
    spec.task_id: spec
    for spec in (TASK_SYNTAX_FIX, TASK_BUG_FIX, TASK_OPTIMIZATION)
}
def task_ids() -> List[str]:
    """Return all task IDs in deterministic order.

    The IDs are read from ``TASKS`` instead of being repeated here, so a task
    added to or removed from the registry cannot get out of sync with this
    listing. Dicts preserve insertion order, so the result follows the order
    in which ``TASKS`` was populated.

    Returns:
        A new list of task ID strings on every call.
    """
    return list(TASKS)
def get_task(task_id: str) -> TaskSpec:
    """Look up the specification registered under ``task_id``.

    Args:
        task_id: Key to look up in ``TASKS``.

    Returns:
        The ``TaskSpec`` stored under that key.

    Raises:
        ValueError: If ``task_id`` is not a key of ``TASKS``; the message
            lists the IDs that are available.
    """
    if task_id in TASKS:
        return TASKS[task_id]
    known_ids = list(TASKS.keys())
    raise ValueError(f"Task {task_id} not found. Available: {known_ids}")
def list_task_descriptors() -> List[TaskDescriptor]:
    """Build the public descriptor for every task.

    Returns:
        One descriptor per ID yielded by ``task_ids()``, in that order, each
        produced by ``TaskSpec.to_descriptor``.
    """
    descriptors = []
    for identifier in task_ids():
        spec = get_task(identifier)
        descriptors.append(spec.to_descriptor())
    return descriptors
def list_task_summaries() -> List[TaskDescriptor]:
    """Return task summaries; currently the same as the full descriptors.

    Returns:
        Exactly what ``list_task_descriptors()`` returns.
    """
    summaries = list_task_descriptors()
    return summaries