Spaces:
Sleeping
Sleeping
| """ | |
| nl2sql-bench/server/tasks/medium.py | |
| ===================================== | |
| Task 2 — Join & Aggregation (difficulty: medium) | |
| Questions require at least one JOIN and GROUP BY / HAVING. | |
| Expect most frontier models to succeed in 2–3 steps. | |
| """ | |
| from __future__ import annotations | |
| from .base import BaseTask, TaskExample, register | |
| class JoinAggregationTask(BaseTask): | |
| name = "join-aggregation" | |
| difficulty = "medium" | |
| examples = [ | |
| TaskExample( | |
| question=( | |
| "How many orders has each customer placed? " | |
| "Return columns: customer_name, order_count. " | |
| "Include customers with zero orders. " | |
| "Sort by order_count descending, then customer_name ascending." | |
| ), | |
| sql=( | |
| "SELECT c.name AS customer_name, COUNT(o.id) AS order_count " | |
| "FROM customers c " | |
| "LEFT JOIN orders o ON c.id = o.customer_id " | |
| "GROUP BY c.id, c.name " | |
| "ORDER BY order_count DESC, customer_name ASC" | |
| ), | |
| notes="LEFT JOIN to include zero-order customers, COUNT aggregate.", | |
| ), | |
| TaskExample( | |
| question=( | |
| "What is the average product rating per category? " | |
| "Only include categories that have at least one review. " | |
| "Return columns: category_name, avg_rating. " | |
| "Round avg_rating to 2 decimal places. " | |
| "Sort by avg_rating descending." | |
| ), | |
| sql=( | |
| "SELECT c.name AS category_name, " | |
| " ROUND(AVG(r.rating), 2) AS avg_rating " | |
| "FROM categories c " | |
| "JOIN products p ON p.category_id = c.id " | |
| "JOIN reviews r ON r.product_id = p.id " | |
| "GROUP BY c.id, c.name " | |
| "ORDER BY avg_rating DESC" | |
| ), | |
| notes="Two JOINs, AVG aggregate, ROUND function.", | |
| ), | |
| TaskExample( | |
| question=( | |
| "Which categories have more than 5 products in stock " | |
| "(i.e., stock_quantity > 0)? " | |
| "Return columns: category_name, in_stock_count. " | |
| "Sort by in_stock_count descending." | |
| ), | |
| sql=( | |
| "SELECT c.name AS category_name, " | |
| " COUNT(p.id) AS in_stock_count " | |
| "FROM categories c " | |
| "JOIN products p ON p.category_id = c.id " | |
| "WHERE p.stock_quantity > 0 " | |
| "GROUP BY c.id, c.name " | |
| "HAVING COUNT(p.id) > 5 " | |
| "ORDER BY in_stock_count DESC" | |
| ), | |
| notes="WHERE before GROUP BY, HAVING filter on aggregate.", | |
| ), | |
| TaskExample( | |
| question=( | |
| "Which customers have spent more than $500 total on delivered orders? " | |
| "Return columns: customer_name, total_spent. " | |
| "Round total_spent to 2 decimal places. " | |
| "Sort by total_spent descending." | |
| ), | |
| sql=( | |
| "SELECT c.name AS customer_name, " | |
| " ROUND(SUM(o.total_amount), 2) AS total_spent " | |
| "FROM customers c " | |
| "JOIN orders o ON o.customer_id = c.id " | |
| "WHERE o.status = 'delivered' " | |
| "GROUP BY c.id, c.name " | |
| "HAVING SUM(o.total_amount) > 500 " | |
| "ORDER BY total_spent DESC" | |
| ), | |
| notes="SUM aggregate, HAVING on SUM, status filter.", | |
| ), | |
| TaskExample( | |
| question=( | |
| "Show the total quantity sold for each product. " | |
| "Only include products that appear in at least one order item. " | |
| "Return columns: product_name, total_quantity_sold. " | |
| "Sort by total_quantity_sold descending." | |
| ), | |
| sql=( | |
| "SELECT p.name AS product_name, " | |
| " SUM(oi.quantity) AS total_quantity_sold " | |
| "FROM products p " | |
| "JOIN order_items oi ON oi.product_id = p.id " | |
| "GROUP BY p.id, p.name " | |
| "ORDER BY total_quantity_sold DESC" | |
| ), | |
| notes="JOIN on order_items, SUM aggregate.", | |
| ), | |
| ] | |
| def description(self) -> str: | |
| return ( | |
| "Multi-table JOIN queries with GROUP BY, HAVING, and aggregation " | |
| "functions (COUNT, SUM, AVG, ROUND). Tests relational reasoning." | |
| ) | |