""" nl2sql-bench/server/tasks/medium.py ===================================== Task 2 — Join & Aggregation (difficulty: medium) Questions require at least one JOIN and GROUP BY / HAVING. Expect most frontier models to succeed in 2–3 steps. """ from __future__ import annotations from .base import BaseTask, TaskExample, register @register class JoinAggregationTask(BaseTask): name = "join-aggregation" difficulty = "medium" examples = [ TaskExample( question=( "How many orders has each customer placed? " "Return columns: customer_name, order_count. " "Include customers with zero orders. " "Sort by order_count descending, then customer_name ascending." ), sql=( "SELECT c.name AS customer_name, COUNT(o.id) AS order_count " "FROM customers c " "LEFT JOIN orders o ON c.id = o.customer_id " "GROUP BY c.id, c.name " "ORDER BY order_count DESC, customer_name ASC" ), notes="LEFT JOIN to include zero-order customers, COUNT aggregate.", ), TaskExample( question=( "What is the average product rating per category? " "Only include categories that have at least one review. " "Return columns: category_name, avg_rating. " "Round avg_rating to 2 decimal places. " "Sort by avg_rating descending." ), sql=( "SELECT c.name AS category_name, " " ROUND(AVG(r.rating), 2) AS avg_rating " "FROM categories c " "JOIN products p ON p.category_id = c.id " "JOIN reviews r ON r.product_id = p.id " "GROUP BY c.id, c.name " "ORDER BY avg_rating DESC" ), notes="Two JOINs, AVG aggregate, ROUND function.", ), TaskExample( question=( "Which categories have more than 5 products in stock " "(i.e., stock_quantity > 0)? " "Return columns: category_name, in_stock_count. " "Sort by in_stock_count descending." ), sql=( "SELECT c.name AS category_name, " " COUNT(p.id) AS in_stock_count " "FROM categories c " "JOIN products p ON p.category_id = c.id " "WHERE p.stock_quantity > 0 " "GROUP BY c.id, c.name " "HAVING COUNT(p.id) > 5 " "ORDER BY in_stock_count DESC" ), notes="WHERE before GROUP BY, HAVING filter on aggregate.", ), TaskExample( question=( "Which customers have spent more than $500 total on delivered orders? " "Return columns: customer_name, total_spent. " "Round total_spent to 2 decimal places. " "Sort by total_spent descending." ), sql=( "SELECT c.name AS customer_name, " " ROUND(SUM(o.total_amount), 2) AS total_spent " "FROM customers c " "JOIN orders o ON o.customer_id = c.id " "WHERE o.status = 'delivered' " "GROUP BY c.id, c.name " "HAVING SUM(o.total_amount) > 500 " "ORDER BY total_spent DESC" ), notes="SUM aggregate, HAVING on SUM, status filter.", ), TaskExample( question=( "Show the total quantity sold for each product. " "Only include products that appear in at least one order item. " "Return columns: product_name, total_quantity_sold. " "Sort by total_quantity_sold descending." ), sql=( "SELECT p.name AS product_name, " " SUM(oi.quantity) AS total_quantity_sold " "FROM products p " "JOIN order_items oi ON oi.product_id = p.id " "GROUP BY p.id, p.name " "ORDER BY total_quantity_sold DESC" ), notes="JOIN on order_items, SUM aggregate.", ), ] def description(self) -> str: return ( "Multi-table JOIN queries with GROUP BY, HAVING, and aggregation " "functions (COUNT, SUM, AVG, ROUND). Tests relational reasoning." )