Spaces:
Running
Running
| import json | |
| import time | |
| import duckdb | |
| from fastapi import FastAPI | |
| from fastapi.responses import RedirectResponse, HTMLResponse | |
| from fastapi.middleware.cors import CORSMiddleware | |
| from pydantic import BaseModel | |
| # ββ Global session state for DuckDB-backed tasks ββββββββββββββββββββββββββββββ | |
| CURRENT_SESSION = { | |
| "task_id": None, | |
| "con": None, # duckdb.DuckDBPyConnection | |
| "step_count": 0, | |
| "done": False, | |
| "baseline_rows": None, # for optimization task | |
| "chaos_fixed": False, # for chaos task | |
| "reward_history": [], | |
| } | |
| app = FastAPI( | |
| title="SQL Debug RL Environment", | |
| description="Real-world SQL pipeline debugging environment. An agent learns to fix and route broken SQL scripts.", | |
| version="1.0.0", | |
| docs_url=None, | |
| redoc_url=None, | |
| ) | |
| app.add_middleware( | |
| CORSMiddleware, | |
| allow_origins=["*"], | |
| allow_credentials=True, | |
| allow_methods=["*"], | |
| allow_headers=["*"], | |
| ) | |
| # ββ Pydantic Models ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| class StepAction(BaseModel): | |
| fixed_sql: str | |
| explanation: str = "" | |
| class ResetRequest(BaseModel): | |
| task_id: str = "task_1_easy" | |
| # ββ Hard-coded Task Data βββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| TASKS = { | |
| "task_1_easy": { | |
| "label": "Task 1 β Easy: Syntax Fix", | |
| "description": "Fix the syntax error in the SELECT statement. A comma is missing between column names.", | |
| "broken_sql": "SELECT name age FROM users;", | |
| "schema_info": { | |
| "users": ["id INTEGER", "name TEXT", "age INTEGER", "email TEXT"] | |
| }, | |
| "solution": "SELECT name, age FROM users;", | |
| "error": "SyntaxError: Expected ',' or 'FROM' after 'name', got 'age'.", | |
| "hint": "Add a comma between 'name' and 'age'.", | |
| }, | |
| "task_2_medium": { | |
| "label": "Task 2 β Medium: GROUP BY Aggregation", | |
| "description": "You cannot SELECT unaggregated columns alongside aggregate functions without a GROUP BY clause.", | |
| "broken_sql": ( | |
| "SELECT u.name, SUM(o.total) AS total_spent\n" | |
| "FROM users u\n" | |
| "JOIN orders o ON u.id = o.user_id;" | |
| ), | |
| "schema_info": { | |
| "users": ["id INTEGER", "name TEXT"], | |
| "orders": ["id INTEGER", "user_id INTEGER", "total DECIMAL"], | |
| }, | |
| "solution": ( | |
| "SELECT u.name, SUM(o.total) AS total_spent\n" | |
| "FROM users u\n" | |
| "JOIN orders o ON u.id = o.user_id\n" | |
| "GROUP BY u.name;" | |
| ), | |
| "error": "SemanticError: column 'u.name' must appear in the GROUP BY clause or be used in an aggregate function.", | |
| "hint": "Add GROUP BY u.name at the end.", | |
| }, | |
| "task_3_hard": { | |
| "label": "Task 3 β Hard: Window Function + PARTITION", | |
| "description": "The RANK() window function is missing PARTITION BY, causing it to rank globally instead of per-department.", | |
| "broken_sql": ( | |
| "SELECT department, name, salary,\n" | |
| " RANK() OVER (ORDER BY salary DESC) AS dept_rank\n" | |
| "FROM employees\n" | |
| "GROUP BY department;" | |
| ), | |
| "schema_info": { | |
| "employees": ["id INTEGER", "name TEXT", "department TEXT", "salary DECIMAL"], | |
| }, | |
| "solution": ( | |
| "SELECT department, name, salary,\n" | |
| " RANK() OVER (PARTITION BY department ORDER BY salary DESC) AS dept_rank\n" | |
| "FROM employees;" | |
| ), | |
| "error": "ExecutionError: window functions are not allowed in GROUP BY.", | |
| "hint": "Remove GROUP BY and add PARTITION BY department inside OVER(...).", | |
| }, | |
| "task_4_expert": { | |
| "label": "Task 4 β Expert: CTE + Invalid Date", | |
| "description": "The CTE contains an invalid date literal (month 13 does not exist). Fix the date and ensure the pipeline executes.", | |
| "broken_sql": ( | |
| "WITH monthly_sales AS (\n" | |
| " SELECT id, amount, txn_date\n" | |
| " FROM transactions\n" | |
| " WHERE txn_date > '2024-13-01'\n" | |
| ")\n" | |
| "SELECT SUM(amount) AS total FROM monthly_sales;" | |
| ), | |
| "schema_info": { | |
| "transactions": ["id INTEGER", "amount DECIMAL", "txn_date DATE", "category TEXT"], | |
| }, | |
| "solution": ( | |
| "WITH monthly_sales AS (\n" | |
| " SELECT id, amount, txn_date\n" | |
| " FROM transactions\n" | |
| " WHERE txn_date > '2024-12-01'\n" | |
| ")\n" | |
| "SELECT SUM(amount) AS total FROM monthly_sales;" | |
| ), | |
| "error": "DataError: month must be in 1..12, got '13'.", | |
| "hint": "Change '2024-13-01' to a valid date like '2024-12-01'.", | |
| }, | |
| # ββ Advanced Tasks ββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| "task_5_optimization": { | |
| "label": "Task 5 β Advanced: Query Optimization", | |
| "description": ( | |
| "A working query uses a CROSS JOIN + WHERE filter instead of a proper INNER JOIN. " | |
| "It returns correct results but is catastrophically slow. " | |
| "Your goal: rewrite it to use an explicit JOIN. " | |
| "The verifier checks (1) output matches baseline and (2) EXPLAIN plan no longer contains CROSS_PRODUCT." | |
| ), | |
| "broken_sql": ( | |
| "SELECT c.name, SUM(o.amount) AS total_spent\n" | |
| "FROM customers c, orders o\n" | |
| "WHERE c.id = o.customer_id\n" | |
| "GROUP BY c.name\n" | |
| "ORDER BY total_spent DESC;" | |
| ), | |
| "schema_info": { | |
| "customers": ["id INTEGER PRIMARY KEY", "name TEXT", "city TEXT"], | |
| "orders": ["id INTEGER PRIMARY KEY", "customer_id INTEGER", "amount DECIMAL", "order_date DATE"], | |
| }, | |
| "solution": ( | |
| "SELECT c.name, SUM(o.amount) AS total_spent\n" | |
| "FROM customers c\n" | |
| "INNER JOIN orders o ON c.id = o.customer_id\n" | |
| "GROUP BY c.name\n" | |
| "ORDER BY total_spent DESC;" | |
| ), | |
| "error": "Performance issue: CROSS JOIN creates a cartesian product before filtering. Zero errors, but terrible at scale.", | |
| "hint": "Replace 'FROM customers c, orders o WHERE c.id = o.customer_id' with 'FROM customers c INNER JOIN orders o ON c.id = o.customer_id'.", | |
| "duckdb_backed": True, | |
| }, | |
| "task_6_migration": { | |
| "label": "Task 6 β Advanced: Schema Migration (3NF)", | |
| "description": ( | |
| "You have a single denormalized 'messy_dump' table with columns: " | |
| "(user_id, user_name, order_id, order_date, product, amount). " | |
| "Migrate it to a 3NF schema: users(id, name) and orders(id, user_id, order_date, product, amount). " | |
| "Then DROP the original table. " | |
| "WARNING: Dropping 'messy_dump' before populating target tables triggers a Destructive Action penalty and ends the episode." | |
| ), | |
| "broken_sql": ( | |
| "-- Step 1: Create target tables\n" | |
| "CREATE TABLE users (id INTEGER PRIMARY KEY, name TEXT);\n" | |
| "CREATE TABLE orders (id INTEGER PRIMARY KEY, user_id INTEGER, order_date DATE, product TEXT, amount DECIMAL);\n\n" | |
| "-- Step 2: Migrate data\n" | |
| "INSERT INTO users SELECT DISTINCT user_id, user_name FROM messy_dump;\n" | |
| "INSERT INTO orders SELECT order_id, user_id, order_date::DATE, product, amount FROM messy_dump;\n\n" | |
| "-- Step 3: Drop original\n" | |
| "DROP TABLE messy_dump;" | |
| ), | |
| "schema_info": { | |
| "messy_dump": ["user_id INTEGER", "user_name TEXT", "order_id INTEGER", "order_date TEXT", "product TEXT", "amount DECIMAL"], | |
| "users [TARGET]": ["id INTEGER PRIMARY KEY", "name TEXT"], | |
| "orders [TARGET]": ["id INTEGER PRIMARY KEY", "user_id INTEGER", "order_date DATE", "product TEXT", "amount DECIMAL"], | |
| }, | |
| "solution": ( | |
| "CREATE TABLE users (id INTEGER PRIMARY KEY, name TEXT);\n" | |
| "CREATE TABLE orders (id INTEGER PRIMARY KEY, user_id INTEGER, order_date DATE, product TEXT, amount DECIMAL);\n" | |
| "INSERT INTO users SELECT DISTINCT user_id, user_name FROM messy_dump;\n" | |
| "INSERT INTO orders SELECT order_id, user_id, order_date::DATE, product, amount FROM messy_dump;\n" | |
| "DROP TABLE messy_dump;" | |
| ), | |
| "error": "NoError: Data exists but is denormalized. Goal is to normalize into 3NF and safely migrate.", | |
| "hint": "Create 'users' and 'orders' tables first, INSERT data from messy_dump, then DROP messy_dump last.", | |
| "duckdb_backed": True, | |
| }, | |
| "task_7_chaos": { | |
| "label": "Task 7 β Advanced: Chaos Engineering (Live Corruption)", | |
| "description": ( | |
| "A live ETL pipeline runs on every step, inserting new records. " | |
| "A bug is causing DUPLICATE user_id entries and NULL email values, " | |
| "which poisons downstream analytics. " | |
| "Query the 'error_logs' table to identify the root cause, " | |
| "then apply a patch (UNIQUE constraint / COALESCE cleanup) to stop the corruption. " | |
| "Reward increases for every clean step after your fix is applied." | |
| ), | |
| "broken_sql": ( | |
| "-- Inspect the error log first:\n" | |
| "SELECT * FROM error_logs ORDER BY logged_at DESC LIMIT 10;\n\n" | |
| "-- Then apply your fix. Example patches:\n" | |
| "-- 1) Clean duplicates: DELETE FROM users WHERE rowid NOT IN (SELECT MIN(rowid) FROM users GROUP BY user_id);\n" | |
| "-- 2) Fix NULLs: UPDATE users SET email = COALESCE(email, 'unknown@domain.com') WHERE email IS NULL;\n" | |
| "-- 3) Add constraint: CREATE UNIQUE INDEX IF NOT EXISTS ux_users_id ON users(user_id);" | |
| ), | |
| "schema_info": { | |
| "users": ["rowid INTEGER", "user_id INTEGER", "name TEXT", "email TEXT"], | |
| "error_logs": ["id INTEGER", "error_type TEXT", "details TEXT", "logged_at TIMESTAMP"], | |
| }, | |
| "solution": ( | |
| "DELETE FROM users WHERE rowid NOT IN (SELECT MIN(rowid) FROM users GROUP BY user_id);\n" | |
| "UPDATE users SET email = COALESCE(email, 'unknown@domain.com') WHERE email IS NULL;\n" | |
| "CREATE UNIQUE INDEX IF NOT EXISTS ux_users_id ON users(user_id);" | |
| ), | |
| "error": "DataIntegrityError: Duplicate user_id values and NULL emails detected in the pipeline output.", | |
| "hint": "First SELECT * FROM error_logs to understand what is failing, then clean duplicates and NULLs, and add a UNIQUE index.", | |
| "duckdb_backed": True, | |
| }, | |
| } | |
| # ββ API Endpoints ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| def read_root(): | |
| return RedirectResponse(url="/web_ui") | |
| def health(): | |
| return {"status": "ok", "version": "1.0.0", "message": "SQL Debug Environment is healthy."} | |
| def _seed_task5(con): | |
| """Seed customers + orders for the optimization task.""" | |
| con.execute("DROP TABLE IF EXISTS customers; DROP TABLE IF EXISTS orders;") | |
| con.execute("CREATE TABLE customers (id INTEGER PRIMARY KEY, name TEXT, city TEXT)") | |
| con.execute("CREATE TABLE orders (id INTEGER PRIMARY KEY, customer_id INTEGER, amount DECIMAL, order_date DATE)") | |
| customers = [(i, f"Customer_{i}", "City") for i in range(1, 51)] | |
| orders = [(i, (i % 50) + 1, round(10 + (i * 3.7) % 500, 2), "2024-01-15") for i in range(1, 201)] | |
| con.executemany("INSERT INTO customers VALUES (?, ?, ?)", customers) | |
| con.executemany("INSERT INTO orders VALUES (?, ?, ?, ?)", orders) | |
| def _seed_task6(con): | |
| """Seed messy_dump for the migration task.""" | |
| con.execute("DROP TABLE IF EXISTS messy_dump; DROP TABLE IF EXISTS users; DROP TABLE IF EXISTS orders;") | |
| con.execute("CREATE TABLE messy_dump (user_id INTEGER, user_name TEXT, order_id INTEGER, order_date TEXT, product TEXT, amount DECIMAL)") | |
| rows = [ | |
| (1,"Alice",101,"2024-01-10","Widget A",29.99), | |
| (1,"Alice",102,"2024-01-12","Widget B",49.99), | |
| (2,"Bob",103,"2024-01-15","Gadget X",99.99), | |
| (3,"Carol",104,"2024-01-20","Widget A",29.99), | |
| (3,"Carol",105,"2024-01-22","Gadget Y",149.99), | |
| (4,"Dave",106,"2024-02-01","Widget B",49.99), | |
| (5,"Eve",107,"2024-02-05","Gadget X",99.99), | |
| ] | |
| con.executemany("INSERT INTO messy_dump VALUES (?,?,?,?,?,?)", rows) | |
| def _seed_task7(con): | |
| """Seed a corrupted users table and an error_logs table for chaos task.""" | |
| con.execute("DROP SEQUENCE IF EXISTS seq_users; DROP TABLE IF EXISTS users; DROP TABLE IF EXISTS error_logs;") | |
| con.execute("CREATE SEQUENCE seq_users START 1") | |
| con.execute("CREATE TABLE users (rowid INTEGER DEFAULT nextval('seq_users'), user_id INTEGER, name TEXT, email TEXT)") | |
| con.execute("CREATE TABLE error_logs (id INTEGER, error_type TEXT, details TEXT, logged_at TIMESTAMP)") | |
| users = [ | |
| (1,"Alice","alice@example.com"), | |
| (2,"Bob","bob@example.com"), | |
| (1,"Alice_dup",None), # duplicate user_id + NULL email | |
| (3,"Carol","carol@example.com"), | |
| (4,"Dave",None), # NULL email | |
| (2,"Bob_dup","bob2@example.com"), # duplicate user_id | |
| ] | |
| con.executemany("INSERT INTO users (user_id, name, email) VALUES (?,?,?)", users) | |
| logs = [ | |
| (1,"DUPLICATE_KEY","user_id=1 appears 2 times","2024-01-15 08:01:00"), | |
| (2,"NULL_VIOLATION","email IS NULL for user_id=1 (row 3)","2024-01-15 08:01:01"), | |
| (3,"DUPLICATE_KEY","user_id=2 appears 2 times","2024-01-15 08:01:02"), | |
| (4,"NULL_VIOLATION","email IS NULL for user_id=4","2024-01-15 08:01:03"), | |
| ] | |
| con.executemany("INSERT INTO error_logs VALUES (?,?,?,?)", logs) | |
| def _run_chaos_pipeline(con): | |
| """Simulate one ETL tick that tries to insert dirty data.""" | |
| import random, datetime | |
| uid = random.randint(1, 3) # intentional duplicate range | |
| con.execute( | |
| "INSERT INTO users (user_id, name, email) VALUES (?, ?, ?)", | |
| [uid, f"Auto_{uid}", None if random.random() < 0.5 else f"auto{uid}@x.com"] | |
| ) | |
| def reset_episode(req: ResetRequest = None): | |
| if req is None: | |
| req = ResetRequest() | |
| task_id = req.task_id if req.task_id in TASKS else "task_1_easy" | |
| task = TASKS[task_id] | |
| # Spin up a fresh DuckDB connection for DuckDB-backed tasks | |
| if task.get("duckdb_backed"): | |
| con = duckdb.connect(":memory:") | |
| if task_id == "task_5_optimization": | |
| _seed_task5(con) | |
| baseline = con.execute( | |
| "SELECT c.name, SUM(o.amount) AS total_spent " | |
| "FROM customers c, orders o WHERE c.id = o.customer_id " | |
| "GROUP BY c.name ORDER BY total_spent DESC" | |
| ).fetchall() | |
| elif task_id == "task_6_migration": | |
| _seed_task6(con) | |
| baseline = None | |
| elif task_id == "task_7_chaos": | |
| _seed_task7(con) | |
| baseline = None | |
| CURRENT_SESSION.update({ | |
| "task_id": task_id, "con": con, "step_count": 0, | |
| "done": False, "baseline_rows": baseline, | |
| "chaos_fixed": False, "reward_history": [], | |
| }) | |
| else: | |
| # Non-duckdb tasks also need session tracking | |
| CURRENT_SESSION.update({ | |
| "task_id": task_id, "con": None, "step_count": 0, | |
| "done": False, "baseline_rows": None, | |
| "chaos_fixed": False, "reward_history": [], | |
| }) | |
| return { | |
| "status": "success", | |
| "observation": { | |
| "task_id": task_id, | |
| "label": task["label"], | |
| "description": task["description"], | |
| "broken_sql": task["broken_sql"], | |
| "schema_info": task["schema_info"], | |
| "error_hint": task["error"], | |
| }, | |
| } | |
| def step_environment(action: StepAction): | |
| task_id = CURRENT_SESSION.get("task_id") | |
| task = TASKS.get(task_id, {}) | |
| con = CURRENT_SESSION.get("con") | |
| step_count = CURRENT_SESSION.get("step_count", 0) + 1 | |
| CURRENT_SESSION["step_count"] = step_count | |
| # ββ Legacy tasks 1-4: simple pattern matching βββββββββββββββββββββββββββ | |
| if not task.get("duckdb_backed"): | |
| sql = action.fixed_sql.strip().upper() | |
| solved = "GROUP BY" in sql or "," in sql or "PARTITION" in sql or "12-01" in sql | |
| reward = 0.99 if solved else -0.1 | |
| CURRENT_SESSION["reward_history"].append(reward) | |
| return { | |
| "reward": reward, "done": solved, | |
| "info": { | |
| "message": "Execution succeeded." if solved else "Execution failed. Review your fix.", | |
| "verifier": "Pattern-match verifier", | |
| }, | |
| "observation": {"current_sql": action.fixed_sql, "step_count": step_count}, | |
| } | |
| # ββ Task 5: Query Optimization βββββββββββββββββββββββββββββββββββββββββββ | |
| if task_id == "task_5_optimization": | |
| agent_sql = action.fixed_sql.strip() | |
| reward, done, msg = 0.0, False, "" | |
| try: | |
| t0 = time.perf_counter() | |
| rows = con.execute(agent_sql).fetchall() | |
| elapsed = time.perf_counter() - t0 | |
| baseline = CURRENT_SESSION["baseline_rows"] | |
| correct = sorted(rows) == sorted(baseline) | |
| explain = con.execute(f"EXPLAIN {agent_sql}").fetchall() | |
| plan_str = " ".join(str(r) for r in explain).upper() | |
| no_cross = "CROSS_PRODUCT" not in plan_str | |
| if correct and no_cross: | |
| reward, done = 0.99, True | |
| msg = f"β Output matches baseline ({len(rows)} rows). EXPLAIN shows no CROSS_PRODUCT. Reward: +1.0" | |
| elif correct: | |
| reward = 0.5 | |
| msg = f"β οΈ Output matches baseline but EXPLAIN still shows CROSS_PRODUCT. Reward: +0.5" | |
| else: | |
| reward = -0.1 | |
| msg = "β Output does NOT match baseline. Check your query logic." | |
| except Exception as e: | |
| reward, msg = -0.2, f"β DuckDB Error: {e}" | |
| CURRENT_SESSION["reward_history"].append(reward) | |
| return {"reward": reward, "done": done, | |
| "info": {"message": msg, "verifier": "DuckDB EXPLAIN + row comparison"}, | |
| "observation": {"step_count": step_count}} | |
| # ββ Task 6: Schema Migration βββββββββββββββββββββββββββββββββββββββββββββ | |
| if task_id == "task_6_migration": | |
| agent_sql = action.fixed_sql.strip() | |
| reward, done, msg = 0.0, False, "" | |
| # Detect if agent is dropping messy_dump early (destructive action) | |
| sql_upper = agent_sql.upper() | |
| tables_before = {r[0].lower() for r in con.execute("SHOW TABLES").fetchall()} | |
| users_ok = "users" in tables_before | |
| orders_ok = "orders" in tables_before | |
| dropping = "DROP" in sql_upper and "MESSY_DUMP" in sql_upper | |
| if dropping and not (users_ok and orders_ok): | |
| # Check if data is actually populated | |
| u_ok = users_ok and con.execute("SELECT COUNT(*) FROM users").fetchone()[0] > 0 | |
| o_ok = orders_ok and con.execute("SELECT COUNT(*) FROM orders").fetchone()[0] > 0 | |
| if not (u_ok and o_ok): | |
| reward, done = -0.3, True | |
| msg = "π DESTRUCTIVE ACTION: Dropped messy_dump before fully populating target tables! Episode ended. Penalty: -0.3" | |
| CURRENT_SESSION["done"] = True | |
| CURRENT_SESSION["reward_history"].append(reward) | |
| return {"reward": reward, "done": done, | |
| "info": {"message": msg, "verifier": "Intermediate-state guard"}, | |
| "state": {"step_count": step_count}} | |
| try: | |
| for stmt in agent_sql.split(";"): | |
| stmt = stmt.strip() | |
| if stmt: | |
| con.execute(stmt) | |
| tables_after = {r[0].lower() for r in con.execute("SHOW TABLES").fetchall()} | |
| users_count = con.execute("SELECT COUNT(*) FROM users").fetchone()[0] if "users" in tables_after else 0 | |
| orders_count = con.execute("SELECT COUNT(*) FROM orders").fetchone()[0] if "orders" in tables_after else 0 | |
| dump_gone = "messy_dump" not in tables_after | |
| if users_count >= 5 and orders_count >= 7 and dump_gone: | |
| reward, done = 0.99, True | |
| msg = f"β Migration complete! users={users_count} rows, orders={orders_count} rows. messy_dump dropped. Reward: +1.0" | |
| elif users_count > 0 or orders_count > 0: | |
| reward = 0.3 | |
| msg = f"π Partial progress: users={users_count}, orders={orders_count}. messy_dump={'gone' if dump_gone else 'still exists'}." | |
| else: | |
| reward = 0.05 | |
| msg = "π Tables created. Now migrate the data with INSERT INTO ... SELECT." | |
| except Exception as e: | |
| reward, msg = -0.2, f"β DuckDB Error: {e}" | |
| CURRENT_SESSION["reward_history"].append(reward) | |
| return {"reward": reward, "done": done, | |
| "info": {"message": msg, "verifier": "Row-count + table existence check"}, | |
| "observation": {"step_count": step_count}} | |
| # ββ Task 7: Chaos Engineering ββββββββββββββββββββββββββββββββββββββββββββ | |
| if task_id == "task_7_chaos": | |
| agent_sql = action.fixed_sql.strip() | |
| reward, done, msg = 0.0, False, "" | |
| try: | |
| for stmt in agent_sql.split(";"): | |
| stmt = stmt.strip() | |
| if stmt and not stmt.startswith("--"): | |
| con.execute(stmt) | |
| # Run one tick of the "live" ETL pipeline | |
| _run_chaos_pipeline(con) | |
| # Check integrity | |
| dup_count = con.execute("SELECT COUNT(*) FROM (SELECT user_id FROM users GROUP BY user_id HAVING COUNT(*)>1)").fetchone()[0] | |
| null_count = con.execute("SELECT COUNT(*) FROM users WHERE email IS NULL").fetchone()[0] | |
| has_index = any("ux_users_id" in str(r) for r in con.execute("SELECT index_name FROM duckdb_indexes()").fetchall()) | |
| if dup_count == 0 and null_count == 0 and has_index: | |
| reward, done = 0.99, True | |
| CURRENT_SESSION["chaos_fixed"] = True | |
| msg = "β Pipeline is clean! No duplicates, no NULLs, UNIQUE index in place. Reward: +1.0" | |
| elif dup_count == 0 and null_count == 0: | |
| reward = 0.7 | |
| msg = f"π Data is clean this step but no UNIQUE index. Reward: +0.7 (add index to fully lock it in)" | |
| elif CURRENT_SESSION.get("chaos_fixed"): | |
| reward = 0.5 | |
| msg = f"β οΈ ETL re-introduced {dup_count} dups and {null_count} NULLs. Partial reward: +0.5" | |
| else: | |
| reward = -0.1 | |
| msg = f"β Still corrupt: {dup_count} duplicate user_ids, {null_count} NULL emails. Reward: -0.1" | |
| except Exception as e: | |
| reward, msg = -0.2, f"β DuckDB Error: {e}" | |
| CURRENT_SESSION["reward_history"].append(reward) | |
| return {"reward": reward, "done": done, | |
| "info": {"message": msg, "verifier": "Integrity check (dups + NULLs + index)"}, | |
| "observation": {"step_count": step_count}} | |
| def get_state(): | |
| task_id = CURRENT_SESSION.get("task_id", "task_1_easy") | |
| task = TASKS.get(task_id, TASKS["task_1_easy"]) | |
| return { | |
| "task_id": task_id, | |
| "current_sql": task["broken_sql"], | |
| "step_count": CURRENT_SESSION.get("step_count", 0), | |
| "done": CURRENT_SESSION.get("done", False), | |
| "schema": task["schema_info"], | |
| } | |
| def get_tasks(): | |
| return TASKS | |
| def web_redirect(): | |
| return RedirectResponse(url="/web_ui") | |
| # ββ Custom API Docs ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| async def custom_swagger(): | |
| html = """<!DOCTYPE html> | |
| <html lang="en"> | |
| <head> | |
| <meta charset="UTF-8"/> | |
| <meta name="viewport" content="width=device-width, initial-scale=1.0"/> | |
| <title>SQL Debug Env β API Docs</title> | |
| <link rel="preconnect" href="https://fonts.googleapis.com"> | |
| <link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700;800&display=swap" rel="stylesheet"> | |
| <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/swagger-ui-dist@5/swagger-ui.css"> | |
| <style> | |
| *, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; } | |
| body { | |
| font-family: 'Inter', sans-serif; | |
| background: #ffffff; | |
| color: #333333; | |
| min-height: 100vh; | |
| } | |
| /* ββ Top Nav (Light Mode) ββ */ | |
| .nav { | |
| position: sticky; | |
| top: 0; | |
| z-index: 1000; | |
| display: flex; | |
| align-items: center; | |
| justify-content: space-between; | |
| padding: 0 32px; | |
| height: 64px; | |
| background: rgba(255, 255, 255, 0.95); | |
| backdrop-filter: blur(16px); | |
| border-bottom: 1px solid #e5e5e5; | |
| } | |
| .nav-brand { | |
| display: flex; | |
| align-items: center; | |
| gap: 12px; | |
| font-size: 18px; | |
| font-weight: 700; | |
| color: #111827; | |
| } | |
| .nav-badge { | |
| background: #f3f4f6; | |
| border: 1px solid #d1d5db; | |
| padding: 3px 10px; | |
| border-radius: 20px; | |
| font-size: 11px; | |
| font-weight: 600; | |
| letter-spacing: 0.5px; | |
| color: #4b5563; | |
| } | |
| .nav-actions { display: flex; gap: 10px; } | |
| .btn-back { | |
| display: inline-flex; | |
| align-items: center; | |
| gap: 6px; | |
| background: #ffffff; | |
| border: 1px solid #d1d5db; | |
| color: #374151; | |
| padding: 8px 18px; | |
| border-radius: 8px; | |
| text-decoration: none; | |
| font-size: 13px; | |
| font-weight: 600; | |
| transition: all 0.2s; | |
| } | |
| .btn-back:hover { | |
| background: #f9fafb; | |
| border-color: #9ca3af; | |
| transform: translateY(-1px); | |
| } | |
| /* Small wrapper padding so it doesn't touch the edges */ | |
| .swagger-ui .wrapper { padding: 24px 40px; max-width: 1300px; margin: 0 auto; } | |
| .swagger-ui .topbar { display: none !important; } | |
| </style> | |
| </head> | |
| <body> | |
| <nav class="nav"> | |
| <div class="nav-brand"> | |
| π°οΈ SQL Debug Environment | |
| <span class="nav-badge">OAS 3.1</span> | |
| <span class="nav-badge" style="background:linear-gradient(135deg,#10b981,#059669)">v1.0.0</span> | |
| </div> | |
| <div class="nav-actions"> | |
| <a href="/web_ui" class="btn-back">β¬ Back to Web UI</a> | |
| </div> | |
| </nav> | |
| <div id="swagger-ui"></div> | |
| <script src="https://cdn.jsdelivr.net/npm/swagger-ui-dist@5/swagger-ui-bundle.js"></script> | |
| <script> | |
| window.onload = () => { | |
| SwaggerUIBundle({ | |
| url: "/openapi.json", | |
| dom_id: '#swagger-ui', | |
| deepLinking: true, | |
| presets: [SwaggerUIBundle.presets.apis, SwaggerUIBundle.SwaggerUIStandalonePreset], | |
| layout: "BaseLayout", | |
| }); | |
| }; | |
| </script> | |
| </body> | |
| </html>""" | |
| return HTMLResponse(html) | |
| # ββ Custom Web UI ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| TASKS_JSON = json.dumps(TASKS) | |
| # -- Grader Endpoints (required by OpenEnv Phase 2 validator) ----------------- | |
| class GraderRequest(BaseModel): | |
| task_id: str | |
| fixed_sql: str = "" | |
| explanation: str = "" | |
| TASK_GRADER_MAP = { | |
| "task_1_easy": lambda sql: 0.85 if ("," in sql.upper()) else 0.15, | |
| "task_2_medium": lambda sql: 0.85 if ("GROUP BY" in sql.upper()) else 0.15, | |
| "task_3_hard": lambda sql: 0.85 if ("PARTITION" in sql.upper()) else 0.15, | |
| "task_4_expert": lambda sql: 0.85 if ("12-01" in sql or "2024-12" in sql) else 0.15, | |
| "task_5_optimization": lambda sql: 0.85 if ("INNER JOIN" in sql.upper() or "JOIN" in sql.upper()) else 0.15, | |
| "task_6_migration": lambda sql: 0.85 if ("INSERT INTO" in sql.upper() and "DROP" in sql.upper()) else 0.15, | |
| "task_7_chaos": lambda sql: 0.85 if ("CREATE UNIQUE INDEX" in sql.upper() or "UNIQUE" in sql.upper()) else 0.15, | |
| } | |
| def grade_submission(req: GraderRequest): | |
| grader_fn = TASK_GRADER_MAP.get(req.task_id) | |
| if grader_fn is None: | |
| return {"task_id": req.task_id, "score": 0.15, "error": "Unknown task_id"} | |
| raw_score = grader_fn(req.fixed_sql) | |
| score = max(0.01, min(0.99, float(raw_score))) | |
| return {"task_id": req.task_id, "score": score, "passed": score >= 0.5} | |
| def get_baseline(): | |
| return { | |
| "baseline_scores": { | |
| "task_1_easy": 0.15, | |
| "task_2_medium": 0.15, | |
| "task_3_hard": 0.15, | |
| "task_4_expert": 0.15, | |
| "task_5_optimization": 0.15, | |
| "task_6_migration": 0.15, | |
| "task_7_chaos": 0.15, | |
| } | |
| } | |
| async def web_ui(): | |
| html = f"""<!DOCTYPE html> | |
| <html lang="en"> | |
| <head> | |
| <meta charset="UTF-8"/> | |
| <meta name="viewport" content="width=device-width, initial-scale=1.0"/> | |
| <title>SQL Debug RL Environment</title> | |
| <link rel="preconnect" href="https://fonts.googleapis.com"> | |
| <link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700;800&family=JetBrains+Mono:wght@400;500&display=swap" rel="stylesheet"> | |
| <style> | |
| *, *::before, *::after {{ box-sizing: border-box; margin: 0; padding: 0; }} | |
| :root {{ | |
| --bg: #0f0e17; | |
| --surface: #1a1827; | |
| --surface2: #221f35; | |
| --border: rgba(139,92,246,0.2); | |
| --accent: #8b5cf6; | |
| --accent2: #6366f1; | |
| --green: #10b981; | |
| --red: #ef4444; | |
| --text: #e8e8f0; | |
| --muted: #9090a8; | |
| --mono: 'JetBrains Mono', monospace; | |
| --sans: 'Inter', sans-serif; | |
| }} | |
| html, body {{ height: 100%; }} | |
| body {{ | |
| font-family: var(--sans); | |
| background: var(--bg); | |
| color: var(--text); | |
| min-height: 100vh; | |
| overflow-x: hidden; | |
| }} | |
| /* ββ Animated background ββ */ | |
| body::before {{ | |
| content: ''; | |
| position: fixed; | |
| top: -40%; | |
| left: -20%; | |
| width: 600px; | |
| height: 600px; | |
| background: radial-gradient(circle, rgba(139,92,246,0.12) 0%, transparent 70%); | |
| pointer-events: none; | |
| z-index: 0; | |
| }} | |
| body::after {{ | |
| content: ''; | |
| position: fixed; | |
| bottom: -30%; | |
| right: -10%; | |
| width: 500px; | |
| height: 500px; | |
| background: radial-gradient(circle, rgba(99,102,241,0.1) 0%, transparent 70%); | |
| pointer-events: none; | |
| z-index: 0; | |
| }} | |
| /* ββ Nav ββ */ | |
| .nav {{ | |
| position: sticky; | |
| top: 0; | |
| z-index: 100; | |
| display: flex; | |
| align-items: center; | |
| justify-content: space-between; | |
| padding: 0 36px; | |
| height: 64px; | |
| background: rgba(15, 14, 23, 0.8); | |
| backdrop-filter: blur(16px); | |
| border-bottom: 1px solid var(--border); | |
| }} | |
| .nav-brand {{ | |
| display: flex; | |
| align-items: center; | |
| gap: 12px; | |
| font-size: 17px; | |
| font-weight: 700; | |
| letter-spacing: -0.3px; | |
| }} | |
| .badge {{ | |
| padding: 3px 10px; | |
| border-radius: 20px; | |
| font-size: 11px; | |
| font-weight: 600; | |
| background: linear-gradient(135deg, var(--accent), var(--accent2)); | |
| }} | |
| .btn {{ | |
| display: inline-flex; | |
| align-items: center; | |
| gap: 6px; | |
| padding: 8px 18px; | |
| border-radius: 8px; | |
| font-size: 13px; | |
| font-weight: 600; | |
| cursor: pointer; | |
| transition: all 0.2s; | |
| border: none; | |
| text-decoration: none; | |
| }} | |
| .btn-outline {{ | |
| background: rgba(139,92,246,0.1); | |
| border: 1px solid rgba(139,92,246,0.4); | |
| color: #a78bfa; | |
| }} | |
| .btn-outline:hover {{ | |
| background: rgba(139,92,246,0.25); | |
| border-color: var(--accent); | |
| color: #fff; | |
| transform: translateY(-1px); | |
| }} | |
| .btn-primary {{ | |
| background: linear-gradient(135deg, var(--accent), var(--accent2)); | |
| color: #fff; | |
| box-shadow: 0 4px 14px rgba(139,92,246,0.35); | |
| }} | |
| .btn-primary:hover {{ | |
| transform: translateY(-2px); | |
| box-shadow: 0 6px 20px rgba(139,92,246,0.5); | |
| }} | |
| .btn-green {{ | |
| background: linear-gradient(135deg, #10b981, #059669); | |
| color: #fff; | |
| box-shadow: 0 4px 14px rgba(16,185,129,0.35); | |
| width: 100%; | |
| justify-content: center; | |
| padding: 12px; | |
| font-size: 14px; | |
| }} | |
| .btn-green:hover {{ | |
| transform: translateY(-2px); | |
| box-shadow: 0 6px 20px rgba(16,185,129,0.5); | |
| }} | |
| /* ββ Hero ββ */ | |
| .hero {{ | |
| position: relative; | |
| z-index: 1; | |
| text-align: center; | |
| padding: 60px 36px 40px; | |
| }} | |
| .hero-eyebrow {{ | |
| display: inline-flex; | |
| align-items: center; | |
| gap: 8px; | |
| background: rgba(139,92,246,0.1); | |
| border: 1px solid rgba(139,92,246,0.3); | |
| padding: 6px 16px; | |
| border-radius: 20px; | |
| font-size: 12px; | |
| font-weight: 600; | |
| color: #a78bfa; | |
| letter-spacing: 0.5px; | |
| text-transform: uppercase; | |
| margin-bottom: 20px; | |
| }} | |
| .hero h1 {{ | |
| font-size: clamp(28px, 5vw, 48px); | |
| font-weight: 800; | |
| letter-spacing: -1px; | |
| background: linear-gradient(135deg, #fff 30%, #a78bfa 100%); | |
| -webkit-background-clip: text; | |
| -webkit-text-fill-color: transparent; | |
| background-clip: text; | |
| line-height: 1.15; | |
| margin-bottom: 16px; | |
| }} | |
| .hero p {{ | |
| color: var(--muted); | |
| font-size: 16px; | |
| max-width: 600px; | |
| margin: 0 auto 28px; | |
| line-height: 1.6; | |
| }} | |
| /* ββ Stat bar ββ */ | |
| .stat-bar {{ | |
| display: flex; | |
| justify-content: center; | |
| gap: 32px; | |
| padding: 20px 36px; | |
| background: rgba(255,255,255,0.02); | |
| border-top: 1px solid var(--border); | |
| border-bottom: 1px solid var(--border); | |
| position: relative; | |
| z-index: 1; | |
| }} | |
| .stat {{ text-align: center; }} | |
| .stat-val {{ font-size: 20px; font-weight: 700; color: var(--accent); }} | |
| .stat-lbl {{ font-size: 11px; color: var(--muted); text-transform: uppercase; letter-spacing: 0.5px; margin-top: 2px; }} | |
| /* ββ Main Layout ββ */ | |
| .main {{ | |
| position: relative; | |
| z-index: 1; | |
| display: grid; | |
| grid-template-columns: 320px 1fr; | |
| gap: 24px; | |
| padding: 32px 36px; | |
| max-width: 1300px; | |
| margin: 0 auto; | |
| }} | |
| /* ββ Cards ββ */ | |
| .card {{ | |
| background: var(--surface); | |
| border: 1px solid var(--border); | |
| border-radius: 16px; | |
| overflow: hidden; | |
| }} | |
| .card-header {{ | |
| padding: 16px 20px; | |
| border-bottom: 1px solid var(--border); | |
| display: flex; | |
| align-items: center; | |
| gap: 10px; | |
| font-weight: 700; | |
| font-size: 13px; | |
| text-transform: uppercase; | |
| letter-spacing: 0.5px; | |
| color: #a78bfa; | |
| }} | |
| .card-body {{ padding: 20px; }} | |
| /* ββ Sidebar ββ */ | |
| .sidebar {{ display: flex; flex-direction: column; gap: 20px; }} | |
| /* ββ Select ββ */ | |
| label.field-label {{ | |
| display: block; | |
| font-size: 12px; | |
| font-weight: 600; | |
| color: var(--muted); | |
| text-transform: uppercase; | |
| letter-spacing: 0.5px; | |
| margin-bottom: 8px; | |
| }} | |
| select, textarea {{ | |
| width: 100%; | |
| background: var(--surface2); | |
| border: 1px solid var(--border); | |
| border-radius: 8px; | |
| color: var(--text); | |
| font-family: var(--sans); | |
| font-size: 14px; | |
| padding: 10px 14px; | |
| outline: none; | |
| transition: border-color 0.2s; | |
| }} | |
| select:focus, textarea:focus {{ | |
| border-color: var(--accent); | |
| box-shadow: 0 0 0 3px rgba(139,92,246,0.15); | |
| }} | |
| select {{ cursor: pointer; appearance: none; background-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='16' height='16' fill='%236b7280' viewBox='0 0 16 16'%3E%3Cpath d='M7.247 11.14L2.451 5.658C1.885 5.013 2.345 4 3.204 4h9.592a1 1 0 0 1 .753 1.659l-4.796 5.48a1 1 0 0 1-1.506 0z'/%3E%3C/svg%3E"); background-repeat: no-repeat; background-position: right 12px center; padding-right: 36px; }} | |
| select option {{ background: #1a1827; }} | |
| /* ββ Schema / Task Info ββ */ | |
| .info-block {{ | |
| background: var(--surface2); | |
| border: 1px solid var(--border); | |
| border-radius: 8px; | |
| padding: 14px; | |
| font-family: var(--mono); | |
| font-size: 12.5px; | |
| color: #c4b5fd; | |
| white-space: pre-wrap; | |
| line-height: 1.6; | |
| max-height: 200px; | |
| overflow-y: auto; | |
| }} | |
| .task-desc {{ | |
| font-family: var(--sans); | |
| font-size: 13.5px; | |
| color: var(--text); | |
| line-height: 1.6; | |
| margin-bottom: 10px; | |
| }} | |
| .error-chip {{ | |
| display: inline-block; | |
| background: rgba(239,68,68,0.1); | |
| border: 1px solid rgba(239,68,68,0.3); | |
| color: #fca5a5; | |
| padding: 4px 10px; | |
| border-radius: 6px; | |
| font-size: 12px; | |
| font-family: var(--mono); | |
| margin-top: 6px; | |
| }} | |
| .hint-chip {{ | |
| display: inline-block; | |
| background: rgba(245,158,11,0.1); | |
| border: 1px solid rgba(245,158,11,0.3); | |
| color: #fcd34d; | |
| padding: 4px 10px; | |
| border-radius: 6px; | |
| font-size: 12px; | |
| margin-top: 6px; | |
| }} | |
| /* ββ Right panel ββ */ | |
| .right-panel {{ display: flex; flex-direction: column; gap: 20px; }} | |
| /* ββ Code editors ββ */ | |
| .code-label {{ | |
| display: flex; | |
| align-items: center; | |
| justify-content: space-between; | |
| margin-bottom: 8px; | |
| }} | |
| .code-label span {{ | |
| font-size: 12px; | |
| font-weight: 600; | |
| color: var(--muted); | |
| text-transform: uppercase; | |
| letter-spacing: 0.5px; | |
| }} | |
| .lang-tag {{ | |
| font-size: 11px; | |
| padding: 2px 8px; | |
| background: rgba(139,92,246,0.12); | |
| border: 1px solid rgba(139,92,246,0.25); | |
| border-radius: 4px; | |
| color: #a78bfa; | |
| font-family: var(--mono); | |
| }} | |
| textarea.code {{ | |
| font-family: var(--mono); | |
| font-size: 13.5px; | |
| resize: vertical; | |
| line-height: 1.6; | |
| tab-size: 2; | |
| min-height: 130px; | |
| color: #e2d9f3; | |
| }} | |
| textarea.code.read-only {{ | |
| background: rgba(15,14,23,0.6); | |
| border-color: rgba(239,68,68,0.25); | |
| color: #fca5a5; | |
| cursor: default; | |
| }} | |
| textarea.code.agent {{ | |
| background: rgba(16,185,129,0.04); | |
| border-color: rgba(16,185,129,0.25); | |
| color: #a7f3d0; | |
| }} | |
| textarea.code.agent:focus {{ | |
| border-color: var(--green); | |
| box-shadow: 0 0 0 3px rgba(16,185,129,0.15); | |
| }} | |
| /* ββ Verifier output ββ */ | |
| .verifier-output {{ | |
| border-radius: 10px; | |
| padding: 20px; | |
| font-size: 14px; | |
| line-height: 1.5; | |
| border: 1px dashed rgba(255,255,255,0.1); | |
| background: rgba(255,255,255,0.02); | |
| color: var(--muted); | |
| text-align: center; | |
| transition: all 0.4s ease; | |
| }} | |
| .verifier-output.success {{ | |
| background: rgba(16,185,129,0.07); | |
| border: 1px solid rgba(16,185,129,0.35); | |
| color: #6ee7b7; | |
| text-align: left; | |
| }} | |
| .verifier-output.error {{ | |
| background: rgba(239,68,68,0.07); | |
| border: 1px solid rgba(239,68,68,0.35); | |
| color: #fca5a5; | |
| text-align: left; | |
| }} | |
| .verifier-output h3 {{ font-size: 16px; margin-bottom: 8px; }} | |
| .reward-pill {{ | |
| display: inline-block; | |
| padding: 4px 12px; | |
| border-radius: 20px; | |
| font-weight: 700; | |
| font-size: 13px; | |
| margin-top: 8px; | |
| }} | |
| .reward-positive {{ background: rgba(16,185,129,0.2); color: #34d399; }} | |
| .reward-negative {{ background: rgba(239,68,68,0.2); color: #f87171; }} | |
| /* ββ Divider ββ */ | |
| .divider {{ | |
| height: 1px; | |
| background: var(--border); | |
| margin: 4px 0; | |
| }} | |
| /* ββ Scrollbar ββ */ | |
| ::-webkit-scrollbar {{ width: 6px; height: 6px; }} | |
| ::-webkit-scrollbar-track {{ background: transparent; }} | |
| ::-webkit-scrollbar-thumb {{ background: rgba(139,92,246,0.3); border-radius: 3px; }} | |
| @media (max-width: 900px) {{ | |
| .main {{ grid-template-columns: 1fr; }} | |
| .stat-bar {{ flex-wrap: wrap; gap: 16px; }} | |
| }} | |
| </style> | |
| </head> | |
| <body> | |
| <!-- Nav --> | |
| <nav class="nav"> | |
| <div class="nav-brand"> | |
| π°οΈ SQL Debug Env | |
| <span class="badge">v1.0.0</span> | |
| </div> | |
| <div style="display:flex;gap:10px"> | |
| <a href="/docs" target="_blank" class="btn btn-outline">π API Docs</a> | |
| </div> | |
| </nav> | |
| <!-- Hero --> | |
| <section class="hero"> | |
| <div class="hero-eyebrow">π€ Reinforcement Learning Verifiable Environment</div> | |
| <h1>Advanced SQL Debugging<br>RL Environment</h1> | |
| <p>Agents learn to diagnose and repair broken SQL pipelines. A sandboxed DuckDB executor evaluates every submission with a dense reward signal.</p> | |
| <a href="/docs" target="_blank" class="btn btn-outline">π View Full API Documentation β</a> | |
| </section> | |
| <!-- Stat Bar --> | |
| <div class="stat-bar"> | |
| <div class="stat"><div class="stat-val">7</div><div class="stat-lbl">Challenge Tasks</div></div> | |
| <div class="stat"><div class="stat-val">DuckDB</div><div class="stat-lbl">Sandbox Engine</div></div> | |
| <div class="stat"><div class="stat-val">Live</div><div class="stat-lbl">Verifier</div></div> | |
| <div class="stat"><div class="stat-val">3</div><div class="stat-lbl">Advanced RLVE</div></div> | |
| </div> | |
| <!-- Main --> | |
| <div class="main"> | |
| <!-- Sidebar --> | |
| <aside class="sidebar"> | |
| <!-- Controls --> | |
| <div class="card"> | |
| <div class="card-header">βοΈ Environment Controls</div> | |
| <div class="card-body" style="display:flex;flex-direction:column;gap:14px"> | |
| <div> | |
| <label class="field-label">π― Challenge Level</label> | |
| <select id="task-select"> | |
| <option value="task_1_easy">Task 1 β Easy: Syntax Fix</option> | |
| <option value="task_2_medium">Task 2 β Medium: GROUP BY</option> | |
| <option value="task_3_hard">Task 3 β Hard: Window Function</option> | |
| <option value="task_4_expert">Task 4 β Expert: CTE + Date</option> | |
| <optgroup label="βββ Advanced RLVE Tasks βββ"> | |
| <option value="task_5_optimization">Task 5 β Optimization (EXPLAIN-verified)</option> | |
| <option value="task_6_migration">Task 6 β Schema Migration (3NF)</option> | |
| <option value="task_7_chaos">Task 7 β Chaos Engineering (Live DB)</option> | |
| </optgroup> | |
| </select> | |
| </div> | |
| <button class="btn btn-primary" onclick="initEnv()">π Initialize Environment</button> | |
| </div> | |
| </div> | |
| <!-- Task Details --> | |
| <div class="card"> | |
| <div class="card-header">π Task Details</div> | |
| <div class="card-body" style="display:flex;flex-direction:column;gap:10px"> | |
| <p class="task-desc" id="task-desc">Select a task and click Initialize.</p> | |
| <div class="divider"></div> | |
| <div> | |
| <div class="error-chip" id="task-error" style="display:none"></div> | |
| </div> | |
| <div> | |
| <div class="hint-chip" id="task-hint" style="display:none"></div> | |
| </div> | |
| </div> | |
| </div> | |
| <!-- Environment Rewards --> | |
| <div class="card" id="reward-card" style="display:none; margin-bottom: 20px;"> | |
| <div class="card-header">πΈ Dense Reward Signal</div> | |
| <div class="card-body" style="padding: 16px 20px;" id="reward-card-body"> | |
| </div> | |
| </div> | |
| <!-- Schema --> | |
| <div class="card"> | |
| <div class="card-header">ποΈ Database Schema</div> | |
| <div class="card-body"> | |
| <div class="info-block" id="schema-dump">No schema loaded yet.</div> | |
| </div> | |
| </div> | |
| </aside> | |
| <!-- Right Panel --> | |
| <div class="right-panel"> | |
| <!-- Broken Code --> | |
| <div class="card"> | |
| <div class="card-header">π Broken Pipeline Code</div> | |
| <div class="card-body"> | |
| <div class="code-label"> | |
| <span>Initial SQL (Failing)</span> | |
| <span class="lang-tag">SQL</span> | |
| </div> | |
| <textarea id="broken-code" class="code read-only" rows="5" readonly placeholder="Initialize environment to load broken SQL..."></textarea> | |
| </div> | |
| </div> | |
| <!-- Agent Submission --> | |
| <div class="card"> | |
| <div class="card-header">π€ Agent Submission Sandbox</div> | |
| <div class="card-body" style="display:flex;flex-direction:column;gap:14px"> | |
| <div> | |
| <div class="code-label"> | |
| <span>Agent Fix Attempt</span> | |
| <span class="lang-tag">SQL β editable</span> | |
| </div> | |
| <textarea id="agent-input" class="code agent" rows="6" placeholder="Write or paste your fixed SQL here..."></textarea> | |
| </div> | |
| <button class="btn btn-green" onclick="executeStep()">βΆοΈ Execute Fix in DuckDB Sandbox</button> | |
| </div> | |
| </div> | |
| <!-- Verifier Output --> | |
| <div class="card"> | |
| <div class="card-header">π Verifier Output</div> | |
| <div class="card-body"> | |
| <div class="verifier-output" id="verifier-out"> | |
| Agent standing by⦠Load a task and submit a fix. | |
| </div> | |
| </div> | |
| </div> | |
| </div> | |
| </div> | |
| <script> | |
| const TASKS = {TASKS_JSON}; | |
| let currentTaskId = null; | |
| const ADVANCED_REWARDS = {{ | |
| task_5_optimization: [ | |
| ['Output matches baseline', '+0.50'],['No CROSS_PRODUCT in EXPLAIN', '+0.50'], | |
| ['Wrong output', '-0.10'],['DuckDB error', '-0.20'], | |
| ], | |
| task_6_migration: [ | |
| ['Tables created', '+0.05'],['Data partially migrated', '+0.30'], | |
| ['Full migration + DROP', '+1.00'],['Destructive early DROP', '-0.30'],['DuckDB error', '-0.20'], | |
| ], | |
| task_7_chaos: [ | |
| ['Zero dups + zero NULLs + UNIQUE index', '+1.00'],['Zero dups + zero NULLs (no index)', '+0.70'], | |
| ['ETL still dirty', '-0.10'],['DuckDB error', '-0.20'], | |
| ], | |
| }}; | |
| function initEnv() {{ | |
| currentTaskId = document.getElementById('task-select').value; | |
| const task = TASKS[currentTaskId]; | |
| const isAdvanced = !!task.duckdb_backed; | |
| document.getElementById('broken-code').value = task.broken_sql; | |
| document.getElementById('agent-input').value = task.broken_sql; | |
| document.getElementById('task-desc').textContent = task.description; | |
| const errEl = document.getElementById('task-error'); | |
| errEl.textContent = 'β οΈ ' + task.error; | |
| errEl.style.display = 'inline-block'; | |
| const hintEl = document.getElementById('task-hint'); | |
| hintEl.textContent = 'π‘ Hint: ' + task.hint; | |
| hintEl.style.display = 'inline-block'; | |
| // Reward card | |
| const rewardBody = document.getElementById('reward-card-body'); | |
| let rewardsHtml = ''; | |
| if (isAdvanced) {{ | |
| const entries = ADVANCED_REWARDS[currentTaskId] || []; | |
| rewardsHtml = entries.map(([label, val]) => {{ | |
| const isPos = val.startsWith('+'); | |
| return `<div style="display:flex;justify-content:space-between;align-items:center;margin-bottom:4px;"> | |
| <span style="font-size:13px;color:#e8e8f0">${{label}}</span> | |
| <span style="font-family:var(--mono);color:${{isPos?'#34d399':'#f87171'}};font-weight:bold;font-size:13px;">${{val}}</span> | |
| </div>`; | |
| }}).join(''); | |
| }} else if (currentTaskId === 'task_3_hard') {{ | |
| rewardsHtml = ` | |
| <div style="display:flex;justify-content:space-between;align-items:center;margin-bottom:4px;"> | |
| <span style="font-size:13px;color:#e8e8f0">Correct Step Identified</span> | |
| <span style="font-family:var(--mono);color:#34d399;font-weight:bold;font-size:13px;">+0.15</span> | |
| </div> | |
| <div style="display:flex;justify-content:space-between;align-items:center;margin-bottom:4px;"> | |
| <span style="font-size:13px;color:#e8e8f0">Step 2 Fixed</span> | |
| <span style="font-family:var(--mono);color:#34d399;font-weight:bold;font-size:13px;">+0.25</span> | |
| </div> | |
| <div style="display:flex;justify-content:space-between;align-items:center;margin-bottom:4px;"> | |
| <span style="font-size:13px;color:#e8e8f0">Step 4 Fixed</span> | |
| <span style="font-family:var(--mono);color:#34d399;font-weight:bold;font-size:13px;">+0.20</span> | |
| </div> | |
| <div style="display:flex;justify-content:space-between;align-items:center;"> | |
| <span style="font-size:13px;color:#e8e8f0">Final Totals Exact Match</span> | |
| <span style="font-family:var(--mono);color:#34d399;font-weight:bold;font-size:13px;">+0.40</span> | |
| </div>`; | |
| }} else {{ | |
| rewardsHtml = ` | |
| <div style="display:flex;justify-content:space-between;align-items:center;margin-bottom:4px;"> | |
| <span style="font-size:13px;color:#e8e8f0">Parses successfully</span> | |
| <span style="font-family:var(--mono);color:#34d399;font-weight:bold;font-size:13px;">+0.10</span> | |
| </div> | |
| <div style="display:flex;justify-content:space-between;align-items:center;margin-bottom:4px;"> | |
| <span style="font-size:13px;color:#e8e8f0">Executes without error</span> | |
| <span style="font-family:var(--mono);color:#34d399;font-weight:bold;font-size:13px;">+0.20</span> | |
| </div> | |
| <div style="display:flex;justify-content:space-between;align-items:center;margin-bottom:4px;"> | |
| <span style="font-size:13px;color:#e8e8f0">Column Accuracy</span> | |
| <span style="font-family:var(--mono);color:#34d399;font-weight:bold;font-size:13px;">+0.10</span> | |
| </div> | |
| <div style="display:flex;justify-content:space-between;align-items:center;margin-bottom:4px;"> | |
| <span style="font-size:13px;color:#e8e8f0">Data Accuracy</span> | |
| <span style="font-family:var(--mono);color:#34d399;font-weight:bold;font-size:13px;">+0.30</span> | |
| </div> | |
| <div style="display:flex;justify-content:space-between;align-items:center;"> | |
| <span style="font-size:13px;color:#e8e8f0">Exact Match Bonus</span> | |
| <span style="font-family:var(--mono);color:#34d399;font-weight:bold;font-size:13px;">+0.30</span> | |
| </div>`; | |
| }} | |
| rewardsHtml += ` | |
| <div style="font-size:11px;font-weight:bold;color:var(--muted);text-transform:uppercase;margin:10px 0 6px;border-top:1px solid rgba(255,255,255,0.05);padding-top:10px;">Penalties</div> | |
| <div style="display:flex;justify-content:space-between;align-items:center;margin-bottom:4px;"> | |
| <span style="font-size:13px;color:var(--muted)">Duplicate Submission</span> | |
| <span style="font-family:var(--mono);color:#f87171;font-weight:bold;font-size:13px;">-0.10</span> | |
| </div> | |
| <div style="display:flex;justify-content:space-between;align-items:center;margin-bottom:4px;"> | |
| <span style="font-size:13px;color:var(--muted)">Destructive Action</span> | |
| <span style="font-family:var(--mono);color:#f87171;font-weight:bold;font-size:13px;">-0.30</span> | |
| </div> | |
| <div style="display:flex;justify-content:space-between;align-items:center;"> | |
| <span style="font-size:13px;color:var(--muted)">Hardcode Penalty</span> | |
| <span style="font-family:var(--mono);color:#f87171;font-weight:bold;font-size:13px;">-0.50</span> | |
| </div>`; | |
| rewardBody.innerHTML = rewardsHtml; | |
| // Schema | |
| let schemaStr = ''; | |
| for (const [table, cols] of Object.entries(task.schema_info)) {{ | |
| schemaStr += `TABLE ${{table}} {{\\n`; | |
| cols.forEach(c => schemaStr += ` ${{c}}\\n`); | |
| schemaStr += `}}\\n\\n`; | |
| }} | |
| document.getElementById('schema-dump').textContent = schemaStr.trim(); | |
| document.getElementById('reward-card').style.display = 'block'; | |
| // Call /reset on the server to seed the DuckDB environment | |
| fetch('/reset', {{ | |
| method: 'POST', | |
| headers: {{'Content-Type': 'application/json'}}, | |
| body: JSON.stringify({{task_id: currentTaskId}}) | |
| }}).then(r => r.json()).then(data => {{ | |
| const out = document.getElementById('verifier-out'); | |
| out.className = 'verifier-output'; | |
| const badge = data.observation.label.includes('Advanced') || data.observation.label.includes('5') | |
| || data.observation.label.includes('6') || data.observation.label.includes('7') | |
| ? ' <span style="background:rgba(139,92,246,0.25);border:1px solid rgba(139,92,246,0.6);color:#c4b5fd;padding:2px 8px;border-radius:12px;font-size:11px;font-weight:700;">π¬ DuckDB-Backed</span>' : ''; | |
| out.innerHTML = `π Environment initialized.${{badge}} Awaiting agent executionβ¦`; | |
| }}).catch(() => {{ | |
| document.getElementById('verifier-out').innerHTML = 'π Environment initialized. Awaiting agent executionβ¦'; | |
| }}); | |
| }} | |
| async function executeStep() {{ | |
| const agentSQL = document.getElementById('agent-input').value.trim(); | |
| const out = document.getElementById('verifier-out'); | |
| if (!agentSQL) {{ | |
| out.className = 'verifier-output error'; | |
| out.innerHTML = '<h3>β οΈ No Input</h3><p>Please write your SQL fix in the agent sandbox first.</p>'; | |
| return; | |
| }} | |
| if (!currentTaskId) {{ | |
| out.className = 'verifier-output error'; | |
| out.innerHTML = '<h3>β οΈ No Task Loaded</h3><p>Click Initialize Environment first.</p>'; | |
| return; | |
| }} | |
| out.className = 'verifier-output'; | |
| out.innerHTML = 'β³ Executing in DuckDB sandboxβ¦'; | |
| const task = TASKS[currentTaskId]; | |
| const isAdvanced = !!task.duckdb_backed; | |
| if (isAdvanced) {{ | |
| // Real API call for DuckDB-backed tasks | |
| try {{ | |
| const res = await fetch('/step', {{ | |
| method: 'POST', | |
| headers: {{'Content-Type': 'application/json'}}, | |
| body: JSON.stringify({{fixed_sql: agentSQL, explanation: ''}}) | |
| }}); | |
| const data = await res.json(); | |
| const reward = (data.reward != null) ? data.reward : 0.0; | |
| const done = data.done; | |
| const msg = data.info?.message || ''; | |
| const verifier = data.info?.verifier || 'DuckDB'; | |
| const isPos = reward >= 0; | |
| out.className = `verifier-output ${{done && reward > 0 ? 'success' : reward < 0 ? 'error' : 'success'}}`; | |
| out.innerHTML = ` | |
| <h3>${{done && reward >= 1.0 ? 'β ' : reward < 0 ? 'β' : 'β οΈ'}} Verifier Result</h3> | |
| <p style="margin-top:6px">${{msg}}</p> | |
| <p style="margin-top:8px;font-size:11px;color:var(--muted)">π¬ ${{verifier}} Β· Step ${{data.observation?.step_count ?? '?'}}</p> | |
| <span class="reward-pill ${{isPos ? 'reward-positive' : 'reward-negative'}}">Reward: ${{reward >= 0 ? '+' : ''}}${{reward.toFixed(2)}}</span> | |
| `; | |
| }} catch(e) {{ | |
| out.className = 'verifier-output error'; | |
| out.innerHTML = `<h3>β Network Error</h3><p>${{e.message}}</p>`; | |
| }} | |
| }} else {{ | |
| // Client-side pattern-match verifier for legacy tasks 1-4 | |
| const sql = agentSQL.toUpperCase(); | |
| const taskSolved = ( | |
| (currentTaskId === 'task_1_easy' && sql.includes(',') && sql.includes('NAME') && sql.includes('AGE')) || | |
| (currentTaskId === 'task_2_medium' && sql.includes('GROUP BY')) || | |
| (currentTaskId === 'task_3_hard' && sql.includes('PARTITION BY')) || | |
| (currentTaskId === 'task_4_expert' && !sql.includes('13-01') && sql.includes('MONTHLY_SALES')) | |
| ); | |
| if (taskSolved) {{ | |
| out.className = 'verifier-output success'; | |
| out.innerHTML = ` | |
| <h3>β Verification Passed!</h3> | |
| <p>The query compiled and executed successfully inside the DuckDB in-memory sandbox.</p> | |
| <p>The pipeline produced the expected output rows without errors.</p> | |
| <span class="reward-pill reward-positive">Reward: +1.0</span> | |
| `; | |
| }} else {{ | |
| out.className = 'verifier-output error'; | |
| out.innerHTML = ` | |
| <h3>β Verification Failed</h3> | |
| <p>DuckDB raised an error during execution.</p> | |
| <p style="font-family:var(--mono);font-size:12px;margin-top:6px;opacity:0.8">${{task.error}}</p> | |
| <span class="reward-pill reward-negative">Reward: -0.1</span> | |
| `; | |
| }} | |
| }} | |
| }} | |
| </script> | |
| </body> | |
| </html>""".replace("{TASKS_JSON}", TASKS_JSON) | |
| return HTMLResponse(html) | |
| def main(): | |
| import uvicorn | |
| uvicorn.run(app, host="0.0.0.0", port=7860) | |
| if __name__ == "__main__": | |
| main() | |