sai1912's picture
Upload folder using huggingface_hub
ecbaede verified
import json
import time
import duckdb
from fastapi import FastAPI
from fastapi.responses import RedirectResponse, HTMLResponse
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
# ── Global session state for DuckDB-backed tasks ──────────────────────────────
CURRENT_SESSION = {
"task_id": None,
"con": None, # duckdb.DuckDBPyConnection
"step_count": 0,
"done": False,
"baseline_rows": None, # for optimization task
"chaos_fixed": False, # for chaos task
"reward_history": [],
}
app = FastAPI(
title="SQL Debug RL Environment",
description="Real-world SQL pipeline debugging environment. An agent learns to fix and route broken SQL scripts.",
version="1.0.0",
docs_url=None,
redoc_url=None,
)
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
# ── Pydantic Models ──────────────────────────────────────────────────────────
class StepAction(BaseModel):
fixed_sql: str
explanation: str = ""
class ResetRequest(BaseModel):
task_id: str = "task_1_easy"
# ── Hard-coded Task Data ─────────────────────────────────────────────────────
TASKS = {
"task_1_easy": {
"label": "Task 1 β€” Easy: Syntax Fix",
"description": "Fix the syntax error in the SELECT statement. A comma is missing between column names.",
"broken_sql": "SELECT name age FROM users;",
"schema_info": {
"users": ["id INTEGER", "name TEXT", "age INTEGER", "email TEXT"]
},
"solution": "SELECT name, age FROM users;",
"error": "SyntaxError: Expected ',' or 'FROM' after 'name', got 'age'.",
"hint": "Add a comma between 'name' and 'age'.",
},
"task_2_medium": {
"label": "Task 2 β€” Medium: GROUP BY Aggregation",
"description": "You cannot SELECT unaggregated columns alongside aggregate functions without a GROUP BY clause.",
"broken_sql": (
"SELECT u.name, SUM(o.total) AS total_spent\n"
"FROM users u\n"
"JOIN orders o ON u.id = o.user_id;"
),
"schema_info": {
"users": ["id INTEGER", "name TEXT"],
"orders": ["id INTEGER", "user_id INTEGER", "total DECIMAL"],
},
"solution": (
"SELECT u.name, SUM(o.total) AS total_spent\n"
"FROM users u\n"
"JOIN orders o ON u.id = o.user_id\n"
"GROUP BY u.name;"
),
"error": "SemanticError: column 'u.name' must appear in the GROUP BY clause or be used in an aggregate function.",
"hint": "Add GROUP BY u.name at the end.",
},
"task_3_hard": {
"label": "Task 3 β€” Hard: Window Function + PARTITION",
"description": "The RANK() window function is missing PARTITION BY, causing it to rank globally instead of per-department.",
"broken_sql": (
"SELECT department, name, salary,\n"
" RANK() OVER (ORDER BY salary DESC) AS dept_rank\n"
"FROM employees\n"
"GROUP BY department;"
),
"schema_info": {
"employees": ["id INTEGER", "name TEXT", "department TEXT", "salary DECIMAL"],
},
"solution": (
"SELECT department, name, salary,\n"
" RANK() OVER (PARTITION BY department ORDER BY salary DESC) AS dept_rank\n"
"FROM employees;"
),
"error": "ExecutionError: window functions are not allowed in GROUP BY.",
"hint": "Remove GROUP BY and add PARTITION BY department inside OVER(...).",
},
"task_4_expert": {
"label": "Task 4 β€” Expert: CTE + Invalid Date",
"description": "The CTE contains an invalid date literal (month 13 does not exist). Fix the date and ensure the pipeline executes.",
"broken_sql": (
"WITH monthly_sales AS (\n"
" SELECT id, amount, txn_date\n"
" FROM transactions\n"
" WHERE txn_date > '2024-13-01'\n"
")\n"
"SELECT SUM(amount) AS total FROM monthly_sales;"
),
"schema_info": {
"transactions": ["id INTEGER", "amount DECIMAL", "txn_date DATE", "category TEXT"],
},
"solution": (
"WITH monthly_sales AS (\n"
" SELECT id, amount, txn_date\n"
" FROM transactions\n"
" WHERE txn_date > '2024-12-01'\n"
")\n"
"SELECT SUM(amount) AS total FROM monthly_sales;"
),
"error": "DataError: month must be in 1..12, got '13'.",
"hint": "Change '2024-13-01' to a valid date like '2024-12-01'.",
},
# ── Advanced Tasks ──────────────────────────────────────────────────────
"task_5_optimization": {
"label": "Task 5 β€” Advanced: Query Optimization",
"description": (
"A working query uses a CROSS JOIN + WHERE filter instead of a proper INNER JOIN. "
"It returns correct results but is catastrophically slow. "
"Your goal: rewrite it to use an explicit JOIN. "
"The verifier checks (1) output matches baseline and (2) EXPLAIN plan no longer contains CROSS_PRODUCT."
),
"broken_sql": (
"SELECT c.name, SUM(o.amount) AS total_spent\n"
"FROM customers c, orders o\n"
"WHERE c.id = o.customer_id\n"
"GROUP BY c.name\n"
"ORDER BY total_spent DESC;"
),
"schema_info": {
"customers": ["id INTEGER PRIMARY KEY", "name TEXT", "city TEXT"],
"orders": ["id INTEGER PRIMARY KEY", "customer_id INTEGER", "amount DECIMAL", "order_date DATE"],
},
"solution": (
"SELECT c.name, SUM(o.amount) AS total_spent\n"
"FROM customers c\n"
"INNER JOIN orders o ON c.id = o.customer_id\n"
"GROUP BY c.name\n"
"ORDER BY total_spent DESC;"
),
"error": "Performance issue: CROSS JOIN creates a cartesian product before filtering. Zero errors, but terrible at scale.",
"hint": "Replace 'FROM customers c, orders o WHERE c.id = o.customer_id' with 'FROM customers c INNER JOIN orders o ON c.id = o.customer_id'.",
"duckdb_backed": True,
},
"task_6_migration": {
"label": "Task 6 β€” Advanced: Schema Migration (3NF)",
"description": (
"You have a single denormalized 'messy_dump' table with columns: "
"(user_id, user_name, order_id, order_date, product, amount). "
"Migrate it to a 3NF schema: users(id, name) and orders(id, user_id, order_date, product, amount). "
"Then DROP the original table. "
"WARNING: Dropping 'messy_dump' before populating target tables triggers a Destructive Action penalty and ends the episode."
),
"broken_sql": (
"-- Step 1: Create target tables\n"
"CREATE TABLE users (id INTEGER PRIMARY KEY, name TEXT);\n"
"CREATE TABLE orders (id INTEGER PRIMARY KEY, user_id INTEGER, order_date DATE, product TEXT, amount DECIMAL);\n\n"
"-- Step 2: Migrate data\n"
"INSERT INTO users SELECT DISTINCT user_id, user_name FROM messy_dump;\n"
"INSERT INTO orders SELECT order_id, user_id, order_date::DATE, product, amount FROM messy_dump;\n\n"
"-- Step 3: Drop original\n"
"DROP TABLE messy_dump;"
),
"schema_info": {
"messy_dump": ["user_id INTEGER", "user_name TEXT", "order_id INTEGER", "order_date TEXT", "product TEXT", "amount DECIMAL"],
"users [TARGET]": ["id INTEGER PRIMARY KEY", "name TEXT"],
"orders [TARGET]": ["id INTEGER PRIMARY KEY", "user_id INTEGER", "order_date DATE", "product TEXT", "amount DECIMAL"],
},
"solution": (
"CREATE TABLE users (id INTEGER PRIMARY KEY, name TEXT);\n"
"CREATE TABLE orders (id INTEGER PRIMARY KEY, user_id INTEGER, order_date DATE, product TEXT, amount DECIMAL);\n"
"INSERT INTO users SELECT DISTINCT user_id, user_name FROM messy_dump;\n"
"INSERT INTO orders SELECT order_id, user_id, order_date::DATE, product, amount FROM messy_dump;\n"
"DROP TABLE messy_dump;"
),
"error": "NoError: Data exists but is denormalized. Goal is to normalize into 3NF and safely migrate.",
"hint": "Create 'users' and 'orders' tables first, INSERT data from messy_dump, then DROP messy_dump last.",
"duckdb_backed": True,
},
"task_7_chaos": {
"label": "Task 7 β€” Advanced: Chaos Engineering (Live Corruption)",
"description": (
"A live ETL pipeline runs on every step, inserting new records. "
"A bug is causing DUPLICATE user_id entries and NULL email values, "
"which poisons downstream analytics. "
"Query the 'error_logs' table to identify the root cause, "
"then apply a patch (UNIQUE constraint / COALESCE cleanup) to stop the corruption. "
"Reward increases for every clean step after your fix is applied."
),
"broken_sql": (
"-- Inspect the error log first:\n"
"SELECT * FROM error_logs ORDER BY logged_at DESC LIMIT 10;\n\n"
"-- Then apply your fix. Example patches:\n"
"-- 1) Clean duplicates: DELETE FROM users WHERE rowid NOT IN (SELECT MIN(rowid) FROM users GROUP BY user_id);\n"
"-- 2) Fix NULLs: UPDATE users SET email = COALESCE(email, 'unknown@domain.com') WHERE email IS NULL;\n"
"-- 3) Add constraint: CREATE UNIQUE INDEX IF NOT EXISTS ux_users_id ON users(user_id);"
),
"schema_info": {
"users": ["rowid INTEGER", "user_id INTEGER", "name TEXT", "email TEXT"],
"error_logs": ["id INTEGER", "error_type TEXT", "details TEXT", "logged_at TIMESTAMP"],
},
"solution": (
"DELETE FROM users WHERE rowid NOT IN (SELECT MIN(rowid) FROM users GROUP BY user_id);\n"
"UPDATE users SET email = COALESCE(email, 'unknown@domain.com') WHERE email IS NULL;\n"
"CREATE UNIQUE INDEX IF NOT EXISTS ux_users_id ON users(user_id);"
),
"error": "DataIntegrityError: Duplicate user_id values and NULL emails detected in the pipeline output.",
"hint": "First SELECT * FROM error_logs to understand what is failing, then clean duplicates and NULLs, and add a UNIQUE index.",
"duckdb_backed": True,
},
}
# ── API Endpoints ────────────────────────────────────────────────────────────
@app.get("/", include_in_schema=False)
def read_root():
return RedirectResponse(url="/web_ui")
@app.get("/health", tags=["default"])
def health():
return {"status": "ok", "version": "1.0.0", "message": "SQL Debug Environment is healthy."}
def _seed_task5(con):
"""Seed customers + orders for the optimization task."""
con.execute("DROP TABLE IF EXISTS customers; DROP TABLE IF EXISTS orders;")
con.execute("CREATE TABLE customers (id INTEGER PRIMARY KEY, name TEXT, city TEXT)")
con.execute("CREATE TABLE orders (id INTEGER PRIMARY KEY, customer_id INTEGER, amount DECIMAL, order_date DATE)")
customers = [(i, f"Customer_{i}", "City") for i in range(1, 51)]
orders = [(i, (i % 50) + 1, round(10 + (i * 3.7) % 500, 2), "2024-01-15") for i in range(1, 201)]
con.executemany("INSERT INTO customers VALUES (?, ?, ?)", customers)
con.executemany("INSERT INTO orders VALUES (?, ?, ?, ?)", orders)
def _seed_task6(con):
"""Seed messy_dump for the migration task."""
con.execute("DROP TABLE IF EXISTS messy_dump; DROP TABLE IF EXISTS users; DROP TABLE IF EXISTS orders;")
con.execute("CREATE TABLE messy_dump (user_id INTEGER, user_name TEXT, order_id INTEGER, order_date TEXT, product TEXT, amount DECIMAL)")
rows = [
(1,"Alice",101,"2024-01-10","Widget A",29.99),
(1,"Alice",102,"2024-01-12","Widget B",49.99),
(2,"Bob",103,"2024-01-15","Gadget X",99.99),
(3,"Carol",104,"2024-01-20","Widget A",29.99),
(3,"Carol",105,"2024-01-22","Gadget Y",149.99),
(4,"Dave",106,"2024-02-01","Widget B",49.99),
(5,"Eve",107,"2024-02-05","Gadget X",99.99),
]
con.executemany("INSERT INTO messy_dump VALUES (?,?,?,?,?,?)", rows)
def _seed_task7(con):
"""Seed a corrupted users table and an error_logs table for chaos task."""
con.execute("DROP SEQUENCE IF EXISTS seq_users; DROP TABLE IF EXISTS users; DROP TABLE IF EXISTS error_logs;")
con.execute("CREATE SEQUENCE seq_users START 1")
con.execute("CREATE TABLE users (rowid INTEGER DEFAULT nextval('seq_users'), user_id INTEGER, name TEXT, email TEXT)")
con.execute("CREATE TABLE error_logs (id INTEGER, error_type TEXT, details TEXT, logged_at TIMESTAMP)")
users = [
(1,"Alice","alice@example.com"),
(2,"Bob","bob@example.com"),
(1,"Alice_dup",None), # duplicate user_id + NULL email
(3,"Carol","carol@example.com"),
(4,"Dave",None), # NULL email
(2,"Bob_dup","bob2@example.com"), # duplicate user_id
]
con.executemany("INSERT INTO users (user_id, name, email) VALUES (?,?,?)", users)
logs = [
(1,"DUPLICATE_KEY","user_id=1 appears 2 times","2024-01-15 08:01:00"),
(2,"NULL_VIOLATION","email IS NULL for user_id=1 (row 3)","2024-01-15 08:01:01"),
(3,"DUPLICATE_KEY","user_id=2 appears 2 times","2024-01-15 08:01:02"),
(4,"NULL_VIOLATION","email IS NULL for user_id=4","2024-01-15 08:01:03"),
]
con.executemany("INSERT INTO error_logs VALUES (?,?,?,?)", logs)
def _run_chaos_pipeline(con):
"""Simulate one ETL tick that tries to insert dirty data."""
import random, datetime
uid = random.randint(1, 3) # intentional duplicate range
con.execute(
"INSERT INTO users (user_id, name, email) VALUES (?, ?, ?)",
[uid, f"Auto_{uid}", None if random.random() < 0.5 else f"auto{uid}@x.com"]
)
@app.post("/reset", tags=["Environment"])
def reset_episode(req: ResetRequest = None):
if req is None:
req = ResetRequest()
task_id = req.task_id if req.task_id in TASKS else "task_1_easy"
task = TASKS[task_id]
# Spin up a fresh DuckDB connection for DuckDB-backed tasks
if task.get("duckdb_backed"):
con = duckdb.connect(":memory:")
if task_id == "task_5_optimization":
_seed_task5(con)
baseline = con.execute(
"SELECT c.name, SUM(o.amount) AS total_spent "
"FROM customers c, orders o WHERE c.id = o.customer_id "
"GROUP BY c.name ORDER BY total_spent DESC"
).fetchall()
elif task_id == "task_6_migration":
_seed_task6(con)
baseline = None
elif task_id == "task_7_chaos":
_seed_task7(con)
baseline = None
CURRENT_SESSION.update({
"task_id": task_id, "con": con, "step_count": 0,
"done": False, "baseline_rows": baseline,
"chaos_fixed": False, "reward_history": [],
})
else:
# Non-duckdb tasks also need session tracking
CURRENT_SESSION.update({
"task_id": task_id, "con": None, "step_count": 0,
"done": False, "baseline_rows": None,
"chaos_fixed": False, "reward_history": [],
})
return {
"status": "success",
"observation": {
"task_id": task_id,
"label": task["label"],
"description": task["description"],
"broken_sql": task["broken_sql"],
"schema_info": task["schema_info"],
"error_hint": task["error"],
},
}
@app.post("/step", tags=["Environment"])
def step_environment(action: StepAction):
task_id = CURRENT_SESSION.get("task_id")
task = TASKS.get(task_id, {})
con = CURRENT_SESSION.get("con")
step_count = CURRENT_SESSION.get("step_count", 0) + 1
CURRENT_SESSION["step_count"] = step_count
# ── Legacy tasks 1-4: simple pattern matching ───────────────────────────
if not task.get("duckdb_backed"):
sql = action.fixed_sql.strip().upper()
solved = "GROUP BY" in sql or "," in sql or "PARTITION" in sql or "12-01" in sql
reward = 0.99 if solved else -0.1
CURRENT_SESSION["reward_history"].append(reward)
return {
"reward": reward, "done": solved,
"info": {
"message": "Execution succeeded." if solved else "Execution failed. Review your fix.",
"verifier": "Pattern-match verifier",
},
"observation": {"current_sql": action.fixed_sql, "step_count": step_count},
}
# ── Task 5: Query Optimization ───────────────────────────────────────────
if task_id == "task_5_optimization":
agent_sql = action.fixed_sql.strip()
reward, done, msg = 0.0, False, ""
try:
t0 = time.perf_counter()
rows = con.execute(agent_sql).fetchall()
elapsed = time.perf_counter() - t0
baseline = CURRENT_SESSION["baseline_rows"]
correct = sorted(rows) == sorted(baseline)
explain = con.execute(f"EXPLAIN {agent_sql}").fetchall()
plan_str = " ".join(str(r) for r in explain).upper()
no_cross = "CROSS_PRODUCT" not in plan_str
if correct and no_cross:
reward, done = 0.99, True
msg = f"βœ… Output matches baseline ({len(rows)} rows). EXPLAIN shows no CROSS_PRODUCT. Reward: +1.0"
elif correct:
reward = 0.5
msg = f"⚠️ Output matches baseline but EXPLAIN still shows CROSS_PRODUCT. Reward: +0.5"
else:
reward = -0.1
msg = "❌ Output does NOT match baseline. Check your query logic."
except Exception as e:
reward, msg = -0.2, f"❌ DuckDB Error: {e}"
CURRENT_SESSION["reward_history"].append(reward)
return {"reward": reward, "done": done,
"info": {"message": msg, "verifier": "DuckDB EXPLAIN + row comparison"},
"observation": {"step_count": step_count}}
# ── Task 6: Schema Migration ─────────────────────────────────────────────
if task_id == "task_6_migration":
agent_sql = action.fixed_sql.strip()
reward, done, msg = 0.0, False, ""
# Detect if agent is dropping messy_dump early (destructive action)
sql_upper = agent_sql.upper()
tables_before = {r[0].lower() for r in con.execute("SHOW TABLES").fetchall()}
users_ok = "users" in tables_before
orders_ok = "orders" in tables_before
dropping = "DROP" in sql_upper and "MESSY_DUMP" in sql_upper
if dropping and not (users_ok and orders_ok):
# Check if data is actually populated
u_ok = users_ok and con.execute("SELECT COUNT(*) FROM users").fetchone()[0] > 0
o_ok = orders_ok and con.execute("SELECT COUNT(*) FROM orders").fetchone()[0] > 0
if not (u_ok and o_ok):
reward, done = -0.3, True
msg = "πŸ’€ DESTRUCTIVE ACTION: Dropped messy_dump before fully populating target tables! Episode ended. Penalty: -0.3"
CURRENT_SESSION["done"] = True
CURRENT_SESSION["reward_history"].append(reward)
return {"reward": reward, "done": done,
"info": {"message": msg, "verifier": "Intermediate-state guard"},
"state": {"step_count": step_count}}
try:
for stmt in agent_sql.split(";"):
stmt = stmt.strip()
if stmt:
con.execute(stmt)
tables_after = {r[0].lower() for r in con.execute("SHOW TABLES").fetchall()}
users_count = con.execute("SELECT COUNT(*) FROM users").fetchone()[0] if "users" in tables_after else 0
orders_count = con.execute("SELECT COUNT(*) FROM orders").fetchone()[0] if "orders" in tables_after else 0
dump_gone = "messy_dump" not in tables_after
if users_count >= 5 and orders_count >= 7 and dump_gone:
reward, done = 0.99, True
msg = f"βœ… Migration complete! users={users_count} rows, orders={orders_count} rows. messy_dump dropped. Reward: +1.0"
elif users_count > 0 or orders_count > 0:
reward = 0.3
msg = f"πŸ”„ Partial progress: users={users_count}, orders={orders_count}. messy_dump={'gone' if dump_gone else 'still exists'}."
else:
reward = 0.05
msg = "πŸ“‹ Tables created. Now migrate the data with INSERT INTO ... SELECT."
except Exception as e:
reward, msg = -0.2, f"❌ DuckDB Error: {e}"
CURRENT_SESSION["reward_history"].append(reward)
return {"reward": reward, "done": done,
"info": {"message": msg, "verifier": "Row-count + table existence check"},
"observation": {"step_count": step_count}}
# ── Task 7: Chaos Engineering ────────────────────────────────────────────
if task_id == "task_7_chaos":
agent_sql = action.fixed_sql.strip()
reward, done, msg = 0.0, False, ""
try:
for stmt in agent_sql.split(";"):
stmt = stmt.strip()
if stmt and not stmt.startswith("--"):
con.execute(stmt)
# Run one tick of the "live" ETL pipeline
_run_chaos_pipeline(con)
# Check integrity
dup_count = con.execute("SELECT COUNT(*) FROM (SELECT user_id FROM users GROUP BY user_id HAVING COUNT(*)>1)").fetchone()[0]
null_count = con.execute("SELECT COUNT(*) FROM users WHERE email IS NULL").fetchone()[0]
has_index = any("ux_users_id" in str(r) for r in con.execute("SELECT index_name FROM duckdb_indexes()").fetchall())
if dup_count == 0 and null_count == 0 and has_index:
reward, done = 0.99, True
CURRENT_SESSION["chaos_fixed"] = True
msg = "βœ… Pipeline is clean! No duplicates, no NULLs, UNIQUE index in place. Reward: +1.0"
elif dup_count == 0 and null_count == 0:
reward = 0.7
msg = f"πŸ”„ Data is clean this step but no UNIQUE index. Reward: +0.7 (add index to fully lock it in)"
elif CURRENT_SESSION.get("chaos_fixed"):
reward = 0.5
msg = f"⚠️ ETL re-introduced {dup_count} dups and {null_count} NULLs. Partial reward: +0.5"
else:
reward = -0.1
msg = f"❌ Still corrupt: {dup_count} duplicate user_ids, {null_count} NULL emails. Reward: -0.1"
except Exception as e:
reward, msg = -0.2, f"❌ DuckDB Error: {e}"
CURRENT_SESSION["reward_history"].append(reward)
return {"reward": reward, "done": done,
"info": {"message": msg, "verifier": "Integrity check (dups + NULLs + index)"},
"observation": {"step_count": step_count}}
@app.get("/state", tags=["Environment"])
def get_state():
task_id = CURRENT_SESSION.get("task_id", "task_1_easy")
task = TASKS.get(task_id, TASKS["task_1_easy"])
return {
"task_id": task_id,
"current_sql": task["broken_sql"],
"step_count": CURRENT_SESSION.get("step_count", 0),
"done": CURRENT_SESSION.get("done", False),
"schema": task["schema_info"],
}
@app.get("/tasks", tags=["System"])
def get_tasks():
return TASKS
@app.get("/web", tags=["System"])
def web_redirect():
return RedirectResponse(url="/web_ui")
# ── Custom API Docs ──────────────────────────────────────────────────────────
@app.get("/docs", include_in_schema=False)
async def custom_swagger():
html = """<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8"/>
<meta name="viewport" content="width=device-width, initial-scale=1.0"/>
<title>SQL Debug Env – API Docs</title>
<link rel="preconnect" href="https://fonts.googleapis.com">
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700;800&display=swap" rel="stylesheet">
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/swagger-ui-dist@5/swagger-ui.css">
<style>
*, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
body {
font-family: 'Inter', sans-serif;
background: #ffffff;
color: #333333;
min-height: 100vh;
}
/* ── Top Nav (Light Mode) ── */
.nav {
position: sticky;
top: 0;
z-index: 1000;
display: flex;
align-items: center;
justify-content: space-between;
padding: 0 32px;
height: 64px;
background: rgba(255, 255, 255, 0.95);
backdrop-filter: blur(16px);
border-bottom: 1px solid #e5e5e5;
}
.nav-brand {
display: flex;
align-items: center;
gap: 12px;
font-size: 18px;
font-weight: 700;
color: #111827;
}
.nav-badge {
background: #f3f4f6;
border: 1px solid #d1d5db;
padding: 3px 10px;
border-radius: 20px;
font-size: 11px;
font-weight: 600;
letter-spacing: 0.5px;
color: #4b5563;
}
.nav-actions { display: flex; gap: 10px; }
.btn-back {
display: inline-flex;
align-items: center;
gap: 6px;
background: #ffffff;
border: 1px solid #d1d5db;
color: #374151;
padding: 8px 18px;
border-radius: 8px;
text-decoration: none;
font-size: 13px;
font-weight: 600;
transition: all 0.2s;
}
.btn-back:hover {
background: #f9fafb;
border-color: #9ca3af;
transform: translateY(-1px);
}
/* Small wrapper padding so it doesn't touch the edges */
.swagger-ui .wrapper { padding: 24px 40px; max-width: 1300px; margin: 0 auto; }
.swagger-ui .topbar { display: none !important; }
</style>
</head>
<body>
<nav class="nav">
<div class="nav-brand">
πŸ›°οΈ SQL Debug Environment
<span class="nav-badge">OAS 3.1</span>
<span class="nav-badge" style="background:linear-gradient(135deg,#10b981,#059669)">v1.0.0</span>
</div>
<div class="nav-actions">
<a href="/web_ui" class="btn-back">β¬… Back to Web UI</a>
</div>
</nav>
<div id="swagger-ui"></div>
<script src="https://cdn.jsdelivr.net/npm/swagger-ui-dist@5/swagger-ui-bundle.js"></script>
<script>
window.onload = () => {
SwaggerUIBundle({
url: "/openapi.json",
dom_id: '#swagger-ui',
deepLinking: true,
presets: [SwaggerUIBundle.presets.apis, SwaggerUIBundle.SwaggerUIStandalonePreset],
layout: "BaseLayout",
});
};
</script>
</body>
</html>"""
return HTMLResponse(html)
# ── Custom Web UI ────────────────────────────────────────────────────────────
TASKS_JSON = json.dumps(TASKS)
# -- Grader Endpoints (required by OpenEnv Phase 2 validator) -----------------
class GraderRequest(BaseModel):
task_id: str
fixed_sql: str = ""
explanation: str = ""
TASK_GRADER_MAP = {
"task_1_easy": lambda sql: 0.85 if ("," in sql.upper()) else 0.15,
"task_2_medium": lambda sql: 0.85 if ("GROUP BY" in sql.upper()) else 0.15,
"task_3_hard": lambda sql: 0.85 if ("PARTITION" in sql.upper()) else 0.15,
"task_4_expert": lambda sql: 0.85 if ("12-01" in sql or "2024-12" in sql) else 0.15,
"task_5_optimization": lambda sql: 0.85 if ("INNER JOIN" in sql.upper() or "JOIN" in sql.upper()) else 0.15,
"task_6_migration": lambda sql: 0.85 if ("INSERT INTO" in sql.upper() and "DROP" in sql.upper()) else 0.15,
"task_7_chaos": lambda sql: 0.85 if ("CREATE UNIQUE INDEX" in sql.upper() or "UNIQUE" in sql.upper()) else 0.15,
}
@app.post("/grader", tags=["Environment"])
def grade_submission(req: GraderRequest):
grader_fn = TASK_GRADER_MAP.get(req.task_id)
if grader_fn is None:
return {"task_id": req.task_id, "score": 0.15, "error": "Unknown task_id"}
raw_score = grader_fn(req.fixed_sql)
score = max(0.01, min(0.99, float(raw_score)))
return {"task_id": req.task_id, "score": score, "passed": score >= 0.5}
@app.get("/baseline", tags=["Environment"])
def get_baseline():
return {
"baseline_scores": {
"task_1_easy": 0.15,
"task_2_medium": 0.15,
"task_3_hard": 0.15,
"task_4_expert": 0.15,
"task_5_optimization": 0.15,
"task_6_migration": 0.15,
"task_7_chaos": 0.15,
}
}
@app.get("/web_ui", include_in_schema=False)
async def web_ui():
html = f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8"/>
<meta name="viewport" content="width=device-width, initial-scale=1.0"/>
<title>SQL Debug RL Environment</title>
<link rel="preconnect" href="https://fonts.googleapis.com">
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700;800&family=JetBrains+Mono:wght@400;500&display=swap" rel="stylesheet">
<style>
*, *::before, *::after {{ box-sizing: border-box; margin: 0; padding: 0; }}
:root {{
--bg: #0f0e17;
--surface: #1a1827;
--surface2: #221f35;
--border: rgba(139,92,246,0.2);
--accent: #8b5cf6;
--accent2: #6366f1;
--green: #10b981;
--red: #ef4444;
--text: #e8e8f0;
--muted: #9090a8;
--mono: 'JetBrains Mono', monospace;
--sans: 'Inter', sans-serif;
}}
html, body {{ height: 100%; }}
body {{
font-family: var(--sans);
background: var(--bg);
color: var(--text);
min-height: 100vh;
overflow-x: hidden;
}}
/* ── Animated background ── */
body::before {{
content: '';
position: fixed;
top: -40%;
left: -20%;
width: 600px;
height: 600px;
background: radial-gradient(circle, rgba(139,92,246,0.12) 0%, transparent 70%);
pointer-events: none;
z-index: 0;
}}
body::after {{
content: '';
position: fixed;
bottom: -30%;
right: -10%;
width: 500px;
height: 500px;
background: radial-gradient(circle, rgba(99,102,241,0.1) 0%, transparent 70%);
pointer-events: none;
z-index: 0;
}}
/* ── Nav ── */
.nav {{
position: sticky;
top: 0;
z-index: 100;
display: flex;
align-items: center;
justify-content: space-between;
padding: 0 36px;
height: 64px;
background: rgba(15, 14, 23, 0.8);
backdrop-filter: blur(16px);
border-bottom: 1px solid var(--border);
}}
.nav-brand {{
display: flex;
align-items: center;
gap: 12px;
font-size: 17px;
font-weight: 700;
letter-spacing: -0.3px;
}}
.badge {{
padding: 3px 10px;
border-radius: 20px;
font-size: 11px;
font-weight: 600;
background: linear-gradient(135deg, var(--accent), var(--accent2));
}}
.btn {{
display: inline-flex;
align-items: center;
gap: 6px;
padding: 8px 18px;
border-radius: 8px;
font-size: 13px;
font-weight: 600;
cursor: pointer;
transition: all 0.2s;
border: none;
text-decoration: none;
}}
.btn-outline {{
background: rgba(139,92,246,0.1);
border: 1px solid rgba(139,92,246,0.4);
color: #a78bfa;
}}
.btn-outline:hover {{
background: rgba(139,92,246,0.25);
border-color: var(--accent);
color: #fff;
transform: translateY(-1px);
}}
.btn-primary {{
background: linear-gradient(135deg, var(--accent), var(--accent2));
color: #fff;
box-shadow: 0 4px 14px rgba(139,92,246,0.35);
}}
.btn-primary:hover {{
transform: translateY(-2px);
box-shadow: 0 6px 20px rgba(139,92,246,0.5);
}}
.btn-green {{
background: linear-gradient(135deg, #10b981, #059669);
color: #fff;
box-shadow: 0 4px 14px rgba(16,185,129,0.35);
width: 100%;
justify-content: center;
padding: 12px;
font-size: 14px;
}}
.btn-green:hover {{
transform: translateY(-2px);
box-shadow: 0 6px 20px rgba(16,185,129,0.5);
}}
/* ── Hero ── */
.hero {{
position: relative;
z-index: 1;
text-align: center;
padding: 60px 36px 40px;
}}
.hero-eyebrow {{
display: inline-flex;
align-items: center;
gap: 8px;
background: rgba(139,92,246,0.1);
border: 1px solid rgba(139,92,246,0.3);
padding: 6px 16px;
border-radius: 20px;
font-size: 12px;
font-weight: 600;
color: #a78bfa;
letter-spacing: 0.5px;
text-transform: uppercase;
margin-bottom: 20px;
}}
.hero h1 {{
font-size: clamp(28px, 5vw, 48px);
font-weight: 800;
letter-spacing: -1px;
background: linear-gradient(135deg, #fff 30%, #a78bfa 100%);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
background-clip: text;
line-height: 1.15;
margin-bottom: 16px;
}}
.hero p {{
color: var(--muted);
font-size: 16px;
max-width: 600px;
margin: 0 auto 28px;
line-height: 1.6;
}}
/* ── Stat bar ── */
.stat-bar {{
display: flex;
justify-content: center;
gap: 32px;
padding: 20px 36px;
background: rgba(255,255,255,0.02);
border-top: 1px solid var(--border);
border-bottom: 1px solid var(--border);
position: relative;
z-index: 1;
}}
.stat {{ text-align: center; }}
.stat-val {{ font-size: 20px; font-weight: 700; color: var(--accent); }}
.stat-lbl {{ font-size: 11px; color: var(--muted); text-transform: uppercase; letter-spacing: 0.5px; margin-top: 2px; }}
/* ── Main Layout ── */
.main {{
position: relative;
z-index: 1;
display: grid;
grid-template-columns: 320px 1fr;
gap: 24px;
padding: 32px 36px;
max-width: 1300px;
margin: 0 auto;
}}
/* ── Cards ── */
.card {{
background: var(--surface);
border: 1px solid var(--border);
border-radius: 16px;
overflow: hidden;
}}
.card-header {{
padding: 16px 20px;
border-bottom: 1px solid var(--border);
display: flex;
align-items: center;
gap: 10px;
font-weight: 700;
font-size: 13px;
text-transform: uppercase;
letter-spacing: 0.5px;
color: #a78bfa;
}}
.card-body {{ padding: 20px; }}
/* ── Sidebar ── */
.sidebar {{ display: flex; flex-direction: column; gap: 20px; }}
/* ── Select ── */
label.field-label {{
display: block;
font-size: 12px;
font-weight: 600;
color: var(--muted);
text-transform: uppercase;
letter-spacing: 0.5px;
margin-bottom: 8px;
}}
select, textarea {{
width: 100%;
background: var(--surface2);
border: 1px solid var(--border);
border-radius: 8px;
color: var(--text);
font-family: var(--sans);
font-size: 14px;
padding: 10px 14px;
outline: none;
transition: border-color 0.2s;
}}
select:focus, textarea:focus {{
border-color: var(--accent);
box-shadow: 0 0 0 3px rgba(139,92,246,0.15);
}}
select {{ cursor: pointer; appearance: none; background-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='16' height='16' fill='%236b7280' viewBox='0 0 16 16'%3E%3Cpath d='M7.247 11.14L2.451 5.658C1.885 5.013 2.345 4 3.204 4h9.592a1 1 0 0 1 .753 1.659l-4.796 5.48a1 1 0 0 1-1.506 0z'/%3E%3C/svg%3E"); background-repeat: no-repeat; background-position: right 12px center; padding-right: 36px; }}
select option {{ background: #1a1827; }}
/* ── Schema / Task Info ── */
.info-block {{
background: var(--surface2);
border: 1px solid var(--border);
border-radius: 8px;
padding: 14px;
font-family: var(--mono);
font-size: 12.5px;
color: #c4b5fd;
white-space: pre-wrap;
line-height: 1.6;
max-height: 200px;
overflow-y: auto;
}}
.task-desc {{
font-family: var(--sans);
font-size: 13.5px;
color: var(--text);
line-height: 1.6;
margin-bottom: 10px;
}}
.error-chip {{
display: inline-block;
background: rgba(239,68,68,0.1);
border: 1px solid rgba(239,68,68,0.3);
color: #fca5a5;
padding: 4px 10px;
border-radius: 6px;
font-size: 12px;
font-family: var(--mono);
margin-top: 6px;
}}
.hint-chip {{
display: inline-block;
background: rgba(245,158,11,0.1);
border: 1px solid rgba(245,158,11,0.3);
color: #fcd34d;
padding: 4px 10px;
border-radius: 6px;
font-size: 12px;
margin-top: 6px;
}}
/* ── Right panel ── */
.right-panel {{ display: flex; flex-direction: column; gap: 20px; }}
/* ── Code editors ── */
.code-label {{
display: flex;
align-items: center;
justify-content: space-between;
margin-bottom: 8px;
}}
.code-label span {{
font-size: 12px;
font-weight: 600;
color: var(--muted);
text-transform: uppercase;
letter-spacing: 0.5px;
}}
.lang-tag {{
font-size: 11px;
padding: 2px 8px;
background: rgba(139,92,246,0.12);
border: 1px solid rgba(139,92,246,0.25);
border-radius: 4px;
color: #a78bfa;
font-family: var(--mono);
}}
textarea.code {{
font-family: var(--mono);
font-size: 13.5px;
resize: vertical;
line-height: 1.6;
tab-size: 2;
min-height: 130px;
color: #e2d9f3;
}}
textarea.code.read-only {{
background: rgba(15,14,23,0.6);
border-color: rgba(239,68,68,0.25);
color: #fca5a5;
cursor: default;
}}
textarea.code.agent {{
background: rgba(16,185,129,0.04);
border-color: rgba(16,185,129,0.25);
color: #a7f3d0;
}}
textarea.code.agent:focus {{
border-color: var(--green);
box-shadow: 0 0 0 3px rgba(16,185,129,0.15);
}}
/* ── Verifier output ── */
.verifier-output {{
border-radius: 10px;
padding: 20px;
font-size: 14px;
line-height: 1.5;
border: 1px dashed rgba(255,255,255,0.1);
background: rgba(255,255,255,0.02);
color: var(--muted);
text-align: center;
transition: all 0.4s ease;
}}
.verifier-output.success {{
background: rgba(16,185,129,0.07);
border: 1px solid rgba(16,185,129,0.35);
color: #6ee7b7;
text-align: left;
}}
.verifier-output.error {{
background: rgba(239,68,68,0.07);
border: 1px solid rgba(239,68,68,0.35);
color: #fca5a5;
text-align: left;
}}
.verifier-output h3 {{ font-size: 16px; margin-bottom: 8px; }}
.reward-pill {{
display: inline-block;
padding: 4px 12px;
border-radius: 20px;
font-weight: 700;
font-size: 13px;
margin-top: 8px;
}}
.reward-positive {{ background: rgba(16,185,129,0.2); color: #34d399; }}
.reward-negative {{ background: rgba(239,68,68,0.2); color: #f87171; }}
/* ── Divider ── */
.divider {{
height: 1px;
background: var(--border);
margin: 4px 0;
}}
/* ── Scrollbar ── */
::-webkit-scrollbar {{ width: 6px; height: 6px; }}
::-webkit-scrollbar-track {{ background: transparent; }}
::-webkit-scrollbar-thumb {{ background: rgba(139,92,246,0.3); border-radius: 3px; }}
@media (max-width: 900px) {{
.main {{ grid-template-columns: 1fr; }}
.stat-bar {{ flex-wrap: wrap; gap: 16px; }}
}}
</style>
</head>
<body>
<!-- Nav -->
<nav class="nav">
<div class="nav-brand">
πŸ›°οΈ SQL Debug Env
<span class="badge">v1.0.0</span>
</div>
<div style="display:flex;gap:10px">
<a href="/docs" target="_blank" class="btn btn-outline">πŸ“– API Docs</a>
</div>
</nav>
<!-- Hero -->
<section class="hero">
<div class="hero-eyebrow">πŸ€– Reinforcement Learning Verifiable Environment</div>
<h1>Advanced SQL Debugging<br>RL Environment</h1>
<p>Agents learn to diagnose and repair broken SQL pipelines. A sandboxed DuckDB executor evaluates every submission with a dense reward signal.</p>
<a href="/docs" target="_blank" class="btn btn-outline">πŸ“– View Full API Documentation β†’</a>
</section>
<!-- Stat Bar -->
<div class="stat-bar">
<div class="stat"><div class="stat-val">7</div><div class="stat-lbl">Challenge Tasks</div></div>
<div class="stat"><div class="stat-val">DuckDB</div><div class="stat-lbl">Sandbox Engine</div></div>
<div class="stat"><div class="stat-val">Live</div><div class="stat-lbl">Verifier</div></div>
<div class="stat"><div class="stat-val">3</div><div class="stat-lbl">Advanced RLVE</div></div>
</div>
<!-- Main -->
<div class="main">
<!-- Sidebar -->
<aside class="sidebar">
<!-- Controls -->
<div class="card">
<div class="card-header">βš™οΈ Environment Controls</div>
<div class="card-body" style="display:flex;flex-direction:column;gap:14px">
<div>
<label class="field-label">🎯 Challenge Level</label>
<select id="task-select">
<option value="task_1_easy">Task 1 β€” Easy: Syntax Fix</option>
<option value="task_2_medium">Task 2 β€” Medium: GROUP BY</option>
<option value="task_3_hard">Task 3 β€” Hard: Window Function</option>
<option value="task_4_expert">Task 4 β€” Expert: CTE + Date</option>
<optgroup label="─── Advanced RLVE Tasks ───">
<option value="task_5_optimization">Task 5 β€” Optimization (EXPLAIN-verified)</option>
<option value="task_6_migration">Task 6 β€” Schema Migration (3NF)</option>
<option value="task_7_chaos">Task 7 β€” Chaos Engineering (Live DB)</option>
</optgroup>
</select>
</div>
<button class="btn btn-primary" onclick="initEnv()">πŸ”„ Initialize Environment</button>
</div>
</div>
<!-- Task Details -->
<div class="card">
<div class="card-header">πŸ“‹ Task Details</div>
<div class="card-body" style="display:flex;flex-direction:column;gap:10px">
<p class="task-desc" id="task-desc">Select a task and click Initialize.</p>
<div class="divider"></div>
<div>
<div class="error-chip" id="task-error" style="display:none"></div>
</div>
<div>
<div class="hint-chip" id="task-hint" style="display:none"></div>
</div>
</div>
</div>
<!-- Environment Rewards -->
<div class="card" id="reward-card" style="display:none; margin-bottom: 20px;">
<div class="card-header">πŸ’Έ Dense Reward Signal</div>
<div class="card-body" style="padding: 16px 20px;" id="reward-card-body">
</div>
</div>
<!-- Schema -->
<div class="card">
<div class="card-header">πŸ—„οΈ Database Schema</div>
<div class="card-body">
<div class="info-block" id="schema-dump">No schema loaded yet.</div>
</div>
</div>
</aside>
<!-- Right Panel -->
<div class="right-panel">
<!-- Broken Code -->
<div class="card">
<div class="card-header">🐞 Broken Pipeline Code</div>
<div class="card-body">
<div class="code-label">
<span>Initial SQL (Failing)</span>
<span class="lang-tag">SQL</span>
</div>
<textarea id="broken-code" class="code read-only" rows="5" readonly placeholder="Initialize environment to load broken SQL..."></textarea>
</div>
</div>
<!-- Agent Submission -->
<div class="card">
<div class="card-header">πŸ€– Agent Submission Sandbox</div>
<div class="card-body" style="display:flex;flex-direction:column;gap:14px">
<div>
<div class="code-label">
<span>Agent Fix Attempt</span>
<span class="lang-tag">SQL β€” editable</span>
</div>
<textarea id="agent-input" class="code agent" rows="6" placeholder="Write or paste your fixed SQL here..."></textarea>
</div>
<button class="btn btn-green" onclick="executeStep()">▢️ Execute Fix in DuckDB Sandbox</button>
</div>
</div>
<!-- Verifier Output -->
<div class="card">
<div class="card-header">πŸ“Š Verifier Output</div>
<div class="card-body">
<div class="verifier-output" id="verifier-out">
Agent standing by… Load a task and submit a fix.
</div>
</div>
</div>
</div>
</div>
<script>
const TASKS = {TASKS_JSON};
let currentTaskId = null;
const ADVANCED_REWARDS = {{
task_5_optimization: [
['Output matches baseline', '+0.50'],['No CROSS_PRODUCT in EXPLAIN', '+0.50'],
['Wrong output', '-0.10'],['DuckDB error', '-0.20'],
],
task_6_migration: [
['Tables created', '+0.05'],['Data partially migrated', '+0.30'],
['Full migration + DROP', '+1.00'],['Destructive early DROP', '-0.30'],['DuckDB error', '-0.20'],
],
task_7_chaos: [
['Zero dups + zero NULLs + UNIQUE index', '+1.00'],['Zero dups + zero NULLs (no index)', '+0.70'],
['ETL still dirty', '-0.10'],['DuckDB error', '-0.20'],
],
}};
function initEnv() {{
currentTaskId = document.getElementById('task-select').value;
const task = TASKS[currentTaskId];
const isAdvanced = !!task.duckdb_backed;
document.getElementById('broken-code').value = task.broken_sql;
document.getElementById('agent-input').value = task.broken_sql;
document.getElementById('task-desc').textContent = task.description;
const errEl = document.getElementById('task-error');
errEl.textContent = '⚠️ ' + task.error;
errEl.style.display = 'inline-block';
const hintEl = document.getElementById('task-hint');
hintEl.textContent = 'πŸ’‘ Hint: ' + task.hint;
hintEl.style.display = 'inline-block';
// Reward card
const rewardBody = document.getElementById('reward-card-body');
let rewardsHtml = '';
if (isAdvanced) {{
const entries = ADVANCED_REWARDS[currentTaskId] || [];
rewardsHtml = entries.map(([label, val]) => {{
const isPos = val.startsWith('+');
return `<div style="display:flex;justify-content:space-between;align-items:center;margin-bottom:4px;">
<span style="font-size:13px;color:#e8e8f0">${{label}}</span>
<span style="font-family:var(--mono);color:${{isPos?'#34d399':'#f87171'}};font-weight:bold;font-size:13px;">${{val}}</span>
</div>`;
}}).join('');
}} else if (currentTaskId === 'task_3_hard') {{
rewardsHtml = `
<div style="display:flex;justify-content:space-between;align-items:center;margin-bottom:4px;">
<span style="font-size:13px;color:#e8e8f0">Correct Step Identified</span>
<span style="font-family:var(--mono);color:#34d399;font-weight:bold;font-size:13px;">+0.15</span>
</div>
<div style="display:flex;justify-content:space-between;align-items:center;margin-bottom:4px;">
<span style="font-size:13px;color:#e8e8f0">Step 2 Fixed</span>
<span style="font-family:var(--mono);color:#34d399;font-weight:bold;font-size:13px;">+0.25</span>
</div>
<div style="display:flex;justify-content:space-between;align-items:center;margin-bottom:4px;">
<span style="font-size:13px;color:#e8e8f0">Step 4 Fixed</span>
<span style="font-family:var(--mono);color:#34d399;font-weight:bold;font-size:13px;">+0.20</span>
</div>
<div style="display:flex;justify-content:space-between;align-items:center;">
<span style="font-size:13px;color:#e8e8f0">Final Totals Exact Match</span>
<span style="font-family:var(--mono);color:#34d399;font-weight:bold;font-size:13px;">+0.40</span>
</div>`;
}} else {{
rewardsHtml = `
<div style="display:flex;justify-content:space-between;align-items:center;margin-bottom:4px;">
<span style="font-size:13px;color:#e8e8f0">Parses successfully</span>
<span style="font-family:var(--mono);color:#34d399;font-weight:bold;font-size:13px;">+0.10</span>
</div>
<div style="display:flex;justify-content:space-between;align-items:center;margin-bottom:4px;">
<span style="font-size:13px;color:#e8e8f0">Executes without error</span>
<span style="font-family:var(--mono);color:#34d399;font-weight:bold;font-size:13px;">+0.20</span>
</div>
<div style="display:flex;justify-content:space-between;align-items:center;margin-bottom:4px;">
<span style="font-size:13px;color:#e8e8f0">Column Accuracy</span>
<span style="font-family:var(--mono);color:#34d399;font-weight:bold;font-size:13px;">+0.10</span>
</div>
<div style="display:flex;justify-content:space-between;align-items:center;margin-bottom:4px;">
<span style="font-size:13px;color:#e8e8f0">Data Accuracy</span>
<span style="font-family:var(--mono);color:#34d399;font-weight:bold;font-size:13px;">+0.30</span>
</div>
<div style="display:flex;justify-content:space-between;align-items:center;">
<span style="font-size:13px;color:#e8e8f0">Exact Match Bonus</span>
<span style="font-family:var(--mono);color:#34d399;font-weight:bold;font-size:13px;">+0.30</span>
</div>`;
}}
rewardsHtml += `
<div style="font-size:11px;font-weight:bold;color:var(--muted);text-transform:uppercase;margin:10px 0 6px;border-top:1px solid rgba(255,255,255,0.05);padding-top:10px;">Penalties</div>
<div style="display:flex;justify-content:space-between;align-items:center;margin-bottom:4px;">
<span style="font-size:13px;color:var(--muted)">Duplicate Submission</span>
<span style="font-family:var(--mono);color:#f87171;font-weight:bold;font-size:13px;">-0.10</span>
</div>
<div style="display:flex;justify-content:space-between;align-items:center;margin-bottom:4px;">
<span style="font-size:13px;color:var(--muted)">Destructive Action</span>
<span style="font-family:var(--mono);color:#f87171;font-weight:bold;font-size:13px;">-0.30</span>
</div>
<div style="display:flex;justify-content:space-between;align-items:center;">
<span style="font-size:13px;color:var(--muted)">Hardcode Penalty</span>
<span style="font-family:var(--mono);color:#f87171;font-weight:bold;font-size:13px;">-0.50</span>
</div>`;
rewardBody.innerHTML = rewardsHtml;
// Schema
let schemaStr = '';
for (const [table, cols] of Object.entries(task.schema_info)) {{
schemaStr += `TABLE ${{table}} {{\\n`;
cols.forEach(c => schemaStr += ` ${{c}}\\n`);
schemaStr += `}}\\n\\n`;
}}
document.getElementById('schema-dump').textContent = schemaStr.trim();
document.getElementById('reward-card').style.display = 'block';
// Call /reset on the server to seed the DuckDB environment
fetch('/reset', {{
method: 'POST',
headers: {{'Content-Type': 'application/json'}},
body: JSON.stringify({{task_id: currentTaskId}})
}}).then(r => r.json()).then(data => {{
const out = document.getElementById('verifier-out');
out.className = 'verifier-output';
const badge = data.observation.label.includes('Advanced') || data.observation.label.includes('5')
|| data.observation.label.includes('6') || data.observation.label.includes('7')
? ' <span style="background:rgba(139,92,246,0.25);border:1px solid rgba(139,92,246,0.6);color:#c4b5fd;padding:2px 8px;border-radius:12px;font-size:11px;font-weight:700;">πŸ”¬ DuckDB-Backed</span>' : '';
out.innerHTML = `πŸ”„ Environment initialized.${{badge}} Awaiting agent execution…`;
}}).catch(() => {{
document.getElementById('verifier-out').innerHTML = 'πŸ”„ Environment initialized. Awaiting agent execution…';
}});
}}
async function executeStep() {{
const agentSQL = document.getElementById('agent-input').value.trim();
const out = document.getElementById('verifier-out');
if (!agentSQL) {{
out.className = 'verifier-output error';
out.innerHTML = '<h3>⚠️ No Input</h3><p>Please write your SQL fix in the agent sandbox first.</p>';
return;
}}
if (!currentTaskId) {{
out.className = 'verifier-output error';
out.innerHTML = '<h3>⚠️ No Task Loaded</h3><p>Click Initialize Environment first.</p>';
return;
}}
out.className = 'verifier-output';
out.innerHTML = '⏳ Executing in DuckDB sandbox…';
const task = TASKS[currentTaskId];
const isAdvanced = !!task.duckdb_backed;
if (isAdvanced) {{
// Real API call for DuckDB-backed tasks
try {{
const res = await fetch('/step', {{
method: 'POST',
headers: {{'Content-Type': 'application/json'}},
body: JSON.stringify({{fixed_sql: agentSQL, explanation: ''}})
}});
const data = await res.json();
const reward = (data.reward != null) ? data.reward : 0.0;
const done = data.done;
const msg = data.info?.message || '';
const verifier = data.info?.verifier || 'DuckDB';
const isPos = reward >= 0;
out.className = `verifier-output ${{done && reward > 0 ? 'success' : reward < 0 ? 'error' : 'success'}}`;
out.innerHTML = `
<h3>${{done && reward >= 1.0 ? 'βœ…' : reward < 0 ? '❌' : '⚠️'}} Verifier Result</h3>
<p style="margin-top:6px">${{msg}}</p>
<p style="margin-top:8px;font-size:11px;color:var(--muted)">πŸ”¬ ${{verifier}} Β· Step ${{data.observation?.step_count ?? '?'}}</p>
<span class="reward-pill ${{isPos ? 'reward-positive' : 'reward-negative'}}">Reward: ${{reward >= 0 ? '+' : ''}}${{reward.toFixed(2)}}</span>
`;
}} catch(e) {{
out.className = 'verifier-output error';
out.innerHTML = `<h3>❌ Network Error</h3><p>${{e.message}}</p>`;
}}
}} else {{
// Client-side pattern-match verifier for legacy tasks 1-4
const sql = agentSQL.toUpperCase();
const taskSolved = (
(currentTaskId === 'task_1_easy' && sql.includes(',') && sql.includes('NAME') && sql.includes('AGE')) ||
(currentTaskId === 'task_2_medium' && sql.includes('GROUP BY')) ||
(currentTaskId === 'task_3_hard' && sql.includes('PARTITION BY')) ||
(currentTaskId === 'task_4_expert' && !sql.includes('13-01') && sql.includes('MONTHLY_SALES'))
);
if (taskSolved) {{
out.className = 'verifier-output success';
out.innerHTML = `
<h3>βœ… Verification Passed!</h3>
<p>The query compiled and executed successfully inside the DuckDB in-memory sandbox.</p>
<p>The pipeline produced the expected output rows without errors.</p>
<span class="reward-pill reward-positive">Reward: +1.0</span>
`;
}} else {{
out.className = 'verifier-output error';
out.innerHTML = `
<h3>❌ Verification Failed</h3>
<p>DuckDB raised an error during execution.</p>
<p style="font-family:var(--mono);font-size:12px;margin-top:6px;opacity:0.8">${{task.error}}</p>
<span class="reward-pill reward-negative">Reward: -0.1</span>
`;
}}
}}
}}
</script>
</body>
</html>""".replace("{TASKS_JSON}", TASKS_JSON)
return HTMLResponse(html)
def main():
import uvicorn
uvicorn.run(app, host="0.0.0.0", port=7860)
if __name__ == "__main__":
main()