"""
QueryForge — Gradio Demo
========================
Interactive SQL debugger UI that runs the environment in-process (no server needed).

Run locally:
    python demo.py
    # opens http://localhost:7860

On HF Spaces:
    Set the ANTHROPIC_API_KEY secret in the Space settings to enable AI judging
    (optional). The demo auto-detects it.
"""
import os
import sys
import gradio as gr
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from models import SQLAction
from server.queryforge_environment import QueryforgeEnvironment
from tasks import REGISTRY
# ── Helpers ───────────────────────────────────────────────────────────────────
# True when an Anthropic API key is present (and non-empty) in the environment.
AI_JUDGE_ACTIVE = bool(os.environ.get("ANTHROPIC_API_KEY"))

# (label, value) pairs for the task dropdown: the label shows the upper-cased
# level and the title, the value is the task id.
TASK_CHOICES = [
    ("[{}] {}".format(task.level.upper(), task.title), task.id)
    for task in REGISTRY.list_all()
]
def _score_html(score: float, done: bool) -> str:
filled = int(score * 20)
bar = "█" * filled + "░" * (20 - filled)
color = "#22c55e" if score >= 0.9 else ("#f59e0b" if score >= 0.5 else "#ef4444")
suffix = " ✓ Solved!" if done and score >= 0.9 else (" ⏹ Ended" if done else "")
return (
f'
'
f'[{bar}] '
f'{score:.3f}{suffix}
'
)
# ── Callbacks ─────────────────────────────────────────────────────────────────
def load_task(task_id: str):
    """Reset environment and populate UI with the chosen task.

    Creates a fresh ``QueryforgeEnvironment``, resets it for ``task_id`` and
    looks the task up in ``REGISTRY`` to obtain its broken query.

    Args:
        task_id: Identifier of the task selected in the dropdown.

    Returns:
        A 7-tuple matching the outputs wired to the "Load Task" button:
        task description, SQL editor contents, score placeholder HTML,
        cleared feedback, cleared history rows, the new session state dict,
        and an update that enables the submit button.
    """
    env = QueryforgeEnvironment()
    obs = env.reset(task_id=task_id)
    task = REGISTRY.get(task_id)

    # Session state: the live environment, the attempt rows shown in the
    # history table, and whether the episode has finished.
    state = {"env": env, "history": [], "done": False}

    return (
        obs.task_description,  # task description box
        task.broken_query,  # pre-fill SQL editor with broken query
        # NOTE(review): wrapper markup is a reconstruction; confirm styling.
        "<p>Submit a query to see your score.</p>",
        "",  # clear feedback
        [],  # clear history table
        state,
        gr.update(interactive=True),  # enable submit button
    )
def _format_feedback(feedback, hint, done):
    """Render grader feedback as Markdown, one paragraph per ``[Label]`` section."""
    import re  # function-scope import keeps this block self-contained

    paragraphs = []
    # NOTE(review): assumes each feedback section starts with a "[Label]" tag
    # preceded by whitespace; confirm against the environment's output format.
    for section in re.split(r"\s+(?=\[[^\[\]]+\])", (feedback or "").strip()):
        section = section.strip()
        if not section:
            continue
        if section.startswith("[") and "]" in section:
            label, _, body = section[1:].partition("]")
            paragraphs.append(f"**{label.strip()}**{body}")
        else:
            # Unlabelled text is shown once, verbatim.
            paragraphs.append(section)

    markdown = "\n\n".join(paragraphs)
    # The hint is only shown while the episode is still running.
    if hint and not done:
        markdown += f"\n\n> 💡 **Hint:** {hint}"
    return markdown


def submit_query(sql: str, state: dict):
    """Grade the submitted SQL and update all output components.

    Args:
        sql: Contents of the SQL editor; ``None`` is treated as empty.
        state: Session state dict holding ``"env"``, ``"history"`` and
            ``"done"``, or ``None`` when no task has been loaded.

    Returns:
        A 4-tuple matching the outputs wired to the submit button: score
        HTML, feedback Markdown, history rows, and the session state.
    """
    # NOTE(review): the <p> wrappers on the two guard messages are a
    # reconstruction; confirm styling.
    if state is None or "env" not in state:
        return ("<p>Load a task first.</p>", "", [], state)
    if state.get("done"):
        return (
            "<p>Episode already ended. Load a new task.</p>",
            "",
            state.get("history", []),
            state,
        )

    env = state["env"]
    obs = env.step(SQLAction(sql=(sql or "").strip()))
    score = obs.reward or 0.0

    # ── Score HTML ────────────────────────────────────────────────────────────
    score_html = _score_html(score, obs.done)

    # ── Feedback (split into labelled sections) ───────────────────────────────
    feedback_md = _format_feedback(obs.feedback, obs.hint, obs.done)

    # ── History table ─────────────────────────────────────────────────────────
    if obs.done and score >= 0.9:
        status = "✓ Solved"
    elif obs.done:
        status = "⏹ Ended"
    else:
        status = "↻ Retry"
    state["history"].append([obs.attempt, f"{score:.3f}", obs.rows_returned, status])
    state["done"] = obs.done

    return score_html, feedback_md, state["history"], state
# ── UI layout ─────────────────────────────────────────────────────────────────
# Status line telling the user which grading mode is in effect.
if AI_JUDGE_ACTIVE:
    _ai_status_line = "🟢 **AI Judge active** — scores up to 1.0 (Anthropic)"
else:
    _ai_status_line = "🟡 **Deterministic mode** — max score 0.80 (set `ANTHROPIC_API_KEY` to enable AI judge)"

# Markdown banner rendered at the top of the page.
HEADER = f"""
# 🔧 QueryForge — SQL Debugger & Optimiser
Fix broken or slow SQL queries and get instant graded feedback.
{_ai_status_line}
"""
# Page layout and event wiring. Component creation order determines the
# on-page order, so it is kept exactly as before.
with gr.Blocks(title="QueryForge") as demo:
    # Session state dict produced by load_task; None until a task is loaded.
    state = gr.State(None)
    gr.Markdown(HEADER)

    # ── Task selection row ────────────────────────────────────────────────────
    with gr.Row():
        task_dd = gr.Dropdown(
            choices=TASK_CHOICES,
            # Guard against an empty registry so the module still imports.
            value=TASK_CHOICES[0][1] if TASK_CHOICES else None,
            label="Select Task",
            scale=4,
        )
        load_btn = gr.Button("Load Task ▶", variant="primary", scale=1)

    # ── Main two-column layout ────────────────────────────────────────────────
    with gr.Row():
        with gr.Column(scale=1):
            task_desc = gr.Textbox(
                label="📋 Task Description",
                lines=18,
                interactive=False,
                placeholder="Load a task to see the description and broken query…",
            )
        with gr.Column(scale=1):
            sql_input = gr.Code(
                label="✏️ Your SQL Query",
                language="sql",
                lines=12,
            )
            # Disabled until load_task enables it.
            submit_btn = gr.Button(
                "Submit Query ⚡",
                variant="primary",
                interactive=False,
            )

    # ── Score + feedback ──────────────────────────────────────────────────────
    # NOTE(review): nesting of the components below (page level rather than
    # inside the right-hand column) and the placeholder markup are
    # reconstructions; confirm against the intended layout.
    score_html = gr.HTML(
        value="<p>Submit a query to see your score.</p>",
        label="Score",
    )
    feedback_display = gr.Markdown(label="Feedback")

    # ── Attempt history ───────────────────────────────────────────────────────
    history_table = gr.Dataframe(
        headers=["Step", "Score", "Rows Returned", "Status"],
        datatype=["number", "str", "number", "str"],
        label="📊 Attempt History",
        interactive=False,
        wrap=True,
    )

    # ── Wire up events ────────────────────────────────────────────────────────
    # The outputs lists must stay in the same order as the tuples returned by
    # load_task (7 values) and submit_query (4 values).
    load_btn.click(
        load_task,
        inputs=[task_dd],
        outputs=[task_desc, sql_input, score_html, feedback_display, history_table, state, submit_btn],
    )
    submit_btn.click(
        submit_query,
        inputs=[sql_input, state],
        outputs=[score_html, feedback_display, history_table, state],
    )
if __name__ == "__main__":
    # Serve on every network interface at port 7860, without a share link.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
    }
    demo.launch(**launch_options)