/** * DemoMode — scripted autoplay showcase for SQL Agent OpenEnv. * * Right panel: live reward chart + GitHub-style prompt diff. * Chat: single-difficulty rounds separated by GEPA evolution. * No looping — scrollable at the end. */ import { useState, useRef, useCallback, useEffect } from 'react' import { motion, AnimatePresence } from 'framer-motion' import { LineChart, Line, XAxis, YAxis, CartesianGrid, Tooltip, ResponsiveContainer, ReferenceLine, } from 'recharts' import { Play, X, Zap, CheckCircle2, XCircle, ChevronDown, ChevronUp, Sparkles, Loader2, GitCommitHorizontal, } from 'lucide-react' // ─── Prompts (3 generations) ───────────────────────────────────────────────── const PROMPTS = [ `You are a SQL expert. Given a question and a SQLite schema, write correct SQL. Rules: - Output ONLY the SQL query - Use SQLite syntax - No markdown, no code fences`, `You are a SQL expert. Given a question and a SQLite schema, write correct SQL. Rules: - Output ONLY the SQL query - Use SQLite syntax - No markdown, no code fences - Always qualify column names with table aliases in JOINs - Use t.column_name to avoid ambiguous column errors - Verify every column name against the schema before use`, `You are a SQL expert. Given a question and a SQLite schema, write correct SQL. Rules: - Output ONLY the SQL query - Use SQLite syntax - No markdown, no code fences - Always qualify column names with table aliases in JOINs - Use t.column_name to avoid ambiguous column errors - Verify every column name against the schema before use - For aggregations: include all non-aggregated columns in GROUP BY - For revenue: use orders.total_price (not price or amount) - For top-N: use ORDER BY … DESC LIMIT N`, ] const SCORES = [0.42, 0.74, 0.91] // ─── Scripted query data ───────────────────────────────────────────────────── interface Attempt { sql: string error?: string errorClass?: string rlAction?: string reward: number rows?: Record[] } interface QueryDef { id: string question: string attempts: Attempt[] } const QUERIES: Record = { // Round 1 — simple queries, some fail r1q1: { id: 'r1q1', question: 'Show all products', attempts: [ { sql: 'SELECT * FROM product', error: 'no such table: product', errorClass: 'NO_SUCH_TABLE', rlAction: 'FIX_TABLE', reward: -0.15 }, { sql: 'SELECT * FROM products LIMIT 10', reward: 0.90, rows: [{ id: 1, name: 'Wireless Headphones', category: 'Electronics', price: 79.99 }, { id: 2, name: 'Running Shoes', category: 'Footwear', price: 59.99 }, { id: 3, name: 'Coffee Maker', category: 'Kitchen', price: 49.99 }] }, ], }, r1q2: { id: 'r1q2', question: 'List all users from the USA', attempts: [ { sql: "SELECT * FROM users WHERE country = 'USA'", reward: 1.00, rows: [{ id: 1, name: 'Alice Johnson', email: 'alice@example.com', country: 'USA' }, { id: 2, name: 'Bob Smith', email: 'bob@example.com', country: 'USA' }] }, ], }, // Round 2 — join queries, ambiguous columns fixed by GEPA r2q1: { id: 'r2q1', question: 'Top 5 sellers by total revenue', attempts: [ { sql: `SELECT seller_id, SUM(total_price) as revenue\nFROM orders\nGROUP BY seller_id\nORDER BY revenue DESC\nLIMIT 5`, error: 'ambiguous column name: seller_id', errorClass: 'AMBIGUOUS_COLUMN', rlAction: 'FIX_COLUMN', reward: -0.20 }, { sql: `SELECT s.name, SUM(o.total_price) as revenue\nFROM orders o\nJOIN sellers s ON o.user_id = s.id\nGROUP BY s.id, s.name\nORDER BY revenue DESC\nLIMIT 5`, reward: 0.80, rows: [{ seller: 'TechStore Pro', revenue: 12840 }, { seller: 'StyleHub', revenue: 9320 }, { seller: 'GadgetWorld', revenue: 8750 }] }, ], }, r2q2: { id: 'r2q2', question: 'Average order value by country', attempts: [ { sql: `SELECT country, AVG(total_price) as avg_order\nFROM orders\nJOIN users ON user_id = users.id\nGROUP BY country`, error: 'ambiguous column name: country', errorClass: 'AMBIGUOUS_COLUMN', rlAction: 'FIX_COLUMN', reward: -0.15 }, { sql: `SELECT u.country, ROUND(AVG(o.total_price), 2) as avg_order\nFROM orders o\nJOIN users u ON o.user_id = u.id\nGROUP BY u.country\nORDER BY avg_order DESC`, reward: 0.85, rows: [{ country: 'USA', avg_order: 142.50 }, { country: 'UK', avg_order: 138.20 }, { country: 'Germany', avg_order: 121.80 }] }, ], }, // Round 3 — after GEPA 2, same joins succeed first try r3q1: { id: 'r3q1', question: 'Top 5 sellers by total revenue', attempts: [ { sql: `SELECT s.name, SUM(o.total_price) as revenue\nFROM orders o\nJOIN sellers s ON o.user_id = s.id\nGROUP BY s.id, s.name\nORDER BY revenue DESC\nLIMIT 5`, reward: 1.00, rows: [{ seller: 'TechStore Pro', revenue: 12840 }, { seller: 'StyleHub', revenue: 9320 }, { seller: 'GadgetWorld', revenue: 8750 }] }, ], }, r3q2: { id: 'r3q2', question: 'Monthly revenue for the last 6 months', attempts: [ { sql: `SELECT strftime('%Y-%m', created_at) as month,\n ROUND(SUM(total_price), 2) as revenue\nFROM orders\nGROUP BY month\nORDER BY month DESC\nLIMIT 6`, reward: 1.00, rows: [{ month: '2024-11', revenue: 24180 }, { month: '2024-10', revenue: 21340 }, { month: '2024-09', revenue: 19800 }] }, ], }, } const ROUND_1 = ['r1q1', 'r1q2'] const ROUND_2 = ['r2q1', 'r2q2'] const ROUND_3 = ['r3q1', 'r3q2'] // ─── GitHub-style diff ─────────────────────────────────────────────────────── interface DiffLine { type: 'add' | 'remove' | 'same'; text: string } function diffPrompts(fromIdx: number, toIdx: number): DiffLine[] { const oldLines = PROMPTS[fromIdx].split('\n') const newLines = PROMPTS[toIdx].split('\n') const result: DiffLine[] = [] // Simple patience diff: lines in both = same, only in old = remove, only in new = add const oldSet = new Set(oldLines) const newSet = new Set(newLines) // Walk new lines, carrying old-only lines before matching let oi = 0 for (let ni = 0; ni < newLines.length; ni++) { const line = newLines[ni] if (oldSet.has(line)) { // Flush any old-only lines before this match while (oi < oldLines.length && oldLines[oi] !== line) { if (!newSet.has(oldLines[oi])) result.push({ type: 'remove', text: oldLines[oi] }) oi++ } result.push({ type: 'same', text: line }) oi++ } else { result.push({ type: 'add', text: line }) } } // Remaining old-only while (oi < oldLines.length) { if (!newSet.has(oldLines[oi])) result.push({ type: 'remove', text: oldLines[oi] }) oi++ } return result } function GithubDiff({ fromIdx, toIdx }: { fromIdx: number; toIdx: number }) { const lines = diffPrompts(fromIdx, toIdx) const added = lines.filter((l) => l.type === 'add').length const removed = lines.filter((l) => l.type === 'remove').length return (
{/* Header */}
system_prompt.txt +{added} −{removed}
{/* Diff lines */}
{lines.map((line, i) => { const bg = line.type === 'add' ? 'bg-green-500/10' : line.type === 'remove' ? 'bg-red-500/10' : '' const prefix = line.type === 'add' ? '+' : line.type === 'remove' ? '-' : ' ' const color = line.type === 'add' ? 'text-green-300' : line.type === 'remove' ? 'text-red-300' : 'text-gray-500' return (
{prefix} {line.text || }
) })}
) } // ─── Reward chart ──────────────────────────────────────────────────────────── interface RewardPoint { step: number; reward: number } function RewardChart({ points }: { points: RewardPoint[] }) { if (points.length === 0) { return (
Rewards appear as agent runs
) } return ( [v.toFixed(2), 'reward']} labelFormatter={(l) => `Step ${l}`} /> { const { cx, cy, payload } = props const color = (payload as RewardPoint).reward >= 0 ? '#22c55e' : '#ef4444' return }} activeDot={{ r: 4, fill: '#f97316' }} isAnimationActive={false} /> ) } // ─── Right panel ───────────────────────────────────────────────────────────── interface RightPanelProps { gen: number score: number rewardPoints: RewardPoint[] latestDiff: { from: number; to: number } | null } function RightPanel({ gen, score, rewardPoints, latestDiff }: RightPanelProps) { return (
{/* Gen badge + score */}
GEPA
Gen {gen}
{/* Score bar */}
{(score * 100).toFixed(0)}% benchmark score
{/* Generation history */}
{SCORES.slice(0, gen + 1).map((s, i) => (
Gen {i}
{(s * 100).toFixed(0)}%
))}
{/* Reward chart */}
RL Reward per Step
{/* GitHub diff — appears after each GEPA cycle */} {latestDiff && (
Prompt diff — Gen {latestDiff.from} → {latestDiff.to}
)}
) } // ─── SQL highlighter ───────────────────────────────────────────────────────── function HighlightSQL({ sql }: { sql: string }) { const re = /\b(SELECT|FROM|WHERE|JOIN|LEFT|RIGHT|ON|GROUP BY|ORDER BY|HAVING|LIMIT|AS|AND|OR|NOT|IN|IS|NULL|SUM|AVG|COUNT|ROUND|DISTINCT|DESC|ASC|strftime|JULIANDAY)\b/gi const parts: React.ReactNode[] = [] let last = 0; let match: RegExpExecArray | null const r = new RegExp(re.source, 'gi') while ((match = r.exec(sql)) !== null) { if (match.index > last) parts.push({sql.slice(last, match.index)}) parts.push({match[0]}) last = match.index + match[0].length } if (last < sql.length) parts.push({sql.slice(last)}) return <>{parts} } // ─── Bubble types ───────────────────────────────────────────────────────────── interface BaseBubble { id: string } interface UserBubble extends BaseBubble { type: 'user'; text: string } interface ThinkingBubble extends BaseBubble { type: 'thinking'; label: string } interface SqlStreamBubble extends BaseBubble { type: 'sql_stream'; sql: string; attempt: number } interface SqlErrBubble extends BaseBubble { type: 'sql_err'; sql: string; error: string; errorClass: string; rlAction: string; reward: number; attempt: number } interface SqlOkBubble extends BaseBubble { type: 'sql_ok'; sql: string; rows: Record[]; reward: number; attempt: number; firstTry: boolean } interface GepaBubble extends BaseBubble { type: 'gepa'; fromGen: number; toGen: number; scoreFrom: number; scoreTo: number } interface GroupBubble extends BaseBubble { type: 'group'; question: string; success: boolean; attempts: number; children: BubbleData[] } type BubbleData = UserBubble | ThinkingBubble | SqlStreamBubble | SqlErrBubble | SqlOkBubble | GepaBubble | GroupBubble let _id = 0 const uid = () => `b${++_id}` const sleep = (ms: number) => new Promise((r) => setTimeout(r, ms)) // ─── Bubble renderer ───────────────────────────────────────────────────────── function Bubble({ b }: { b: BubbleData }) { const [open, setOpen] = useState(false) if (b.type === 'user') return (

{b.text}

) if (b.type === 'thinking') return (
{b.label}
) if (b.type === 'sql_stream') return (
SQL attempt {b.attempt}
        
        
      
) if (b.type === 'sql_err') return (
SQL attempt {b.attempt}
          
        
{b.error} {b.errorClass} {b.rlAction} {b.reward.toFixed(2)}
) if (b.type === 'sql_ok') return (
SQL {b.firstTry && ( first try ✓ )} +{b.reward.toFixed(2)}
          
        
Success · {b.rows.length}+ rows returned
{Object.keys(b.rows[0] ?? {}).map((k) => ( ))} {b.rows.map((row, i) => ( {Object.values(row).map((v, j) => ( ))} ))}
{k}
{String(v)}
) if (b.type === 'gepa') return (
GEPA — System Prompt Evolved Gen {b.fromGen} → {b.toGen}
Before
{(b.scoreFrom * 100).toFixed(0)}%
↑ improved
After
{(b.scoreTo * 100).toFixed(0)}%
Prompt diff (see sidebar for full view)
{diffPrompts(b.fromGen, b.toGen).filter((l) => l.type !== 'same').map((line, i) => (
{line.type === 'add' ? '+' : '-'} {line.text}
))}
) if (b.type === 'group') return (
{open && (
{b.children.map((c) => )}
)}
) return null } // ─── DemoMode ──────────────────────────────────────────────────────────────── export function DemoMode({ onClose }: { onClose: () => void }) { const [bubbles, setBubbles] = useState([]) const [appState, setAppState] = useState<'idle' | 'running' | 'done'>('idle') const [gen, setGen] = useState(0) const [score, setScore] = useState(SCORES[0]) const [rewardPoints, setRewardPoints] = useState([]) const [latestDiff, setLatestDiff] = useState<{ from: number; to: number } | null>(null) const stepRef = useRef(0) const cancel = useRef(false) const bottomRef = useRef(null) // Track all played queries and GEPA bubbles for end-of-demo collapse const allQueriesRef = useRef>([]) const gepaBubblesRef = useRef([]) const scroll = useCallback(() => { setTimeout(() => bottomRef.current?.scrollIntoView({ behavior: 'smooth' }), 60) }, []) const push = useCallback((b: BubbleData) => setBubbles((p) => [...p, b]), []) const addReward = useCallback((reward: number) => { stepRef.current += 1 setRewardPoints((p) => [...p, { step: stepRef.current, reward }]) }, []) const typeUser = useCallback(async (text: string) => { const id = uid() push({ id, type: 'user', text: '' }) for (let i = 1; i <= text.length; i++) { if (cancel.current) return setBubbles((p) => p.map((b) => b.id === id ? { ...b, text: text.slice(0, i) } : b)) await sleep(32 + Math.random() * 22) } scroll(); await sleep(300) }, [push, scroll]) const streamSQL = useCallback(async (sql: string, attempt: number) => { const id = uid() push({ id, type: 'sql_stream', sql: '', attempt }) let built = '' for (const line of sql.split('\n')) { if (cancel.current) return built += (built ? '\n' : '') + line setBubbles((p) => p.map((b) => b.id === id ? { ...b, sql: built } : b)) await sleep(85 + Math.random() * 70) } scroll(); await sleep(200) setBubbles((p) => p.filter((b) => b.id !== id)) }, [push, scroll]) const playQuery = useCallback(async (def: QueryDef): Promise => { const children: BubbleData[] = [] await typeUser(def.question) for (let i = 0; i < def.attempts.length; i++) { if (cancel.current) return children const att = def.attempts[i] // Thinking const thId = uid() push({ id: thId, type: 'thinking', label: i === 0 ? 'Generating SQL…' : 'Applying repair strategy…' }) scroll(); await sleep(750) if (cancel.current) return children setBubbles((p) => p.filter((b) => b.id !== thId)) await streamSQL(att.sql, i + 1) if (cancel.current) return children addReward(att.reward) if (att.error) { const eb: SqlErrBubble = { id: uid(), type: 'sql_err', sql: att.sql, error: att.error, errorClass: att.errorClass ?? 'OTHER', rlAction: att.rlAction ?? 'REWRITE_FULL', reward: att.reward, attempt: i + 1 } push(eb); children.push(eb); scroll(); await sleep(900) } else { const ob: SqlOkBubble = { id: uid(), type: 'sql_ok', sql: att.sql, rows: att.rows ?? [], reward: att.reward, attempt: i + 1, firstTry: i === 0 } push(ob); children.push(ob); scroll(); await sleep(1100) } } return children }, [push, scroll, typeUser, streamSQL, addReward]) const playGepa = useCallback(async (fromGen: number, toGen: number) => { const steps = ['Analyzing failure patterns…', 'Identifying missing rules from errors…', 'Rewriting system prompt…', 'Benchmarking candidate prompt…'] for (const label of steps) { if (cancel.current) return setBubbles((p) => { const last = p[p.length - 1] if (last?.type === 'thinking') return [...p.slice(0, -1), { id: last.id, type: 'thinking' as const, label }] return [...p, { id: uid(), type: 'thinking' as const, label }] }) scroll(); await sleep(1050) } setBubbles((p) => p.filter((b) => b.type !== 'thinking')) // Animate score const from = SCORES[fromGen], to = SCORES[toGen] for (let i = 0; i <= 45; i++) { if (cancel.current) return setScore(from + (to - from) * (i / 45)) await sleep(18) } setGen(toGen) setLatestDiff({ from: fromGen, to: toGen }) const gepaBubble: GepaBubble = { id: uid(), type: 'gepa', fromGen, toGen, scoreFrom: from, scoreTo: to } gepaBubblesRef.current.push(gepaBubble) push(gepaBubble) scroll(); await sleep(1000) }, [push, scroll]) const autoPlay = useCallback(async () => { cancel.current = false allQueriesRef.current = [] gepaBubblesRef.current = [] setBubbles([]); setGen(0); setScore(SCORES[0]); setRewardPoints([]); setLatestDiff(null) stepRef.current = 0; setAppState('running') await sleep(300) for (const id of ROUND_1) { if (cancel.current) break const children = await playQuery(QUERIES[id]) if (!cancel.current) { allQueriesRef.current.push({ def: QUERIES[id], children }) await sleep(600) } } if (!cancel.current) { await playGepa(0, 1); await sleep(500) } for (const id of ROUND_2) { if (cancel.current) break const children = await playQuery(QUERIES[id]) if (!cancel.current) { allQueriesRef.current.push({ def: QUERIES[id], children }) await sleep(600) } } if (!cancel.current) { await playGepa(1, 2); await sleep(500) } for (const id of ROUND_3) { if (cancel.current) break const children = await playQuery(QUERIES[id]) if (!cancel.current) { allQueriesRef.current.push({ def: QUERIES[id], children }) await sleep(600) } } if (!cancel.current) { // Collapse all queries at once, rebuilding the bubble list const queries = allQueriesRef.current const gepas = gepaBubblesRef.current const result: BubbleData[] = [] // Round 1 groups for (let i = 0; i < ROUND_1.length && i < queries.length; i++) { const { def, children } = queries[i] const lastAtt = def.attempts[def.attempts.length - 1] result.push({ id: uid(), type: 'group', question: def.question, success: !lastAtt.error, attempts: def.attempts.length, children }) } // GEPA 0→1 if (gepas[0]) result.push(gepas[0]) // Round 2 groups for (let i = 0; i < ROUND_2.length; i++) { const qi = ROUND_1.length + i if (qi >= queries.length) break const { def, children } = queries[qi] const lastAtt = def.attempts[def.attempts.length - 1] result.push({ id: uid(), type: 'group', question: def.question, success: !lastAtt.error, attempts: def.attempts.length, children }) } // GEPA 1→2 if (gepas[1]) result.push(gepas[1]) // Round 3 groups for (let i = 0; i < ROUND_3.length; i++) { const qi = ROUND_1.length + ROUND_2.length + i if (qi >= queries.length) break const { def, children } = queries[qi] const lastAtt = def.attempts[def.attempts.length - 1] result.push({ id: uid(), type: 'group', question: def.question, success: !lastAtt.error, attempts: def.attempts.length, children }) } setBubbles(result) await sleep(300) scroll() setAppState('done') } }, [playQuery, playGepa, scroll]) useEffect(() => () => { cancel.current = true }, []) return ( {/* Header */}
Demo
RL repair loop · GEPA prompt evolution · 42% → 91%
{/* Body */}
{/* Chat */}
{appState === 'idle' ? (

SQL Agent OpenEnv — Live Demo

Watch the agent fail, self-repair using LinUCB, then improve through two GEPA prompt evolution cycles.

{['Round 1 — simple queries, RL repairs table/column errors', 'GEPA cycle 1 — prompt learns alias rules (42%→74%)', 'Round 2 — join queries, ambiguous column errors repaired', 'GEPA cycle 2 — prompt learns aggregation rules (74%→91%)', 'Round 3 — same queries, first-try success'].map((s, i) => (
{i + 1}
{s}
))}
) : (
{bubbles.map((b) => ( ))} {appState === 'done' && (
91%
Agent improved from 42% → 91% accuracy through RL repair strategies and two GEPA prompt evolution cycles.
)}
)}
{/* Right panel */}
) }