import { useState, useCallback } from 'react' import { motion, AnimatePresence } from 'framer-motion' import { Target, Play, Loader2, CheckCircle2, XCircle, ChevronDown, RotateCcw, Zap, } from 'lucide-react' import { useStore } from '../store/useStore' import { streamBenchmark } from '../lib/api' import type { BenchmarkResult, Difficulty } from '../lib/types'
/** Difficulty choices (easy, medium, hard); BenchmarkPanel maps over this list in its "Difficulty tabs" section. */
const DIFFICULTY_TABS: { id: Difficulty; label: string }[] = [ { id: 'easy', label: 'Easy' }, { id: 'medium', label: 'Medium' }, { id: 'hard', label: 'Hard' }, ]
/**
 * Renders a single benchmark result.
 *
 * Always shown: the status output, the result id, its difficulty, the score
 * (two decimals, only when not null), the attempt count (only when not null,
 * pluralised), the question, and - once the status is no longer 'pending' -
 * the reason, cut to 120 characters plus an ellipsis when longer.
 * When `isExpanded` is true a detail section adds the question, the generated
 * SQL (if any), the reference/agent row counts (agent count falls back to 0)
 * and the full reason.
 *
 * NOTE(review): the element markup of this component is missing from this view
 * of the file, so the uses of `isActive`, `onToggleExpand` and `onRunSingle`
 * cannot be seen here - presumably they are wired to handlers/styles in the
 * markup; confirm against the original file.
 *
 * @param result - The benchmark result to display.
 * @param isActive - Use not visible in this view.
 * @param isExpanded - Whether the detail section is rendered.
 * @param onToggleExpand - Use not visible in this view.
 * @param onRunSingle - Use not visible in this view.
 * @param isRunning - When true, the two conditional action slots (pending row / finished row) are not rendered.
 * @param dbSeeded - Must be true for those same action slots to be rendered.
 */
function QueryRow({ result, isActive, isExpanded, onToggleExpand, onRunSingle, isRunning, dbSeeded, }: { result: BenchmarkResult isActive: boolean isExpanded: boolean onToggleExpand: () => void onRunSingle: () => void isRunning: boolean dbSeeded: boolean }) {
// Branches on the four statuses ('pending', 'running', 'pass', 'fail').
// NOTE(review): what each branch returns is missing from this view.
const statusIcon = () => { switch (result.status) { case 'pending': return case 'running': return case 'pass': return case 'fail': return } }
// Class string per difficulty: red for 'hard', amber for 'medium', blue for anything else.
const difficultyColor = result.difficulty === 'hard' ? 'text-red-400 bg-red-500/10 border-red-500/25' : result.difficulty === 'medium' ? 'text-amber-400 bg-amber-500/10 border-amber-500/25' : 'text-blue-400 bg-blue-500/10 border-blue-500/25' return (
{statusIcon()}
{result.id} {result.difficulty} {result.score !== null && ( {result.score.toFixed(2)} )} {result.attempts !== null && ( {result.attempts} attempt{result.attempts !== 1 ? 's' : ''} )}
{result.question}
{result.reason && result.status !== 'pending' && (
{result.reason.length > 120 ? result.reason.slice(0, 120) + '…' : result.reason}
)}
{result.status === 'pending' && dbSeeded && !isRunning && ( )}
{/* Expanded detail */} {isExpanded && (

{result.question}

{result.sql && (
Generated SQL
                    {result.sql}
                  
)} {(result.refRowCount !== null || result.reason) && (
{result.refRowCount !== null && (
reference: {result.refRowCount} rows agent: {result.agentRowCount ?? 0} rows
)} {result.reason && (
{result.reason}
)}
)} {result.status !== 'pending' && result.status !== 'running' && !isRunning && dbSeeded && ( )}
)}
) }
/**
 * Benchmark panel: reads benchmark state from the store, lets a run be started
 * for all results or a single one, and renders summary figures plus one entry
 * per result.
 *
 * Rendered conditionally: the passed/total count and the score line once at
 * least one result has completed, the overall score (as a whole percent) when
 * it is not null, a progress section while a benchmark is running, and the
 * "Waiting for database initialization..." text while `dbSeeded` is false.
 *
 * NOTE(review): element markup is missing from this view; the per-result entry
 * receives onRunSingle / isRunning / dbSeeded, so it is presumably QueryRow -
 * confirm against the original file.
 */
export function BenchmarkPanel() { const { benchmarkResults, isBenchmarking, overallScore, activeBenchmarkId, dbSeeded, setIsBenchmarking, updateBenchmarkResult, setOverallScore, setActiveBenchmarkId, resetBenchmark, taskDifficulty, setTaskDifficulty, } = useStore()
// Set of result ids, flipped per id by toggleExpand below.
// NOTE(review): the type argument of useState is garbled in this view.
const [expandedIds, setExpandedIds] = useState>(new Set())
// Removes the id when present, adds it otherwise; works on a copy of the previous Set instead of mutating it.
const toggleExpand = (id: string) => { setExpandedIds((prev) => { const next = new Set(prev) if (next.has(id)) next.delete(id) else next.add(id) return next }) }
/**
 * Runs the benchmark and feeds streamed events back into the store.
 *
 * @param queryIds - Ids to run; when omitted, every id in `benchmarkResults`
 *   is targeted. The value is forwarded unchanged to `streamBenchmark`.
 *
 * Events handled: 'query_start' (sets the active id, marks the entry running),
 * 'query_result' (stores pass/fail, score, reason, sql, attempts and row
 * counts), 'done' (stores the overall score and clears the active/running
 * flags) and 'error' (clears the active/running flags only).
 *
 * NOTE(review): `benchmarkResults` here is the array captured when this
 * callback was created. The 'query_start' update spreads an entry from that
 * captured array, so it may write back the score/reason/sql values that the
 * reset loop has just cleared - depends on how updateBenchmarkResult stores
 * entries; confirm.
 * NOTE(review): the running flag is cleared only on 'done', 'error' or a
 * thrown error. If the stream can end without either event the flag stays
 * true - confirm streamBenchmark always emits a terminal event.
 * NOTE(review): the catch discards the error and the 'error' branch does not
 * read the event payload; entries marked 'running' are not reset on failure.
 */
const runBenchmark = useCallback( async (queryIds?: string[]) => {
// Guard on isBenchmarking, then flag the run as started and resolve the target ids.
if (isBenchmarking) return setIsBenchmarking(true) const targetIds = queryIds ?? benchmarkResults.map((r) => r.id)
// Mark every known target as running and clear its score, reason and sql (attempts and row counts are left as they were).
for (const id of targetIds) { const existing = benchmarkResults.find((r) => r.id === id) if (existing) { updateBenchmarkResult({ ...existing, status: 'running', score: null, reason: null, sql: null }) } }
// Consume the event stream; events whose id matches no known result are ignored.
try { for await (const event of streamBenchmark(taskDifficulty, queryIds)) { if (event.type === 'query_start') { setActiveBenchmarkId(event.id as string) const existing = benchmarkResults.find((r) => r.id === event.id) if (existing) updateBenchmarkResult({ ...existing, status: 'running' }) } else if (event.type === 'query_result') { const existing = benchmarkResults.find((r) => r.id === event.id) if (existing) { updateBenchmarkResult({ ...existing, status: (event.pass as boolean) ? 'pass' : 'fail', score: event.score as number, reason: event.reason as string, sql: event.sql as string, attempts: (event.attempts as number) ?? null, refRowCount: (event.refRowCount as number) ?? null, agentRowCount: (event.agentRowCount as number) ?? 
null, }) } } else if (event.type === 'done') { setOverallScore(event.overallScore as number) setActiveBenchmarkId(null) setIsBenchmarking(false) } else if (event.type === 'error') { setActiveBenchmarkId(null) setIsBenchmarking(false) } } } catch { setIsBenchmarking(false) setActiveBenchmarkId(null) } }, [isBenchmarking, benchmarkResults, setIsBenchmarking, updateBenchmarkResult, setOverallScore, setActiveBenchmarkId, taskDifficulty] )
// Derived figures, recomputed on every render:
//   passedCount    - results with status 'pass'
//   completedCount - results with status 'pass' or 'fail'
//   totalScore     - sum of scores over all results, a null score counting as 0
//   progressPct    - completed / total as a rounded percent (0 when there are no results)
//   scorePct       - totalScore / total number of results (not the completed count) as a
//                    rounded percent; 0 until at least one result has completed
const passedCount = benchmarkResults.filter((r) => r.status === 'pass').length const completedCount = benchmarkResults.filter((r) => r.status === 'pass' || r.status === 'fail').length const totalScore = benchmarkResults.reduce((s, r) => s + (r.score ?? 0), 0) const progressPct = benchmarkResults.length > 0 ? Math.round((completedCount / benchmarkResults.length) * 100) : 0 const scorePct = completedCount > 0 ? Math.round((totalScore / benchmarkResults.length) * 100) : 0 return (
{/* Header */}
{/* Difficulty tabs */}
{DIFFICULTY_TABS.map((tab) => ( ))}
Benchmark {completedCount > 0 && ( {passedCount}/{benchmarkResults.length} passed )}
{completedCount > 0 && ( )}
{/* Overall score */} {overallScore !== null && (
Overall Score
{(overallScore * 100).toFixed(0)}%
)} {/* Score bar */} {completedCount > 0 && (
Score: {totalScore.toFixed(1)}/{benchmarkResults.length} {scorePct}%
)} {/* Progress */} {isBenchmarking && (
)}
{/* Query list */}
{benchmarkResults.map((result) => ( toggleExpand(result.id)} onRunSingle={() => void runBenchmark([result.id])} isRunning={isBenchmarking} dbSeeded={dbSeeded} /> ))}
{!dbSeeded && (
Waiting for database initialization...
)}
) }