import { useState, useCallback } from 'react'
import { motion, AnimatePresence } from 'framer-motion'
import {
Target, Play, Loader2, CheckCircle2, XCircle,
ChevronDown, RotateCcw, Zap,
} from 'lucide-react'
import { useStore } from '../store/useStore'
import { streamBenchmark } from '../lib/api'
import type { BenchmarkResult, Difficulty } from '../lib/types'
/**
 * Difficulty tabs, in the order they are listed: each entry pairs a
 * `Difficulty` id with the label shown for it.
 */
const DIFFICULTY_TABS: Array<{ id: Difficulty; label: string }> = [
  {
    id: 'easy',
    label: 'Easy',
  },
  {
    id: 'medium',
    label: 'Medium',
  },
  {
    id: 'hard',
    label: 'Hard',
  },
]
/**
 * One row of the benchmark query list.
 *
 * From the expressions visible in the returned markup, the row shows the
 * result's status icon, id, difficulty and question; its score (two decimals)
 * and attempt count (pluralised) when those are not null; and, once the status
 * is no longer 'pending', the reason shortened to 120 characters plus '…'.
 * When `isExpanded` is true a detail section adds the question, the generated
 * SQL (if any), the reference/agent row counts (agent count falls back to 0
 * when nullish) and the untruncated reason.
 *
 * NOTE(review): the element tags of the returned markup, and the values
 * returned by `statusIcon`, are not visible in this copy — confirm the
 * rendering details against the original component.
 *
 * @param result - Benchmark result rendered by this row.
 * @param isActive - Not referenced by any visible expression; presumably used
 *   by the missing markup — TODO confirm.
 * @param isExpanded - Whether the expanded detail section is rendered.
 * @param onToggleExpand - Not referenced by any visible expression; presumably
 *   wired to the row's expand/collapse control — TODO confirm.
 * @param onRunSingle - Not referenced by any visible expression; presumably
 *   wired to the per-row run / re-run controls — TODO confirm.
 * @param isRunning - While true, the two guarded control slots (pending-row
 *   and finished-row) are not rendered.
 * @param dbSeeded - Must be true for those two control slots to render.
 */
function QueryRow({
result,
isActive,
isExpanded,
onToggleExpand,
onRunSingle,
isRunning,
dbSeeded,
}: {
result: BenchmarkResult
isActive: boolean
isExpanded: boolean
onToggleExpand: () => void
onRunSingle: () => void
isRunning: boolean
dbSeeded: boolean
}) {
// Chooses what to show for the current status, one case per status value.
// NOTE(review): every case returns without a value in this copy; the icon
// elements appear to be missing — confirm against the original.
const statusIcon = () => {
switch (result.status) {
case 'pending': return
case 'running': return
case 'pass': return
case 'fail': return
}
}
// Text/background/border colour classes for the difficulty: red for 'hard',
// amber for 'medium', blue for anything else.
const difficultyColor =
result.difficulty === 'hard'
? 'text-red-400 bg-red-500/10 border-red-500/25'
: result.difficulty === 'medium'
? 'text-amber-400 bg-amber-500/10 border-amber-500/25'
: 'text-blue-400 bg-blue-500/10 border-blue-500/25'
return (
{statusIcon()}
{result.id}
{result.difficulty}
{result.score !== null && (
{result.score.toFixed(2)}
)}
{result.attempts !== null && (
{result.attempts} attempt{result.attempts !== 1 ? 's' : ''}
)}
{result.question}
{result.reason && result.status !== 'pending' && (
{result.reason.length > 120 ? result.reason.slice(0, 120) + '…' : result.reason}
)}
{result.status === 'pending' && dbSeeded && !isRunning && (
)}
{/* Expanded detail */}
{isExpanded && (
{result.question}
{result.sql && (
Generated SQL
{result.sql}
)}
{(result.refRowCount !== null || result.reason) && (
{result.refRowCount !== null && (
reference:
{result.refRowCount} rows
agent:
{result.agentRowCount ?? 0} rows
)}
{result.reason && (
{result.reason}
)}
)}
{result.status !== 'pending' && result.status !== 'running' && !isRunning && dbSeeded && (
)}
)}
)
}
/**
 * Benchmark panel: lists the benchmark queries held in the shared store and
 * runs them (all of them, or a chosen subset) through `streamBenchmark`,
 * writing every streamed event back into the store.
 *
 * NOTE(review): the markup returned at the bottom of this component appears to
 * have lost its element tags in this copy; it is kept exactly as found.
 */
export function BenchmarkPanel() {
  const {
    benchmarkResults, isBenchmarking, overallScore,
    activeBenchmarkId, dbSeeded,
    setIsBenchmarking, updateBenchmarkResult, setOverallScore,
    setActiveBenchmarkId, resetBenchmark,
    taskDifficulty, setTaskDifficulty,
  } = useStore()

  // Ids of the rows whose detail section is currently open.
  const [expandedIds, setExpandedIds] = useState<Set<string>>(new Set())

  /** Opens the row's detail section if it is closed, closes it otherwise. */
  const toggleExpand = (id: string) => {
    setExpandedIds((prev) => {
      // Work on a copy so the previous state object is never mutated.
      const next = new Set(prev)
      if (next.has(id)) next.delete(id)
      else next.add(id)
      return next
    })
  }

  /**
   * Runs the benchmark and mirrors the streamed events into the store.
   *
   * Does nothing when a run is already in progress. Every targeted row is
   * first reset to 'running' with its score, reason and SQL cleared; each
   * `query_result` event then fills the row in, and a `done` event stores the
   * overall score.
   *
   * However the stream ends — `done`, `error`, an exception, or simply running
   * out of events — the active id and the "benchmarking" flag are cleared, and
   * any row that was marked 'running' but never received a result is put back
   * to the state it had before the run, so it cannot stay 'running' forever.
   *
   * @param queryIds - Ids to run; when omitted, every row in the store is
   *   targeted.
   */
  const runBenchmark = useCallback(
    async (queryIds?: string[]) => {
      if (isBenchmarking) return
      setIsBenchmarking(true)

      // Rows as they were when the run started, keyed by id (first match wins,
      // as with `Array.prototype.find`).
      const initialById = new Map<string, BenchmarkResult>()
      for (const row of benchmarkResults) {
        if (!initialById.has(row.id)) initialById.set(row.id, row)
      }

      // Rows as last written during this run. `benchmarkResults` is the
      // snapshot captured when this callback was created, so building updates
      // from it would bring back the score/reason/sql that were just cleared.
      const latestById = new Map<string, BenchmarkResult>()
      // Rows currently marked 'running' that have not received a result yet.
      const unfinishedIds = new Set<string>()

      const currentRow = (id: string) => latestById.get(id) ?? initialById.get(id)
      const writeRow = (row: BenchmarkResult) => {
        latestById.set(row.id, row)
        updateBenchmarkResult(row)
      }

      const targetIds = queryIds ?? benchmarkResults.map((r) => r.id)
      for (const id of targetIds) {
        const existing = initialById.get(id)
        if (existing) {
          unfinishedIds.add(id)
          writeRow({ ...existing, status: 'running', score: null, reason: null, sql: null })
        }
      }

      try {
        for await (const event of streamBenchmark(taskDifficulty, queryIds)) {
          if (event.type === 'query_start') {
            const id = event.id as string
            setActiveBenchmarkId(id)
            const existing = currentRow(id)
            if (existing) {
              unfinishedIds.add(id)
              writeRow({ ...existing, status: 'running' })
            }
          } else if (event.type === 'query_result') {
            const id = event.id as string
            const existing = currentRow(id)
            if (existing) {
              unfinishedIds.delete(id)
              writeRow({
                ...existing,
                status: (event.pass as boolean) ? 'pass' : 'fail',
                score: event.score as number,
                reason: event.reason as string,
                sql: event.sql as string,
                attempts: (event.attempts as number) ?? null,
                refRowCount: (event.refRowCount as number) ?? null,
                agentRowCount: (event.agentRowCount as number) ?? null,
              })
            }
          } else if (event.type === 'done') {
            setOverallScore(event.overallScore as number)
          } else if (event.type === 'error') {
            // Surface the failure instead of dropping it; cleanup is in `finally`.
            console.error('Benchmark stream reported an error', event)
          }
        }
      } catch (err: unknown) {
        console.error('Benchmark run failed', err)
      } finally {
        // Rows that never got a result go back to their pre-run state.
        for (const id of unfinishedIds) {
          const before = initialById.get(id)
          if (before) updateBenchmarkResult(before)
        }
        setActiveBenchmarkId(null)
        setIsBenchmarking(false)
      }
    },
    [isBenchmarking, benchmarkResults, setIsBenchmarking, updateBenchmarkResult,
      setOverallScore, setActiveBenchmarkId, taskDifficulty]
  )

  // Summary figures derived from the rows currently in the store.
  const passedCount = benchmarkResults.filter((r) => r.status === 'pass').length
  const completedCount = benchmarkResults.filter((r) => r.status === 'pass' || r.status === 'fail').length
  // Rows without a score contribute 0.
  const totalScore = benchmarkResults.reduce((s, r) => s + (r.score ?? 0), 0)
  // Share of rows that have finished (pass or fail), as a whole percentage.
  const progressPct = benchmarkResults.length > 0 ? Math.round((completedCount / benchmarkResults.length) * 100) : 0
  // Total score relative to the number of rows; 0 until at least one row has finished.
  const scorePct = completedCount > 0 ? Math.round((totalScore / benchmarkResults.length) * 100) : 0

return (
{/* Header */}
{/* Difficulty tabs */}
{DIFFICULTY_TABS.map((tab) => (
))}
Benchmark
{completedCount > 0 && (
{passedCount}/{benchmarkResults.length} passed
)}
{completedCount > 0 && (
)}
{/* Overall score */}
{overallScore !== null && (
Overall Score
{(overallScore * 100).toFixed(0)}%
)}
{/* Score bar */}
{completedCount > 0 && (
Score: {totalScore.toFixed(1)}/{benchmarkResults.length}
{scorePct}%
)}
{/* Progress */}
{isBenchmarking && (
)}
{/* Query list */}
{benchmarkResults.map((result) => (
toggleExpand(result.id)}
onRunSingle={() => void runBenchmark([result.id])}
isRunning={isBenchmarking}
dbSeeded={dbSeeded}
/>
))}
{!dbSeeded && (
Waiting for database initialization...
)}
)
}