// sql-agent-openenv/frontend/src/components/BenchmarkPanel.tsx
// Last commit 8ae8e0b (ar9avg): Fix chat query failures and benchmark ID mismatches
import { useState, useCallback } from 'react'
import { motion, AnimatePresence } from 'framer-motion'
import {
Target, Play, Loader2, CheckCircle2, XCircle,
ChevronDown, RotateCcw, Zap,
} from 'lucide-react'
import { useStore } from '../store/useStore'
import { streamBenchmark } from '../lib/api'
import type { BenchmarkResult, Difficulty } from '../lib/types'
/**
 * Difficulty selector tabs, in display order.
 * `id` is the value handed to the store; `label` is the text shown on the tab.
 */
const DIFFICULTY_TABS: Array<{ id: Difficulty; label: string }> = (
  [
    ['easy', 'Easy'],
    ['medium', 'Medium'],
    ['hard', 'Hard'],
  ] as const
).map(([id, label]) => ({ id, label }))
/**
 * A single benchmark query row.
 *
 * The collapsed header shows a status icon, the query id, a difficulty badge,
 * the score and attempt count (when present), the question, and a reason
 * truncated to 120 characters. Clicking the header calls `onToggleExpand`.
 * When `isExpanded` is true, a detail section shows the full question, the
 * generated SQL, reference vs. agent row counts, the full reason and a
 * "Re-run" button.
 *
 * @param result          The benchmark entry to render.
 * @param isActive        Highlights the row with a violet border/background.
 * @param isExpanded      Whether the detail section is rendered.
 * @param onToggleExpand  Called when the header is clicked.
 * @param onRunSingle     Called when the Play or Re-run button is clicked.
 * @param isRunning       While true, the Play and Re-run buttons are hidden.
 * @param dbSeeded        The Play and Re-run buttons are only shown when true.
 */
function QueryRow({
  result,
  isActive,
  isExpanded,
  onToggleExpand,
  onRunSingle,
  isRunning,
  dbSeeded,
}: {
  result: BenchmarkResult
  isActive: boolean
  isExpanded: boolean
  onToggleExpand: () => void
  onRunSingle: () => void
  isRunning: boolean
  dbSeeded: boolean
}) {
  // Maps the row status to its leading icon; unknown statuses render nothing.
  const statusIcon = () => {
    switch (result.status) {
      case 'pending': return <span className="w-2 h-2 rounded-full bg-gray-600 shrink-0" />
      case 'running': return <Loader2 size={12} className="text-violet-400 animate-spin shrink-0" />
      case 'pass': return <CheckCircle2 size={12} className="text-green-400 shrink-0" />
      case 'fail': return <XCircle size={12} className="text-red-400 shrink-0" />
      default: return null
    }
  }

  const difficultyColor =
    result.difficulty === 'hard'
      ? 'text-red-400 bg-red-500/10 border-red-500/25'
      : result.difficulty === 'medium'
        ? 'text-amber-400 bg-amber-500/10 border-amber-500/25'
        : 'text-blue-400 bg-blue-500/10 border-blue-500/25'

  // A missing agent row count is displayed as 0, so the colour must be derived
  // from the same normalised value; otherwise "0 rows" would show in amber.
  const agentRowCount = result.agentRowCount ?? 0
  const agentRowColor =
    agentRowCount === result.refRowCount
      ? 'text-green-400'
      : agentRowCount === 0
        ? 'text-red-400'
        : 'text-amber-400'

  // Stop the click from bubbling to the header so running does not also toggle expansion.
  const handleRunClick = (e: { stopPropagation: () => void }) => {
    e.stopPropagation()
    onRunSingle()
  }

  return (
    <div
      className={`rounded-xl border transition-all duration-150 ${
        isActive
          ? 'border-violet-500/40 bg-violet-500/5'
          : 'border-white/5 bg-white/[0.02] hover:bg-white/[0.04]'
      }`}
    >
      <div
        className="flex items-start gap-2 px-3 py-2.5 cursor-pointer"
        onClick={onToggleExpand}
      >
        <div className="mt-0.5 shrink-0">{statusIcon()}</div>
        <div className="flex-1 min-w-0">
          <div className="flex items-center gap-2 mb-0.5 flex-wrap">
            <span className="text-[10px] font-mono text-gray-600">{result.id}</span>
            <span className={`text-[9px] font-semibold px-1.5 py-0.5 rounded-full border ${difficultyColor}`}>
              {result.difficulty}
            </span>
            {result.score !== null && (
              <span className={`text-[10px] font-mono font-bold ${result.status === 'pass' ? 'text-green-400' : 'text-red-400'}`}>
                {result.score.toFixed(2)}
              </span>
            )}
            {result.attempts !== null && (
              <span className="text-[9px] text-gray-600 font-mono">
                {result.attempts} attempt{result.attempts !== 1 ? 's' : ''}
              </span>
            )}
          </div>
          <div className="text-xs text-gray-300 leading-relaxed line-clamp-2">
            {result.question}
          </div>
          {result.reason && result.status !== 'pending' && (
            <div className={`text-[10px] mt-1 ${result.status === 'pass' ? 'text-green-500/70' : 'text-red-400/70'}`}>
              {result.reason.length > 120 ? result.reason.slice(0, 120) + '…' : result.reason}
            </div>
          )}
        </div>
        <div className="flex items-center gap-1.5 shrink-0">
          {result.status === 'pending' && dbSeeded && !isRunning && (
            <button
              type="button"
              onClick={handleRunClick}
              className="p-1 rounded-lg hover:bg-white/10 transition-colors"
              title="Run this query"
              aria-label="Run this query"
            >
              <Play size={10} className="text-gray-500 hover:text-violet-400" />
            </button>
          )}
          <ChevronDown
            size={11}
            className={`text-gray-600 transition-transform duration-150 ${isExpanded ? 'rotate-180' : ''}`}
          />
        </div>
      </div>
      {/* Expanded detail */}
      <AnimatePresence>
        {isExpanded && (
          <motion.div
            initial={{ height: 0, opacity: 0 }}
            animate={{ height: 'auto', opacity: 1 }}
            exit={{ height: 0, opacity: 0 }}
            transition={{ duration: 0.15 }}
            className="overflow-hidden"
          >
            <div className="px-3 pb-3 flex flex-col gap-2 border-t border-white/5 pt-2">
              <p className="text-xs text-gray-400 leading-relaxed">{result.question}</p>
              {result.sql && (
                <div>
                  <div className="text-[10px] text-gray-600 mb-1 font-semibold uppercase tracking-wider">
                    Generated SQL
                  </div>
                  <pre className="text-[10px] font-mono text-violet-200/70 bg-black/40 rounded-lg p-2.5 border border-white/5 whitespace-pre-wrap leading-relaxed max-h-40 overflow-y-auto">
                    {result.sql}
                  </pre>
                </div>
              )}
              {(result.refRowCount !== null || result.reason) && (
                <div className="flex flex-col gap-1.5">
                  {result.refRowCount !== null && (
                    <div className="flex items-center gap-3 text-[10px] font-mono">
                      <span className="text-gray-600">reference:</span>
                      <span className="text-blue-400">{result.refRowCount} rows</span>
                      <span className="text-gray-600">agent:</span>
                      <span className={agentRowColor}>
                        {agentRowCount} rows
                      </span>
                    </div>
                  )}
                  {result.reason && (
                    <div className={`text-[10px] leading-relaxed ${result.status === 'pass' ? 'text-green-400/80' : 'text-red-400/80'}`}>
                      {result.reason}
                    </div>
                  )}
                </div>
              )}
              {/* Re-run is only offered for finished rows while no benchmark is in progress */}
              {result.status !== 'pending' && result.status !== 'running' && !isRunning && dbSeeded && (
                <button
                  type="button"
                  onClick={handleRunClick}
                  className="flex items-center gap-1 text-[10px] text-violet-400 hover:text-violet-300 transition-colors self-start mt-1"
                >
                  <RotateCcw size={9} />
                  Re-run
                </button>
              )}
            </div>
          </motion.div>
        )}
      </AnimatePresence>
    </div>
  )
}
/**
 * Benchmark panel: difficulty tabs, a "Run All" / "Reset" header with score and
 * progress bars, and the list of benchmark queries rendered as `QueryRow`s.
 *
 * All benchmark state (results, running flag, overall score, active id,
 * difficulty) is read from and written to the shared store via `useStore`;
 * only the set of expanded row ids is local component state.
 */
export function BenchmarkPanel() {
  const {
    benchmarkResults, isBenchmarking, overallScore,
    activeBenchmarkId, dbSeeded,
    setIsBenchmarking, updateBenchmarkResult, setOverallScore,
    setActiveBenchmarkId, resetBenchmark,
    taskDifficulty, setTaskDifficulty,
  } = useStore()
  const [expandedIds, setExpandedIds] = useState<Set<string>>(new Set())

  // Adds the id to the expanded set if absent, removes it otherwise.
  const toggleExpand = (id: string) => {
    setExpandedIds((prev) => {
      const next = new Set(prev)
      if (next.has(id)) next.delete(id)
      else next.add(id)
      return next
    })
  }

  /**
   * Streams a benchmark run for the current difficulty and mirrors each event
   * into the store.
   *
   * @param queryIds  Ids to run; when omitted, every entry in
   *                  `benchmarkResults` is marked as running and `undefined`
   *                  is forwarded to `streamBenchmark`.
   */
  const runBenchmark = useCallback(
    async (queryIds?: string[]) => {
      if (isBenchmarking) return
      setIsBenchmarking(true)

      // `benchmarkResults` is the snapshot captured when this callback was
      // created and does not change while the stream is consumed. Track the
      // latest version of every row locally so that later events build on the
      // values written earlier in this run instead of resurrecting stale
      // score/reason/sql from the snapshot.
      const latest = new Map<string, BenchmarkResult>(
        benchmarkResults.map((r): [string, BenchmarkResult] => [r.id, r]),
      )
      const patch = (id: string, changes: Partial<BenchmarkResult>) => {
        const current = latest.get(id)
        if (!current) return // unknown id: nothing to update
        const next: BenchmarkResult = { ...current, ...changes }
        latest.set(id, next)
        updateBenchmarkResult(next)
      }

      // Ends the run: rows that never received a result stop spinning and the
      // panel leaves its "benchmarking" state. Safe to call more than once.
      const settle = () => {
        for (const [id, row] of latest) {
          if (row.status === 'running') patch(id, { status: 'pending' })
        }
        setActiveBenchmarkId(null)
        setIsBenchmarking(false)
      }

      // Clear every previous outcome so a re-run never shows stale data.
      const targetIds = queryIds ?? benchmarkResults.map((r) => r.id)
      for (const id of targetIds) {
        patch(id, {
          status: 'running',
          score: null,
          reason: null,
          sql: null,
          attempts: null,
          refRowCount: null,
          agentRowCount: null,
        })
      }

      try {
        for await (const event of streamBenchmark(taskDifficulty, queryIds)) {
          if (event.type === 'query_start') {
            const id = event.id as string
            setActiveBenchmarkId(id)
            patch(id, { status: 'running' })
          } else if (event.type === 'query_result') {
            patch(event.id as string, {
              status: (event.pass as boolean) ? 'pass' : 'fail',
              score: event.score as number,
              reason: event.reason as string,
              sql: event.sql as string,
              attempts: (event.attempts as number) ?? null,
              refRowCount: (event.refRowCount as number) ?? null,
              agentRowCount: (event.agentRowCount as number) ?? null,
            })
          } else if (event.type === 'done') {
            setOverallScore(event.overallScore as number)
            settle()
          } else if (event.type === 'error') {
            settle()
          }
        }
      } catch (err: unknown) {
        // Surface the failure for debugging instead of swallowing it silently.
        console.error('Benchmark stream failed', err)
      } finally {
        // Safety net: the stream may throw or end without a terminal event.
        settle()
      }
    },
    [isBenchmarking, benchmarkResults, setIsBenchmarking, updateBenchmarkResult,
      setOverallScore, setActiveBenchmarkId, taskDifficulty]
  )

  // Derived header statistics.
  const passedCount = benchmarkResults.filter((r) => r.status === 'pass').length
  const completedCount = benchmarkResults.filter((r) => r.status === 'pass' || r.status === 'fail').length
  const totalScore = benchmarkResults.reduce((s, r) => s + (r.score ?? 0), 0)
  const progressPct = benchmarkResults.length > 0 ? Math.round((completedCount / benchmarkResults.length) * 100) : 0
  // completedCount > 0 implies benchmarkResults.length > 0, so the division is safe.
  const scorePct = completedCount > 0 ? Math.round((totalScore / benchmarkResults.length) * 100) : 0

  return (
    <div className="flex flex-col h-full">
      {/* Header */}
      <div className="px-4 py-3 border-b border-white/[0.06] shrink-0">
        {/* Difficulty tabs */}
        <div className="flex items-center gap-1 mb-2.5 p-0.5 rounded-lg border border-white/[0.06] w-fit">
          {DIFFICULTY_TABS.map((tab) => (
            <button
              type="button"
              key={tab.id}
              onClick={() => setTaskDifficulty(tab.id)}
              disabled={isBenchmarking}
              className={`text-[10px] font-semibold px-2.5 py-1 rounded transition-all disabled:opacity-50 ${
                taskDifficulty === tab.id
                  ? 'bg-violet-600/25 text-violet-300 border border-violet-500/30'
                  : 'text-gray-500 hover:text-gray-300 border border-transparent'
              }`}
            >
              {tab.label}
            </button>
          ))}
        </div>
        <div className="flex items-center justify-between mb-2">
          <div className="flex items-center gap-2">
            <Target size={14} className="text-violet-400" />
            <span className="text-xs font-semibold text-white">Benchmark</span>
            {completedCount > 0 && (
              <span className="text-xs text-gray-500 font-mono">
                {passedCount}/{benchmarkResults.length} passed
              </span>
            )}
          </div>
          <div className="flex items-center gap-2">
            {completedCount > 0 && (
              <button
                type="button"
                onClick={resetBenchmark}
                disabled={isBenchmarking}
                className="flex items-center gap-1 px-2 py-1 rounded-lg text-[10px] text-gray-500 hover:text-gray-300 hover:bg-white/5 transition-all disabled:opacity-40"
              >
                <RotateCcw size={10} />
                Reset
              </button>
            )}
            <button
              type="button"
              onClick={() => void runBenchmark()}
              disabled={isBenchmarking || !dbSeeded}
              className="flex items-center gap-1.5 px-3 py-1.5 rounded-lg bg-violet-600 hover:bg-violet-500 disabled:opacity-40 disabled:cursor-not-allowed transition-all text-white text-xs font-semibold"
            >
              {isBenchmarking ? (
                <Loader2 size={11} className="animate-spin" />
              ) : (
                <Play size={11} />
              )}
              Run All
            </button>
          </div>
        </div>
        {/* Overall score */}
        {overallScore !== null && (
          <motion.div
            initial={{ opacity: 0, scale: 0.95 }}
            animate={{ opacity: 1, scale: 1 }}
            className="mb-2 flex items-center gap-3 px-3 py-2 rounded-xl border border-violet-500/20 bg-violet-500/5"
          >
            <Zap size={14} className="text-violet-400 shrink-0" />
            <div>
              <div className="text-[10px] text-gray-500 uppercase tracking-wider">Overall Score</div>
              <div className="text-xl font-bold font-mono text-violet-300">
                {(overallScore * 100).toFixed(0)}%
              </div>
            </div>
          </motion.div>
        )}
        {/* Score bar */}
        {completedCount > 0 && (
          <div className="flex flex-col gap-1">
            <div className="flex items-center justify-between text-[10px]">
              <span className="text-gray-500">
                Score: {totalScore.toFixed(1)}/{benchmarkResults.length}
              </span>
              <span className="text-violet-400 font-mono">{scorePct}%</span>
            </div>
            <div className="h-1.5 bg-white/5 rounded-full overflow-hidden">
              <motion.div
                className="h-full rounded-full bg-gradient-to-r from-violet-600 to-violet-400"
                initial={{ width: 0 }}
                animate={{ width: `${scorePct}%` }}
                transition={{ duration: 0.5, ease: 'easeOut' }}
              />
            </div>
          </div>
        )}
        {/* Progress */}
        {isBenchmarking && (
          <div className="mt-1.5">
            <div className="h-1 bg-white/5 rounded-full overflow-hidden">
              <motion.div
                className="h-full rounded-full bg-violet-500/60"
                initial={{ width: 0 }}
                animate={{ width: `${progressPct}%` }}
                transition={{ duration: 0.3 }}
              />
            </div>
          </div>
        )}
      </div>
      {/* Query list */}
      <div className="flex-1 overflow-y-auto">
        <div className="p-2 flex flex-col gap-1">
          {benchmarkResults.map((result) => (
            <QueryRow
              key={result.id}
              result={result}
              isActive={activeBenchmarkId === result.id}
              isExpanded={expandedIds.has(result.id)}
              onToggleExpand={() => toggleExpand(result.id)}
              onRunSingle={() => void runBenchmark([result.id])}
              isRunning={isBenchmarking}
              dbSeeded={dbSeeded}
            />
          ))}
        </div>
      </div>
      {!dbSeeded && (
        <div className="px-4 py-2 border-t border-white/[0.06] text-[10px] text-gray-600 text-center shrink-0">
          Waiting for database initialization...
        </div>
      )}
    </div>
  )
}