Add Tab 2: Benchmark with radar chart and data table
Browse files
web/src/components/tabs/Benchmark.tsx
ADDED
|
@@ -0,0 +1,158 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"use client";
|
| 2 |
+
|
| 3 |
+
import { useState } from "react";
|
| 4 |
+
import {
|
| 5 |
+
RadarChart, Radar, PolarGrid, PolarAngleAxis,
|
| 6 |
+
ResponsiveContainer, Tooltip, Legend,
|
| 7 |
+
BarChart, Bar, XAxis, YAxis, CartesianGrid,
|
| 8 |
+
} from "recharts";
|
| 9 |
+
|
| 10 |
+
/**
 * Hard-coded aggregate results used as the initial `data` state of the
 * Benchmark tab.
 *
 * - `baseline` / `graphrag`: per-pipeline averages. The table in this file
 *   labels `avgCost` as "($)" and `avgLatency` as "(ms)".
 * - `f1WinRate`: a fraction; the summary card multiplies it by 100 and
 *   shows it as the "GraphRAG Win Rate" percentage.
 * - `byType`: per-question-type F1 values, also fractions (the bar chart
 *   multiplies them by 100).
 *
 * NOTE(review): the name suggests these are placeholder figures rather than
 * measured results — confirm before presenting them as real benchmarks.
 */
const DEMO_AGGREGATE = {
  numSamples: 100,
  baseline: {
    avgF1: 0.5523,
    avgEM: 0.3810,
    avgContextHit: 0.4520,
    avgTokens: 952,
    avgCost: 0.000203,
    avgLatency: 1240,
  },
  graphrag: {
    avgF1: 0.6241,
    avgEM: 0.4230,
    avgContextHit: 0.5830,
    avgTokens: 2387,
    avgCost: 0.000518,
    avgLatency: 3820,
  },
  f1WinRate: 0.62,
  byType: [
    { type: "bridge", count: 58, baselineF1: 0.52, graphragF1: 0.63 },
    { type: "comparison", count: 42, baselineF1: 0.58, graphragF1: 0.61 },
  ],
};
|
| 20 |
+
|
| 21 |
+
/**
 * Rows fed to the radar chart: one entry per axis (`metric`) with a value
 * for each series (`Baseline`, `GraphRAG`).
 *
 * These numbers are hard-coded literals; they are not computed from the
 * component's `data` state, so the radar will not change if that state does.
 *
 * NOTE(review): the first three rows equal the corresponding
 * DEMO_AGGREGATE averages multiplied by 100 and rounded. How the
 * "Token Eff." and "Cost Eff." scores were obtained is not shown here —
 * confirm their scale and derivation.
 */
const radarData = [
  { metric: "F1 Score", Baseline: 55, GraphRAG: 62 },
  { metric: "Exact Match", Baseline: 38, GraphRAG: 42 },
  { metric: "Context Hit", Baseline: 45, GraphRAG: 58 },
  { metric: "Token Eff.", Baseline: 90, GraphRAG: 40 },
  { metric: "Cost Eff.", Baseline: 85, GraphRAG: 35 },
];
|
| 28 |
+
|
| 29 |
+
/**
 * Benchmark tab: a controls card (sample-count slider and run button),
 * four summary metric cards, a radar chart, a per-question-type bar chart,
 * and a detailed comparison table of the two pipelines.
 *
 * All figures come from the `data` state, which is initialised from
 * DEMO_AGGREGATE and has no setter, except the radar chart, which reads the
 * module-level `radarData` literals.
 */
export function Benchmark() {
  // NOTE(review): `running` is only ever set to true (by the button below);
  // nothing in this component sets it back to false, so the button stays
  // disabled after the first click. `samples` is likewise only displayed.
  // Confirm that the actual run wiring is still to be added.
  const [running, setRunning] = useState(false);
  const [samples, setSamples] = useState(50);
  const [data] = useState(DEMO_AGGREGATE);

  // F1 fractions are converted to percentages with one decimal for the bar chart.
  const typeData = data.byType.map((t) => ({
    name: t.type.charAt(0).toUpperCase() + t.type.slice(1),
    Baseline: Number((t.baselineF1 * 100).toFixed(1)),
    GraphRAG: Number((t.graphragF1 * 100).toFixed(1)),
  }));

  const summaryCards = [
    { label: "Avg F1 (Baseline)", value: data.baseline.avgF1.toFixed(4), color: "#0072CE" },
    { label: "Avg F1 (GraphRAG)", value: data.graphrag.avgF1.toFixed(4), color: "#FF6B00" },
    { label: "GraphRAG Win Rate", value: `${(data.f1WinRate * 100).toFixed(0)}%`, color: "#5db872" },
    { label: "Samples Evaluated", value: data.numSamples.toString(), color: "#002B49" },
  ];

  const formatScore = (v: number) => v.toFixed(4);
  const formatCount = (v: number) => v.toFixed(0);
  const formatCost = (v: number) => `$${v.toFixed(6)}`;

  // Each row keeps the raw numbers so the winner can be derived from them
  // instead of being hard-coded. `lowerIsBetter` marks the resource metrics
  // (tokens, cost, latency), where the smaller value wins.
  const comparisonRows = [
    { metric: "Avg F1 Score", baseline: data.baseline.avgF1, graphrag: data.graphrag.avgF1, format: formatScore, lowerIsBetter: false },
    { metric: "Avg Exact Match", baseline: data.baseline.avgEM, graphrag: data.graphrag.avgEM, format: formatScore, lowerIsBetter: false },
    { metric: "Avg Context Hit", baseline: data.baseline.avgContextHit, graphrag: data.graphrag.avgContextHit, format: formatScore, lowerIsBetter: false },
    { metric: "Avg Tokens/Query", baseline: data.baseline.avgTokens, graphrag: data.graphrag.avgTokens, format: formatCount, lowerIsBetter: true },
    { metric: "Avg Cost ($)", baseline: data.baseline.avgCost, graphrag: data.graphrag.avgCost, format: formatCost, lowerIsBetter: true },
    { metric: "Avg Latency (ms)", baseline: data.baseline.avgLatency, graphrag: data.graphrag.avgLatency, format: formatCount, lowerIsBetter: true },
  ];

  const tooltipStyle = { background: "#faf9f5", border: "1px solid #e6dfd8", borderRadius: "8px" };
  const cellPadding = "12px 16px";

  return (
    <div>
      {/* Controls */}
      <div className="card mb-6">
        <div className="flex flex-wrap items-end gap-6">
          <div>
            <div className="display-sm mb-2">Batch Benchmark</div>
            <p className="body-sm" style={{ color: "#6c6a64" }}>
              Run both pipelines on HotpotQA multi-hop questions
            </p>
          </div>
          <div className="flex items-center gap-4 ml-auto">
            <label className="caption">
              Samples
              <input
                type="range"
                min={10}
                max={500}
                step={10}
                value={samples}
                onChange={(e) => setSamples(Number(e.target.value))}
                className="block w-32 mt-1 accent-[#FF6B00]"
              />
              <span className="body-sm font-mono">{samples}</span>
            </label>
            <button
              type="button"
              className="btn btn-primary"
              onClick={() => setRunning(true)}
              disabled={running}
            >
              {running ? "Running…" : "🏃 Run Benchmark"}
            </button>
          </div>
        </div>
      </div>

      {/* Summary Metrics */}
      <div className="grid grid-cols-2 md:grid-cols-4 gap-4 mb-6">
        {summaryCards.map((card) => (
          <div key={card.label} className="card-cream text-center" style={{ padding: "20px" }}>
            <div className="metric-value-sm" style={{ color: card.color }}>{card.value}</div>
            <div className="metric-label">{card.label}</div>
          </div>
        ))}
      </div>

      <div className="grid grid-cols-1 lg:grid-cols-2 gap-6 mb-6">
        {/* Radar Chart */}
        <div className="card">
          <div className="title-md mb-4">Multi-Metric Radar</div>
          <ResponsiveContainer width="100%" height={340}>
            <RadarChart data={radarData}>
              <PolarGrid stroke="#002B49" strokeOpacity={0.12} />
              <PolarAngleAxis dataKey="metric" tick={{ fill: "#6c6a64", fontSize: 12 }} />
              <Radar name="Baseline" dataKey="Baseline" stroke="#0072CE" fill="#0072CE" fillOpacity={0.15} strokeWidth={2} />
              <Radar name="GraphRAG" dataKey="GraphRAG" stroke="#FF6B00" fill="#FF6B00" fillOpacity={0.15} strokeWidth={2} />
              <Legend />
              <Tooltip contentStyle={tooltipStyle} />
            </RadarChart>
          </ResponsiveContainer>
        </div>

        {/* By Question Type */}
        <div className="card">
          <div className="title-md mb-4">F1 by Question Type</div>
          <ResponsiveContainer width="100%" height={340}>
            <BarChart data={typeData} margin={{ top: 20, right: 20, left: 0, bottom: 0 }}>
              <CartesianGrid strokeDasharray="3 3" stroke="#002B49" strokeOpacity={0.08} />
              <XAxis dataKey="name" tick={{ fill: "#6c6a64", fontSize: 13 }} />
              <YAxis domain={[0, 100]} tick={{ fill: "#6c6a64", fontSize: 12 }} />
              <Tooltip contentStyle={tooltipStyle} />
              <Legend />
              <Bar dataKey="Baseline" fill="#0072CE" radius={[4, 4, 0, 0]} />
              <Bar dataKey="GraphRAG" fill="#FF6B00" radius={[4, 4, 0, 0]} />
            </BarChart>
          </ResponsiveContainer>
        </div>
      </div>

      {/* Detailed Table */}
      <div className="card">
        <div className="title-md mb-4">Detailed Comparison</div>
        <div className="overflow-x-auto">
          <table style={{ width: "100%", borderCollapse: "collapse", fontSize: "0.875rem" }}>
            <thead>
              <tr style={{ borderBottom: "2px solid var(--color-hairline)" }}>
                {["Metric", "Baseline RAG", "GraphRAG", "Winner"].map((heading) => (
                  <th key={heading} className="caption-uppercase text-left" style={{ padding: cellPadding }}>{heading}</th>
                ))}
              </tr>
            </thead>
            <tbody>
              {comparisonRows.map((row) => {
                // Strict comparison: on an exact tie the badge falls back to Baseline.
                const graphragWins = row.lowerIsBetter
                  ? row.graphrag < row.baseline
                  : row.graphrag > row.baseline;
                return (
                  <tr key={row.metric} style={{ borderBottom: "1px solid var(--color-hairline-soft)" }}>
                    <td className="title-sm" style={{ padding: cellPadding }}>{row.metric}</td>
                    <td style={{ padding: cellPadding, fontFamily: "var(--font-mono)", color: "#0072CE" }}>{row.format(row.baseline)}</td>
                    <td style={{ padding: cellPadding, fontFamily: "var(--font-mono)", color: "#FF6B00" }}>{row.format(row.graphrag)}</td>
                    <td style={{ padding: cellPadding }}>
                      <span className={graphragWins ? "badge-orange" : "badge-blue"} style={{ fontSize: "0.6875rem" }}>
                        {graphragWins ? "GraphRAG" : "Baseline"}
                      </span>
                    </td>
                  </tr>
                );
              })}
            </tbody>
          </table>
        </div>
      </div>
    </div>
  );
}
|