muthuk1 committed on
Commit
370c4da
·
verified ·
1 Parent(s): ca12a92

Add Tab 2: Benchmark with radar chart and data table

Browse files
Files changed (1) hide show
  1. web/src/components/tabs/Benchmark.tsx +158 -0
web/src/components/tabs/Benchmark.tsx ADDED
@@ -0,0 +1,158 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ "use client";
2
+
3
+ import { useState } from "react";
4
+ import {
5
+ RadarChart, Radar, PolarGrid, PolarAngleAxis,
6
+ ResponsiveContainer, Tooltip, Legend,
7
+ BarChart, Bar, XAxis, YAxis, CartesianGrid,
8
+ } from "recharts";
9
+
10
/**
 * Hard-coded demo aggregate that seeds the Benchmark tab.
 *
 * - `numSamples`: number of evaluated questions shown in the summary card.
 * - `baseline` / `graphrag`: per-pipeline averages. The detail table labels
 *   `avgCost` as "($)" and `avgLatency` as "(ms)"; the remaining score fields
 *   are rendered as 4-decimal fractions.
 * - `f1WinRate`: fraction rendered as the "GraphRAG Win Rate" percentage.
 * - `byType`: per-question-type F1 breakdown feeding the bar chart; the
 *   `count` values sum to `numSamples`.
 */
const DEMO_AGGREGATE = {
  numSamples: 100,
  baseline: {
    avgF1: 0.5523,
    avgEM: 0.3810,
    avgContextHit: 0.4520,
    avgTokens: 952,
    avgCost: 0.000203,
    avgLatency: 1240,
  },
  graphrag: {
    avgF1: 0.6241,
    avgEM: 0.4230,
    avgContextHit: 0.5830,
    avgTokens: 2387,
    avgCost: 0.000518,
    avgLatency: 3820,
  },
  f1WinRate: 0.62,
  byType: [
    { type: "bridge", count: 58, baselineF1: 0.52, graphragF1: 0.63 },
    { type: "comparison", count: 42, baselineF1: 0.58, graphragF1: 0.61 },
  ],
};
20
+
21
/**
 * Static series for the "Multi-Metric Radar" chart, one entry per axis.
 *
 * Each entry carries the axis label (`metric`) plus one value per pipeline,
 * keyed by the series names the chart reads (`Baseline`, `GraphRAG`).
 * The values are hand-entered, not computed at runtime, so they must be
 * updated manually if the demo aggregate changes.
 */
const radarData = [
  { metric: "F1 Score", Baseline: 55, GraphRAG: 62 },
  { metric: "Exact Match", Baseline: 38, GraphRAG: 42 },
  { metric: "Context Hit", Baseline: 45, GraphRAG: 58 },
  { metric: "Token Eff.", Baseline: 90, GraphRAG: 40 },
  { metric: "Cost Eff.", Baseline: 85, GraphRAG: 35 },
];
28
+
29
/** Props accepted by {@link Benchmark}. All are optional. */
type BenchmarkProps = {
  /**
   * Invoked with the currently selected sample count when the user clicks
   * "Run Benchmark". May return a promise; the button stays disabled until
   * the call settles (successfully or not).
   */
  onRun?: (samples: number) => void | Promise<void>;
};

// Series colours shared by the summary cards, charts and table.
const BASELINE_COLOR = "#0072CE";
const GRAPHRAG_COLOR = "#FF6B00";
const MUTED_TEXT_COLOR = "#6c6a64";

// Shared look for both chart tooltips.
const TOOLTIP_STYLE = {
  background: "#faf9f5",
  border: "1px solid #e6dfd8",
  borderRadius: "8px",
};

const CELL_PADDING = "12px 16px";

/**
 * Rows of the "Detailed Comparison" table. `higherIsBetter` drives the
 * winner badge so it is derived from the numbers instead of being
 * maintained by hand next to them.
 */
const COMPARISON_METRICS = [
  { label: "Avg F1 Score", key: "avgF1", digits: 4, prefix: "", higherIsBetter: true },
  { label: "Avg Exact Match", key: "avgEM", digits: 4, prefix: "", higherIsBetter: true },
  { label: "Avg Context Hit", key: "avgContextHit", digits: 4, prefix: "", higherIsBetter: true },
  { label: "Avg Tokens/Query", key: "avgTokens", digits: 0, prefix: "", higherIsBetter: false },
  { label: "Avg Cost ($)", key: "avgCost", digits: 6, prefix: "$", higherIsBetter: false },
  { label: "Avg Latency (ms)", key: "avgLatency", digits: 0, prefix: "", higherIsBetter: false },
] as const;

/**
 * Benchmark tab: sample-count control, summary cards, a radar chart,
 * a per-question-type bar chart and a detailed comparison table.
 *
 * The displayed numbers come from the demo aggregate and the static radar
 * series defined in this file.
 *
 * @param props.onRun Optional runner callback; see {@link BenchmarkProps}.
 */
export function Benchmark({ onRun }: BenchmarkProps = {}) {
  const [running, setRunning] = useState(false);
  const [samples, setSamples] = useState(50);
  const [data] = useState(DEMO_AGGREGATE);

  // Always clear `running` once the run settles; otherwise the button would
  // stay disabled on "Running…" forever after the first click.
  const handleRun = async () => {
    setRunning(true);
    try {
      await onRun?.(samples);
    } catch (err) {
      console.error("Benchmark run failed", err);
    } finally {
      setRunning(false);
    }
  };

  // Bar-chart rows: capitalised type name and F1 as a percentage (1 decimal).
  const typeData = data.byType.map((t) => ({
    name: t.type.charAt(0).toUpperCase() + t.type.slice(1),
    Baseline: Number((t.baselineF1 * 100).toFixed(1)),
    GraphRAG: Number((t.graphragF1 * 100).toFixed(1)),
  }));

  const summaryCards = [
    { label: "Avg F1 (Baseline)", value: data.baseline.avgF1.toFixed(4), color: BASELINE_COLOR },
    { label: "Avg F1 (GraphRAG)", value: data.graphrag.avgF1.toFixed(4), color: GRAPHRAG_COLOR },
    { label: "GraphRAG Win Rate", value: (data.f1WinRate * 100).toFixed(0) + "%", color: "#5db872" },
    { label: "Samples Evaluated", value: data.numSamples.toString(), color: "#002B49" },
  ];

  const comparisonRows = COMPARISON_METRICS.map((m) => {
    const baselineValue = data.baseline[m.key];
    const graphragValue = data.graphrag[m.key];
    // Ties are attributed to the baseline.
    const graphragWins = m.higherIsBetter
      ? graphragValue > baselineValue
      : graphragValue < baselineValue;
    return {
      metric: m.label,
      b: m.prefix + baselineValue.toFixed(m.digits),
      g: m.prefix + graphragValue.toFixed(m.digits),
      winner: graphragWins ? "graphrag" : "baseline",
    };
  });

  return (
    <div>
      {/* Controls */}
      <div className="card mb-6">
        <div className="flex flex-wrap items-end gap-6">
          <div>
            <div className="display-sm mb-2">Batch Benchmark</div>
            <p className="body-sm" style={{ color: MUTED_TEXT_COLOR }}>
              Run both pipelines on HotpotQA multi-hop questions
            </p>
          </div>
          <div className="flex items-center gap-4 ml-auto">
            <label className="caption">
              Samples
              <input
                type="range"
                min={10}
                max={500}
                step={10}
                value={samples}
                onChange={(e) => setSamples(Number(e.target.value))}
                className="block w-32 mt-1 accent-[#FF6B00]"
              />
              <span className="body-sm font-mono">{samples}</span>
            </label>
            {/* Explicit type so the button never acts as a form submit. */}
            <button
              type="button"
              className="btn btn-primary"
              onClick={() => void handleRun()}
              disabled={running}
            >
              {running ? "Running…" : "🏃 Run Benchmark"}
            </button>
          </div>
        </div>
      </div>

      {/* Summary Metrics */}
      <div className="grid grid-cols-2 md:grid-cols-4 gap-4 mb-6">
        {summaryCards.map((m) => (
          <div key={m.label} className="card-cream text-center" style={{ padding: "20px" }}>
            <div className="metric-value-sm" style={{ color: m.color }}>{m.value}</div>
            <div className="metric-label">{m.label}</div>
          </div>
        ))}
      </div>

      <div className="grid grid-cols-1 lg:grid-cols-2 gap-6 mb-6">
        {/* Radar Chart */}
        <div className="card">
          <div className="title-md mb-4">Multi-Metric Radar</div>
          <ResponsiveContainer width="100%" height={340}>
            <RadarChart data={radarData}>
              <PolarGrid stroke="#002B49" strokeOpacity={0.12} />
              <PolarAngleAxis dataKey="metric" tick={{ fill: MUTED_TEXT_COLOR, fontSize: 12 }} />
              <Radar name="Baseline" dataKey="Baseline" stroke={BASELINE_COLOR} fill={BASELINE_COLOR} fillOpacity={0.15} strokeWidth={2} />
              <Radar name="GraphRAG" dataKey="GraphRAG" stroke={GRAPHRAG_COLOR} fill={GRAPHRAG_COLOR} fillOpacity={0.15} strokeWidth={2} />
              <Legend />
              <Tooltip contentStyle={TOOLTIP_STYLE} />
            </RadarChart>
          </ResponsiveContainer>
        </div>

        {/* By Question Type */}
        <div className="card">
          <div className="title-md mb-4">F1 by Question Type</div>
          <ResponsiveContainer width="100%" height={340}>
            <BarChart data={typeData} margin={{ top: 20, right: 20, left: 0, bottom: 0 }}>
              <CartesianGrid strokeDasharray="3 3" stroke="#002B49" strokeOpacity={0.08} />
              <XAxis dataKey="name" tick={{ fill: MUTED_TEXT_COLOR, fontSize: 13 }} />
              <YAxis domain={[0, 100]} tick={{ fill: MUTED_TEXT_COLOR, fontSize: 12 }} />
              <Tooltip contentStyle={TOOLTIP_STYLE} />
              <Legend />
              <Bar dataKey="Baseline" fill={BASELINE_COLOR} radius={[4, 4, 0, 0]} />
              <Bar dataKey="GraphRAG" fill={GRAPHRAG_COLOR} radius={[4, 4, 0, 0]} />
            </BarChart>
          </ResponsiveContainer>
        </div>
      </div>

      {/* Detailed Table */}
      <div className="card">
        <div className="title-md mb-4">Detailed Comparison</div>
        <div className="overflow-x-auto">
          <table style={{ width: "100%", borderCollapse: "collapse", fontSize: "0.875rem" }}>
            <thead>
              <tr style={{ borderBottom: "2px solid var(--color-hairline)" }}>
                {["Metric", "Baseline RAG", "GraphRAG", "Winner"].map((h) => (
                  <th key={h} className="caption-uppercase text-left" style={{ padding: CELL_PADDING }}>{h}</th>
                ))}
              </tr>
            </thead>
            <tbody>
              {comparisonRows.map((row) => (
                <tr key={row.metric} style={{ borderBottom: "1px solid var(--color-hairline-soft)" }}>
                  <td className="title-sm" style={{ padding: CELL_PADDING }}>{row.metric}</td>
                  <td style={{ padding: CELL_PADDING, fontFamily: "var(--font-mono)", color: BASELINE_COLOR }}>{row.b}</td>
                  <td style={{ padding: CELL_PADDING, fontFamily: "var(--font-mono)", color: GRAPHRAG_COLOR }}>{row.g}</td>
                  <td style={{ padding: CELL_PADDING }}>
                    <span className={row.winner === "graphrag" ? "badge-orange" : "badge-blue"} style={{ fontSize: "0.6875rem" }}>
                      {row.winner === "graphrag" ? "GraphRAG" : "Baseline"}
                    </span>
                  </td>
                </tr>
              ))}
            </tbody>
          </table>
        </div>
      </div>
    </div>
  );
}