{/* Controls */}

Batch Benchmark

Run both pipelines on HotpotQA multi-hop questions

Samples setSamples(+e.target.value)} className="block w-32 mt-1 accent-[#FF6B00]" /> {samples}

{/* Summary Metrics */}

{[ { label: "Avg F1 (Baseline)", value: data.baseline.avgF1.toFixed(4), color: "#0072CE" }, { label: "Avg F1 (GraphRAG)", value: data.graphrag.avgF1.toFixed(4), color: "#FF6B00" }, { label: "GraphRAG Win Rate", value: (data.f1WinRate * 100).toFixed(0) + "%", color: "#5db872" }, { label: "Samples Evaluated", value: data.numSamples.toString(), color: "#002B49" }, ].map((m, i) => (

{m.value}

{m.label}

))}

{/* Radar Chart */}

Multi-Metric Radar

{/* By Question Type */}

F1 by Question Type

{/* Detailed Table */}

Detailed Comparison

{["Metric", "Baseline RAG", "GraphRAG", "Winner"].map((h) => ( ))} {[ { metric: "Avg F1 Score", b: data.baseline.avgF1.toFixed(4), g: data.graphrag.avgF1.toFixed(4), winner: "graphrag" }, { metric: "Avg Exact Match", b: data.baseline.avgEM.toFixed(4), g: data.graphrag.avgEM.toFixed(4), winner: "graphrag" }, { metric: "Avg Context Hit", b: data.baseline.avgContextHit.toFixed(4), g: data.graphrag.avgContextHit.toFixed(4), winner: "graphrag" }, { metric: "Avg Tokens/Query", b: data.baseline.avgTokens.toFixed(0), g: data.graphrag.avgTokens.toFixed(0), winner: "baseline" }, { metric: "Avg Cost ($)", b: "$" + data.baseline.avgCost.toFixed(6), g: "$" + data.graphrag.avgCost.toFixed(6), winner: "baseline" }, { metric: "Avg Latency (ms)", b: data.baseline.avgLatency.toFixed(0), g: data.graphrag.avgLatency.toFixed(0), winner: "baseline" }, ].map((row, i) => ( ))}

{h}
{row.metric}	{row.b}	{row.g}	{row.winner === "graphrag" ? "GraphRAG" : "Baseline"}