"use client"; import { useState } from "react"; import { RadarChart, Radar, PolarGrid, PolarAngleAxis, ResponsiveContainer, Tooltip, Legend, BarChart, Bar, XAxis, YAxis, CartesianGrid, } from "recharts"; const DEMO_AGGREGATE = { numSamples: 100, baseline: { avgF1: 0.5523, avgEM: 0.3810, avgContextHit: 0.4520, avgTokens: 952, avgCost: 0.000203, avgLatency: 1240 }, graphrag: { avgF1: 0.6241, avgEM: 0.4230, avgContextHit: 0.5830, avgTokens: 2387, avgCost: 0.000518, avgLatency: 3820 }, f1WinRate: 0.62, byType: [ { type: "bridge", count: 58, baselineF1: 0.52, graphragF1: 0.63 }, { type: "comparison", count: 42, baselineF1: 0.58, graphragF1: 0.61 }, ], }; const radarData = [ { metric: "F1 Score", Baseline: 55, GraphRAG: 62 }, { metric: "Exact Match", Baseline: 38, GraphRAG: 42 }, { metric: "Context Hit", Baseline: 45, GraphRAG: 58 }, { metric: "Token Eff.", Baseline: 90, GraphRAG: 40 }, { metric: "Cost Eff.", Baseline: 85, GraphRAG: 35 }, ]; export function Benchmark() { const [running, setRunning] = useState(false); const [samples, setSamples] = useState(50); const [data] = useState(DEMO_AGGREGATE); const typeData = data.byType.map((t) => ({ name: t.type.charAt(0).toUpperCase() + t.type.slice(1), Baseline: +(t.baselineF1 * 100).toFixed(1), GraphRAG: +(t.graphragF1 * 100).toFixed(1), })); return (
{/* Controls */}
Batch Benchmark

Run both pipelines on HotpotQA multi-hop questions

{/* Summary Metrics */}
{[ { label: "Avg F1 (Baseline)", value: data.baseline.avgF1.toFixed(4), color: "#0072CE" }, { label: "Avg F1 (GraphRAG)", value: data.graphrag.avgF1.toFixed(4), color: "#FF6B00" }, { label: "GraphRAG Win Rate", value: (data.f1WinRate * 100).toFixed(0) + "%", color: "#5db872" }, { label: "Samples Evaluated", value: data.numSamples.toString(), color: "#002B49" }, ].map((m, i) => (
{m.value}
{m.label}
))}
{/* Radar Chart */}
Multi-Metric Radar
{/* By Question Type */}
F1 by Question Type
{/* Detailed Table */}
Detailed Comparison
{["Metric", "Baseline RAG", "GraphRAG", "Winner"].map((h) => ( ))} {[ { metric: "Avg F1 Score", b: data.baseline.avgF1.toFixed(4), g: data.graphrag.avgF1.toFixed(4), winner: "graphrag" }, { metric: "Avg Exact Match", b: data.baseline.avgEM.toFixed(4), g: data.graphrag.avgEM.toFixed(4), winner: "graphrag" }, { metric: "Avg Context Hit", b: data.baseline.avgContextHit.toFixed(4), g: data.graphrag.avgContextHit.toFixed(4), winner: "graphrag" }, { metric: "Avg Tokens/Query", b: data.baseline.avgTokens.toFixed(0), g: data.graphrag.avgTokens.toFixed(0), winner: "baseline" }, { metric: "Avg Cost ($)", b: "$" + data.baseline.avgCost.toFixed(6), g: "$" + data.graphrag.avgCost.toFixed(6), winner: "baseline" }, { metric: "Avg Latency (ms)", b: data.baseline.avgLatency.toFixed(0), g: data.graphrag.avgLatency.toFixed(0), winner: "baseline" }, ].map((row, i) => ( ))}
{h}
{row.metric} {row.b} {row.g} {row.winner === "graphrag" ? "GraphRAG" : "Baseline"}
); }