import React, { useState } from 'react'; import { BarChart, Bar, LineChart, Line, XAxis, YAxis, CartesianGrid, Tooltip, Legend, ResponsiveContainer, AreaChart, Area, ComposedChart } from 'recharts'; import { TrendingDown, TrendingUp, Activity, FileText, ArrowDownRight, Zap } from 'lucide-react'; const TokenReport = () => { const [activeTab, setActiveTab] = useState('overview'); // --- DATA PREPARATION --- // Raw data derived from user inputs const timeSeriesData = [ { turn: 'Turn 1', Baseline1: 45, Baseline2: 52, Enforce1: 48, Enforce2: 46, Enforce3: 55 }, { turn: 'Turn 2', Baseline1: 49, Baseline2: 68, Enforce1: 37, Enforce2: 33, Enforce3: 53 }, { turn: 'Turn 3', Baseline1: 57, Baseline2: 62, Enforce1: 35, Enforce2: 19, Enforce3: 32 }, { turn: 'Turn 4', Baseline1: 42, Baseline2: 63, Enforce1: 19, Enforce2: 13, Enforce3: 10 }, { turn: 'Turn 5', Baseline1: 37, Baseline2: 69, Enforce1: 17, Enforce2: 12, Enforce3: 5 }, ]; const totalComparisonData = [ { name: 'Baseline 1', total: 230, avg: 46, type: 'Standard' }, { name: 'Baseline 2', total: 316, avg: 63, type: 'Standard' }, { name: 'Enforcement 1', total: 156, avg: 31.2, type: 'Optimized' }, { name: 'Enforcement 2', total: 123, avg: 24.6, type: 'Optimized' }, // Recalculated sum based on rows { name: 'Enforcement 3', total: 155, avg: 31, type: 'Optimized' }, ]; // Specific calculation for Test 1 (3 turn subtotal requested by user) const baseline1SubtotalInput = 54; const baseline1SubtotalOutput = 97; const baseline1IncreaseRaw = baseline1SubtotalOutput - baseline1SubtotalInput; const baseline1IncreasePercent = ((baseline1IncreaseRaw / baseline1SubtotalInput) * 100).toFixed(1); // Efficiency Calculation (Baseline 2 vs Best Enforcement) const bestReduction = ((316 - 123) / 316 * 100).toFixed(1); return (
Analyzing the "Mirroring Effect" vs. "Enforcement Protocols" in conversational agents.
Token increase in Baseline Test 1 (Input vs Output)
Reduction in total volume (Baseline 2 vs. Enforcement 2)
Token drop from Turn 1 to Turn 5 in Enforcement models
Standard LLM behavior ("Baseline") resulted in a mirroring effect, where the model repeats and expands upon information, causing the token count to grow or remain stubbornly high. The "Enforcement" protocols reduced total token load by up to 61% compared to the worst-case baseline.
Tracking total tokens (Input + Output) per turn.
Remains consistently high (roughly 37-70 tokens/turn). The model feels "obligated" to reply with conversational filler ("Got it", "Just to confirm"), maintaining high volume.
Aggressive decay. By Turns 4 and 5, the model enters a "maintenance mode," utilizing only 5-19 tokens per turn to sustain the context.
"What is the % increase on this?" (Based on the first 3 turns of Baseline 1)
| Test Name | Turn 1 | Turn 2 | Turn 3 | Turn 4 | Turn 5 | Avg |
|---|---|---|---|---|---|---|
| {row.name} | {row.data.map((val, i) => ({val} | ))}{row.avg} |