"""Streamlit landing page and leaderboard for RAGRouter-Bench.

NOTE(review): in the reviewed copy of this file the HTML/CSS markup inside
the string literals was missing (only the text content survived).  The
page-level literals below are reproduced exactly as found; only the markup
emitted by ``df_to_html_table`` was rebuilt, because that function cannot
produce a table without tags.  Confirm both against the deployed version.
"""

import base64
import html
import os

import pandas as pd
import streamlit as st

# Resolve assets relative to this file so the app does not depend on the
# directory Streamlit happens to be launched from.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
UTILS_DIR = os.path.join(BASE_DIR, "utils")

# NOTE(review): inline cell styles are a reconstruction (see module docstring).
_TH_STYLE = (
    "background-color: #f0f0f0; color: #000000; font-weight: normal; "
    "text-align: left; padding: 6px 8px;"
)
_TD_STYLE = "text-align: left; padding: 6px 8px;"


def _asset_path(filename):
    """Return the absolute path of *filename* inside the ``utils`` folder."""
    return os.path.join(UTILS_DIR, filename)


def get_image_base64(image_path):
    """Read the file at *image_path* and return its bytes as a base64 string."""
    with open(image_path, "rb") as f:
        return base64.b64encode(f.read()).decode()


def style_dataframe(df):
    """Return a pandas Styler with zebra-striped rows and a bold, dark header.

    Row parity is taken from ``row.name``, so this presumably expects the
    default integer index (e.g. after ``reset_index(drop=True)``).
    """

    def row_style(row):
        if row.name % 2 == 0:
            return ['background-color: #f9f9f9'] * len(row)
        return ['background-color: #ffffff'] * len(row)

    table_styles = [
        # Header cells
        {'selector': 'th', 'props': [
            ('background-color', '#f0f0f0'),
            ('color', '#000000'),
            ('font-weight', 'bold'),
            ('text-align', 'left'),
            ('padding', '8px'),
        ]},
        # Body cells
        {'selector': 'td', 'props': [
            ('text-align', 'left'),
            ('padding', '8px'),
        ]},
        # Column-heading text
        {'selector': 'th.col_heading', 'props': [
            ('background-color', '#f0f0f0'),
            ('color', '#000000'),
            ('font-weight', 'bold'),
        ]},
    ]
    return df.style.set_table_styles(table_styles).apply(row_style, axis=1)


def df_to_html_table(df, height=400):
    """Render *df* as a scrollable, zebra-striped HTML table string.

    Args:
        df: DataFrame to render; the index is not shown.
        height: Maximum height of the scroll container, in pixels.

    Returns:
        A single-line HTML string (no blank lines, so it is safe to pass to
        ``st.markdown(..., unsafe_allow_html=True)``).

    Column names and cell values are HTML-escaped so that data containing
    ``<``, ``>`` or ``&`` cannot break or inject markup.
    """
    header_cells = "".join(
        f'<th style="{_TH_STYLE}">{html.escape(str(col))}</th>'
        for col in df.columns
    )
    parts = [
        f'<div style="max-height: {height}px; overflow-y: auto;">',
        '<table style="width: 100%; border-collapse: collapse;">',
        f'<thead><tr>{header_cells}</tr></thead>',
        '<tbody>',
    ]
    # itertuples keeps each column's own dtype; iterrows would upcast a whole
    # row to one dtype (e.g. ints shown as floats in all-numeric tables).
    for i, row in enumerate(df.itertuples(index=False, name=None)):
        bg = '#f5f5f5' if i % 2 == 0 else '#ffffff'
        cells = "".join(
            f'<td style="{_TD_STYLE}">{html.escape(str(val))}</td>'
            for val in row
        )
        parts.append(f'<tr style="background-color: {bg};">{cells}</tr>')
    parts.append('</tbody></table></div>')
    return "".join(parts)


def _default_sort_index(columns, preferred):
    """Return the position of *preferred* in *columns*, or 0 if it is absent."""
    try:
        return columns.index(preferred)
    except ValueError:
        return 0


def _filter_and_sort(df, search, sort_by, order):
    """Return a filtered, sorted copy of *df* with a fresh integer index.

    Args:
        df: Source DataFrame (not modified).
        search: Text to look for in any column; empty means no filtering.
        sort_by: Column name to sort on.
        order: ``"Ascending"`` or ``"Descending"``.

    The search is a case-insensitive *literal* substring match.  Treating the
    user's text as a regular expression would raise on input such as ``(``
    and would make ``.`` match every row.
    """
    result = df
    if search:
        mask = result.astype(str).apply(
            lambda column: column.str.contains(search, case=False, regex=False)
        ).any(axis=1)
        result = result[mask]
    return result.sort_values(
        by=sort_by, ascending=(order == "Ascending")
    ).reset_index(drop=True)


st.set_page_config(
    page_title="RAGRouter-Bench: A Dataset and Benchmark for Adaptive RAG Routing",
    layout="wide",
    initial_sidebar_state="expanded",
)

# Background colour
st.markdown(""" """, unsafe_allow_html=True)

# Title
# NOTE(review): the icon/image variables loaded below are presumably
# interpolated into markup that is not visible in this copy of the file.
title_icon = get_image_base64(_asset_path("title_icon.png"))
st.markdown(f"""

RAGRouter-Bench:
A Dataset and Benchmark for Adaptive RAG Routing

""", unsafe_allow_html=True)

# Statistics banner
st.markdown("""
📚 4 Corpus Domains| 📄 21K Documents| ❓ 7.7K Query Types| 📊 3 Dimension Evaluations| 🔄 5 RAG Paradigms| 🤖 2 LLMs Tested
""", unsafe_allow_html=True)

# Main content
# About section
with st.container():
    about_icon = get_image_base64(_asset_path("about_icon.png"))
    st.markdown(f"""

About

""", unsafe_allow_html=True)

    # Tabs inside About
    about_tab1, about_tab2, about_tab3 = st.tabs(
        ["📋 Overview", "⭐ Key Features", "🚀 Get Started"]
    )

    with about_tab1:
        pipeline_img = get_image_base64(_asset_path("Overall_Pipeline.png"))
        st.markdown(f"""

Overall Pipeline

Retrieval-Augmented Generation (RAG) has become a core paradigm for grounding large language models with external knowledge. Despite extensive efforts exploring diverse retrieval strategies, existing studies predominantly focus on query-side complexity or isolated method improvements, lacking a systematic understanding of how RAG paradigms behave across different query–corpus contexts and effectiveness–efficiency trade-offs. In this work, we introduce RAGRouter-Bench, the first dataset and benchmark designed for adaptive RAG routing. RAGRouter-Bench revisits retrieval from a query–corpus compatibility perspective and standardizes five representative RAG paradigms for systematic evaluation across 7,727 queries and 21,460 documents spanning diverse domains. The benchmark incorporates three canonical query types together with fine-grained semantic and structural corpus metrics, as well as a unified evaluation for both generation quality and resource consumption. Experiments with DeepSeek-V3 and LLaMA-3.1-8B demonstrate that no single RAG paradigm is universally optimal, that paradigm applicability is strongly shaped by query–corpus interactions, and that increased advanced mechanism does not necessarily yield better effectiveness–efficiency trade-offs. These findings underscore the necessity of routing-aware evaluation and establish a foundation for adaptive, interpretable, and generalizable next-generation RAG systems.

""", unsafe_allow_html=True)

    with about_tab2:
        bench_img = get_image_base64(_asset_path("Data_Profile.png"))
        st.markdown(f"""

Benchmark Features

🌐 Multi-Domain Corpora

❓ Three Query Types

🔄 Five RAG Paradigm

📊 Dual-View Corpus Evaluation

⚖️ Effectiveness-Efficiency Evaluation

""", unsafe_allow_html=True)

    with about_tab3:
        paradigms_img = get_image_base64(_asset_path("RAG_Paradigms.png"))
        st.markdown(f"""

RAG Paradigm

📥 Download RAGRouter-Bench Dataset

💻 Installation

git clone https://github.com/ziqiwang0908/RAGRouter-Bench
cd RAGRouter-Bench
conda env create -f environment.yml
conda activate ragBench

⚙️ Configuration

🚀 Quick Start

Step Command Description
1. Process python main.py process all --dataset musique Chunking, embedding, graph building
2. Retrieve python main.py retrieve graph --dataset musique Run RAG retrieval
3. Evaluate python main.py evaluate result --dataset musique --method graph_rag Evaluate results
Full Pipeline python main.py pipeline --dataset musique --method graph Run all steps

Available Datasets

Available RAG Paradigms

Data Format

Your data should be placed in Dataset/Rawutils/{{dataset_name}}/ with:

""", unsafe_allow_html=True)

# Leaderboard section
leaderboard_icon = get_image_base64(_asset_path("leaderboard_icon.png"))
st.markdown(f"""

Leaderboard

""", unsafe_allow_html=True)

# Tabs inside Leaderboard
lb_tab1, lb_tab2, lb_tab3, lb_tab4 = st.tabs(
    ["🏆 Full Leaderboard", "📁 Corpus Metrics", "📈 Effectiveness Metrics", "⚡ Efficiency Metrics"]
)

with lb_tab1:
    # Full leaderboard explanation
    st.markdown("""

📋 Columns Explained:

""", unsafe_allow_html=True)

    df_full = pd.read_csv(_asset_path("full_lb.csv"))
    full_columns = df_full.columns.tolist()

    col1_f, col2_f, col3_f, col4_f = st.columns([2, 2, 2, 3])
    with col1_f:
        model_select_f = st.selectbox(
            "Model",
            options=["All"] + df_full["Model"].unique().tolist(),
            index=0,
            key="model_full",
        )
    with col2_f:
        sort_by_f = st.selectbox(
            "Sort by",
            options=full_columns,
            index=_default_sort_index(full_columns, "Avg Acc"),
            key="sort_full",
        )
    with col3_f:
        order_f = st.radio(
            "Order",
            options=["Descending", "Ascending"],
            horizontal=True,
            key="order_full",
        )
    with col4_f:
        search_f = st.text_input(
            "Search", placeholder="Search in all columns...", key="search_full"
        )

    df_display_f = df_full
    if model_select_f != "All":
        df_display_f = df_display_f[df_display_f["Model"] == model_select_f]
    df_display_f = _filter_and_sort(df_display_f, search_f, sort_by_f, order_f)

    st.markdown(df_to_html_table(df_display_f), unsafe_allow_html=True)

with lb_tab2:
    # Structural metrics explanation
    st.markdown("""

🔗 Structural Topology Metrics:

""", unsafe_allow_html=True)

    df_structure = pd.read_csv(_asset_path("corpus_structure.csv"))

    col1_s, col2_s, col3_s = st.columns([2, 2, 3])
    with col1_s:
        sort_by_s = st.selectbox(
            "Sort by",
            options=df_structure.columns.tolist(),
            index=0,
            key="sort_structure",
        )
    with col2_s:
        order_s = st.radio(
            "Order",
            options=["Descending", "Ascending"],
            horizontal=True,
            key="order_structure",
        )
    with col3_s:
        search_s = st.text_input(
            "Search", placeholder="Search in all columns...", key="search_structure"
        )

    df_display_s = _filter_and_sort(df_structure, search_s, sort_by_s, order_s)
    st.markdown(df_to_html_table(df_display_s, height=200), unsafe_allow_html=True)

    # Semantic metrics explanation
    st.markdown("""

🧠 Semantic Space Metrics:

""", unsafe_allow_html=True)

    df_semantic = pd.read_csv(_asset_path("corpus_semantic.csv"))

    col1_m, col2_m, col3_m = st.columns([2, 2, 3])
    with col1_m:
        sort_by_m = st.selectbox(
            "Sort by",
            options=df_semantic.columns.tolist(),
            index=0,
            key="sort_semantic",
        )
    with col2_m:
        order_m = st.radio(
            "Order",
            options=["Descending", "Ascending"],
            horizontal=True,
            key="order_semantic",
        )
    with col3_m:
        search_m = st.text_input(
            "Search", placeholder="Search in all columns...", key="search_semantic"
        )

    df_display_m = _filter_and_sort(df_semantic, search_m, sort_by_m, order_m)
    st.markdown(df_to_html_table(df_display_m, height=200), unsafe_allow_html=True)

with lb_tab3:
    # Metrics explanation
    st.markdown("""

📊 Metrics Explained:

""", unsafe_allow_html=True)

    # Model name -> effectiveness CSV
    model_files = {
        "DeepSeek-V3": _asset_path("effect_deepseek.csv"),
        "Llama-3-8B": _asset_path("effect_llama.csv"),
    }

    # Controls
    col1_e, col2_e, col3_e, col4_e = st.columns([2, 2, 2, 3])
    with col1_e:
        model_select = st.selectbox(
            "Model",
            options=list(model_files.keys()),
            index=0,
            key="model_effect",
        )

    df_effect = pd.read_csv(model_files[model_select])

    with col2_e:
        sort_by_e = st.selectbox(
            "Sort by",
            options=df_effect.columns.tolist(),
            index=0,
            key="sort_effect",
        )
    with col3_e:
        order_e = st.radio(
            "Order",
            options=["Descending", "Ascending"],
            horizontal=True,
            key="order_effect",
        )
    with col4_e:
        search_e = st.text_input(
            "Search", placeholder="Search in all columns...", key="search_effect"
        )

    df_display_e = _filter_and_sort(df_effect, search_e, sort_by_e, order_e)
    st.markdown(df_to_html_table(df_display_e), unsafe_allow_html=True)

with lb_tab4:
    # Cost explanation
    st.markdown("""

💰 Cost Explained:

""", unsafe_allow_html=True)

    # Read data
    df_efficiency = pd.read_csv(_asset_path("retrieval_generation_cost.csv"))
    efficiency_columns = df_efficiency.columns.tolist()

    # Controls
    col1, col2, col3 = st.columns([2, 2, 3])
    with col1:
        sort_by = st.selectbox(
            "Sort by",
            options=efficiency_columns,
            # Sort by Total_Tokens by default when that column exists.
            index=_default_sort_index(efficiency_columns, "Total_Tokens"),
        )
    with col2:
        order = st.radio(
            "Order",
            options=["Descending", "Ascending"],
            horizontal=True,
        )
    with col3:
        search = st.text_input("Search", placeholder="Search in all columns...")

    df_display = _filter_and_sort(df_efficiency, search, sort_by, order)
    st.markdown(df_to_html_table(df_display), unsafe_allow_html=True)

# Questions & Contact section
contact_icon = get_image_base64(_asset_path("contact_icon.png"))
st.markdown(f"""

Questions & Contact

""", unsafe_allow_html=True)

st.markdown("""

If you have any questions about RAGRouter-Bench, please feel free to reach out to us:

For bug reports or feature requests, please open an issue on our GitHub repository.

""", unsafe_allow_html=True)