| import gradio as gr |
| from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns |
| import pandas as pd |
|
|
| |
# Stylesheet passed to gr.Blocks(css=...). It defines the colour variables,
# card styling for groups/blocks, the leaderboard filter/column-picker layout,
# the table look, and the citation box with its copy button.
# NOTE(review): `padding` on #filter-columns-type / #filter-columns-size was
# written as a unitless `0.5`, which CSS treats as invalid and drops; `em` is
# assumed here -- confirm the intended unit.
custom_css = """
/* 全局设置:简洁、高级的字体和背景 */
:root {
    --color-background-primary: #f8f8f8; /* 浅米白色背景 */
    --color-background-secondary: #ffffff; /* 卡片背景 */
    --color-text-primary: #333333;
    --color-accent: #8e80ff; /* 浅紫色强调色 (Primary) */
    --color-accent-light: #a99dff; /* 浅紫色悬停色 */
    --shadow-medium: 0 4px 12px rgba(0, 0, 0, 0.08);
}

body {
    background-color: var(--color-background-primary) !important;
}

/* 增加容器最大宽度以展示完整表格 */
.gradio-container {
    max-width: 1400px; /* 宽度从 1800px 调窄到 1400px */
    margin: 0 auto;
    padding: 20px;
}

/* 标题样式 */
#space-title {
    color: var(--color-text-primary);
    font-size: 3em;
    font-weight: 700;
    margin-bottom: 0.5em;
    padding-top: 20px;
}

/* Group/Block 组件的卡片样式 */
.gr-group, .gr-block {
    background-color: var(--color-background-secondary);
    border-radius: 12px;
    box-shadow: var(--shadow-medium);
    transition: box-shadow 0.3s ease;
    padding: 15px;
    margin-bottom: 20px;
}

.gr-group:hover, .gr-block:hover {
    box-shadow: 0 6px 18px rgba(0, 0, 0, 0.12);
}

/* Leaderboard 容器:调整内部布局的关键 */
.leaderboard_root {
    padding: 0 !important;
}

/* 搜索栏布局调整 (第一行) */
.leaderboard_root > div:nth-child(1) {
    padding: 0 15px 15px 15px;
}

/* 过滤器和列选择布局调整 (第二行) */
.leaderboard_root > div:nth-child(2) {
    display: flex;
    padding: 0 15px 15px 15px;
}

.leaderboard_root .gr-form {
    border: none;
}

/* Search Bar */
#search-bar-table-box {
    width: 100%;
    margin-bottom: 10px;
}
#search-bar-table-box > div:first-child {
    background: none;
    border: none;
}

/* === Select Columns to Display: 强制单行展示 === */
/* 定位 SelectColumns 的内部复选框容器 */
.leaderboard-filter-column:first-child .gr-form-checkbox-group {
    /* 使用 flex 容器 */
    display: flex !important;
    flex-wrap: nowrap !important; /* 强制不换行 */
    overflow-x: auto !important; /* 允许水平滚动 */
    gap: 10px;
    padding-bottom: 5px;
}

/* 确保每个复选框标签保持内联块级元素 */
.leaderboard-filter-column:first-child .gr-form-checkbox-group label {
    flex-shrink: 0 !important; /* 防止选项被压缩 */
    display: inline-block !important; /* 确保每个选项占据其自然宽度 */
    margin: 0;
    white-space: nowrap; /* 确保文字也不换行 */
}

#leaderboard-table, #leaderboard-table-lite {
    margin-top: 15px;
    border-radius: 8px;
    overflow: hidden;
}

#leaderboard-table th {
    background-color: var(--color-accent);
    color: white;
    font-weight: 600;
    text-transform: uppercase;
    border-bottom: 2px solid var(--color-accent-light);
}

#leaderboard-table tr:hover {
    background-color: #f0f0f0;
    cursor: pointer;
    transition: background-color 0.2s ease;
}

#leaderboard-table td:nth-child(2),
#leaderboard-table th:nth-child(2) {
    max-width: 400px;
    overflow: auto;
    white-space: nowrap;
}

#leaderboard-table td:nth-child(3) {
    font-weight: bold;
    color: var(--color-accent);
}

/* Citation 区域 */
#citation-group {
    padding: 20px;
    margin-top: 10px;
}

#citation-button {
    margin-top: 0;
    padding: 0;
}

/* 修复 Citation 复制图标重叠问题 */
#citation-button label {
    display: block;
    position: relative;
}

#citation-button textarea {
    font-family: monospace;
    background-color: #f1f1f1;
    border: 1px solid #cccccc;
    border-radius: 6px;
    padding: 10px;
    padding-right: 40px !important; /* 为复制按钮腾出空间 */
    font-size: 14px !important;
    width: 100% !important;
    box-sizing: border-box;
}

/* 调整复制按钮的位置 */
#citation-button > label > button {
    position: absolute;
    top: 10px;
    right: 10px;
    margin: 0;
    transform: scale(1.1);
    transition: transform 0.2s ease;
    background-color: var(--color-accent) !important;
    color: white !important;
    border: none !important;
    border-radius: 6px;
    z-index: 10;
}

#citation-button > label > button:hover {
    transform: scale(1.2);
    background-color: var(--color-accent-light) !important;
}

/* Leaderboard 内部过滤/选择组件微调 */
.leaderboard_root .leaderboard-filter-column:last-child {
    flex-grow: 1;
    max-width: 50%;
}

.leaderboard_root .leaderboard-filter-column:first-child {
    max-width: 50%;
    padding-right: 20px;
}

#filter_type{
    border: 0;
    padding-left: 0;
    padding-top: 0;
}
#filter_type label {
    display: flex;
}
#filter_type label > span{
    margin-top: var(--spacing-lg);
    margin-right: 0.5em;
}
#filter_type label > .wrap{
    width: 103px;
}
#filter_type label > .wrap .wrap-inner{
    padding: 2px;
}
#filter_type label > .wrap .wrap-inner input{
    width: 1px;
}
#filter-columns-type{
    border: 0;
    padding: 0.5em;
}
#filter-columns-size{
    border: 0;
    padding: 0.5em;
}
#box-filter > .form{
    border: 0;
}
/* 其他 Gradio 元素的简洁化 */
.wrap-inner input[type="text"], .wrap-inner input[type="number"] {
    border-radius: 6px;
    border: 1px solid #cccccc;
    padding: 8px 12px;
}
"""
|
|
# JavaScript snippet kept as a string: it parses the page's query string with
# URLSearchParams and returns the parameters as a plain object.
# It is not referenced anywhere else in the visible part of this file.
get_window_url_params = """
function(url_params) {
const params = new URLSearchParams(window.location.search);
url_params = Object.fromEntries(params);
return url_params;
}
"""
|
|
|
|
# Header markup rendered through gr.HTML at the top of the page.
# TITLE's id="space-title" is the hook for the #space-title rule in custom_css;
# INFO is a centered row of project links separated by middle dots.
TITLE = """<h1 align="center" id="space-title">SGI-Bench Leaderboard 🏆</h1>"""
INFO = """<p align="center">
<a href="https://internscience.github.io/SGI-Page/"><b>🌐Official Site</b></a> ·
<a href="https://arxiv.org/pdf/2512.16969"><b>📜arXiv</b></a> ·
<a href="https://huggingface.co/collections/InternScience/sgi-bench"><b>🤗Hugging Face</b></a> ·
<a href="https://github.com/InternScience/SGI-Bench"><b>💻GitHub</b></a>
</p>"""
|
|
# Label and BibTeX entry shown in the citation textbox. The textbox height is
# derived from the number of newline characters in CITATION_BUTTON_TEXT, so
# adding or removing lines here changes how tall the box is rendered.
CITATION_BUTTON_LABEL = "📖 Citation"
CITATION_BUTTON_TEXT = r"""@article{xu2025probing,
title={Probing Scientific General Intelligence of LLMs with Scientist-Aligned Workflows},
author={Xu, Wanghan and Zhou, Yuhao and Zhou, Yifan and Cao, Qinglong and Li, Shuo and Bu, Jia and Liu, Bo and Chen, Yixin and He, Xuming and Zhao, Xiangyu and others},
journal={arXiv preprint arXiv:2512.16969},
year={2025}
}"""
|
|
# Raw leaderboard entries, one dict per model:
#   "name"   - model name shown in the "Model" column
#   "type"   - "Open" or "Closed"; shown in the "Type" column and used by the
#              "Model Types:" filter
#   "scores" - exactly five per-task scores, in the order of the task columns
#              in build_leaderboard_df: Scientific Deep Research,
#              Idea Generation, Dry Experiment, Wet Experiment,
#              Experimental Reasoning
# The overall SGI-Score is NOT stored here; build_leaderboard_df derives it as
# the mean of the task scores.
LEADERBOARD_DATA = [
    {"name": "Intern-S1", "type": "Open", "scores": [15.74, 38.09, 28.79, 29.02, 28.87]},
    {"name": "Intern-S1-mini", "type": "Open", "scores": [11.06, 36.04, 16.97, 12.42, 16.84]},
    {"name": "Qwen3-VL-235B-A22B", "type": "Open", "scores": [11.97, 39.28, 28.41, 30.30, 31.62]},
    {"name": "Qwen3-8B", "type": "Open", "scores": [8.18, 35.78, 18.45, 9.96, 23.37]},
    {"name": "Llama-4-Scout", "type": "Open", "scores": [7.86, 29.72, 20.37, 21.66, 25.77]},
    {"name": "Qwen3-Max", "type": "Closed", "scores": [15.38, 39.83, 33.21, 33.62, 37.80]},
    {"name": "GPT-4o", "type": "Closed", "scores": [7.86, 35.95, 26.94, 31.31, 32.30]},
    {"name": "GPT-4.1", "type": "Closed", "scores": [11.32, 36.49, 34.32, 36.63, 38.49]},
    {"name": "GPT-5", "type": "Closed", "scores": [14.47, 55.40, 29.89, 16.31, 38.14]},
    {"name": "GPT-5.1", "type": "Closed", "scores": [11.64, 47.12, 31.00, 22.77, 34.02]},
    {"name": "GPT-5.2-Pro", "type": "Closed", "scores": [15.72, 55.03, 28.04, 17.50, 39.18]},
    {"name": "o3", "type": "Closed", "scores": [12.89, 46.07, 31.73, 30.04, 32.65]},
    {"name": "o4-mini", "type": "Closed", "scores": [11.95, 40.78, 35.79, 28.86, 33.33]},
    {"name": "Gemini-2.5-Flash", "type": "Closed", "scores": [10.69, 39.13, 21.03, 18.55, 34.36]},
    {"name": "Gemini-2.5-Pro", "type": "Closed", "scores": [15.09, 39.95, 22.51, 22.05, 41.24]},
    {"name": "Gemini-3-Pro", "type": "Closed", "scores": [18.48, 39.68, 36.64, 32.45, 41.92]},
    {"name": "Claude-Opus-4.1", "type": "Closed", "scores": [12.93, 40.29, 34.69, 25.38, 38.83]},
    {"name": "Claude-Sonnet-4.5", "type": "Closed", "scores": [13.84, 43.20, 35.79, 30.15, 37.80]},
    {"name": "Grok-4", "type": "Closed", "scores": [13.31, 37.12, 33.71, 29.01, 30.24]},
]
|
|
def build_leaderboard_df(data=None):
    """Build the leaderboard table, sorted by overall SGI-Score (descending).

    Args:
        data: Optional iterable of entries shaped like the items of
            ``LEADERBOARD_DATA``: ``{"name": str, "type": str,
            "scores": [number, ...]}`` with one score per task column, in
            column order. When omitted, ``LEADERBOARD_DATA`` is used.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Model``, ``Type`` and
        ``SGI-Score`` followed by the five task columns. ``SGI-Score`` is the
        unweighted mean of an entry's task scores; numeric values are rounded
        to two decimals. The original row index is kept after sorting.

    Raises:
        ValueError: If an entry provides fewer scores than there are task
            columns.
    """
    task_cols = [
        "Scientific Deep Research",
        "Idea Generation",
        "Dry Experiment",
        "Wet Experiment",
        "Experimental Reasoning",
    ]
    entries = LEADERBOARD_DATA if data is None else data

    rows = []
    for item in entries:
        # Only the first len(task_cols) values are task scores. Ignoring any
        # trailing extras keeps them out of the SGI-Score mean.
        scores = item["scores"][: len(task_cols)]
        if len(scores) < len(task_cols):
            raise ValueError(
                f"Entry {item['name']!r} has {len(scores)} scores; "
                f"expected {len(task_cols)}."
            )
        row = {
            "Model": item["name"],
            "Type": item["type"],
            "SGI-Score": round(sum(scores) / len(scores), 2),
        }
        row.update(zip(task_cols, scores))
        rows.append(row)

    cols = ["Model", "Type", "SGI-Score"] + task_cols
    df = pd.DataFrame(rows, columns=cols)
    return df.sort_values(by=["SGI-Score"], ascending=False).round(decimals=2)
|
|
# Leaderboard table, built once when the module is loaded and handed to
# init_leaderboard when the UI is assembled further down.
LEADERBOARD_DF = build_leaderboard_df()
|
|
|
|
def init_leaderboard(dataframe):
    """Create the ``Leaderboard`` component that displays ``dataframe``.

    The component is configured with two text columns (``Model``, ``Type``)
    followed by six numeric score columns. All eight columns are selected by
    default, and ``Model`` and ``Type`` are listed as not deselectable. Search
    is set up on ``Model``, a checkbox-group filter is attached to ``Type``,
    and the component is created with ``interactive=False``.

    Args:
        dataframe: Table to display; expected to carry the eight columns named
            below, in that order.

    Returns:
        The configured ``Leaderboard`` instance.
    """
    text_columns = ["Model", "Type"]
    score_columns = [
        "SGI-Score",
        "Scientific Deep Research",
        "Idea Generation",
        "Dry Experiment",
        "Wet Experiment",
        "Experimental Reasoning",
    ]

    column_picker = SelectColumns(
        default_selection=text_columns + score_columns,
        cant_deselect=list(text_columns),
        label="Select Columns to Display:",
    )
    type_filter = ColumnFilter("Type", type="checkboxgroup", label="Model Types:")

    return Leaderboard(
        value=dataframe,
        # One datatype per column: text columns first, then the numeric scores.
        datatype=["str"] * len(text_columns) + ["number"] * len(score_columns),
        select_columns=column_picker,
        search_columns=["Model"],
        hide_columns=[],
        filter_columns=[type_filter],
        interactive=False,
    )
|
|
|
|
# Assemble the page: header HTML, the leaderboard inside its own group, and a
# citation textbox with a copy button; then enable queuing and start the app.
with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
    gr.HTML(TITLE)
    gr.HTML(INFO)

    with gr.Group(elem_id="leaderboard-group"):
        leaderboard = init_leaderboard(LEADERBOARD_DF)

    with gr.Row(), gr.Column(), gr.Group(elem_id="citation-group"):
        citation_button = gr.Textbox(
            value=CITATION_BUTTON_TEXT,
            label=CITATION_BUTTON_LABEL,
            # One visible row per line of the BibTeX entry.
            lines=CITATION_BUTTON_TEXT.count('\n') + 1,
            elem_id="citation-button",
            show_copy_button=True,
            interactive=False,
        )

demo.queue(default_concurrency_limit=40)
demo.launch()