"""Main leaderboard page: aggregate table, category winners and charts."""
import matplotlib

# Select the non-interactive Agg backend up front, ahead of the remaining imports.
matplotlib.use('Agg')

import gradio as gr
import pandas as pd

from ui_components import (
    create_leaderboard_display,
    get_full_leaderboard_data,
    create_winners_by_category_html,
)
from content import (
    CITATION_BUTTON_LABEL,
    CITATION_BUTTON_TEXT,
    INTRO_PARAGRAPH
)
from visualizations import (
    create_evolution_over_time_chart,
    create_accuracy_by_size_chart
)
from constants import MARK_BY_DEFAULT

# --- Global State for Viewers (simple caching) ---
CACHED_VIEWERS = {}
CACHED_TAG_MAPS = {}

# Per-category score columns; a row counts as "complete" when all are filled.
# Kept as a list (not a tuple) so it can be used directly for column selection.
_CATEGORY_SCORE_COLUMNS = [
    'Issue Resolution Score',
    'Frontend Score',
    'Greenfield Score',
    'Testing Score',
    'Information Gathering Score',
]


def filter_complete_entries(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *df* restricted to rows that cover every category.

    The completeness test uses the first of these that applies:

    1. all per-category score columns are present -> keep rows where none
       of them is NaN;
    2. a 'Categories Completed' column exists -> keep rows whose numeric
       value is at least the number of categories (non-numeric values are
       coerced to NaN and therefore dropped);
    3. a 'Categories Attempted' column exists -> keep rows equal to
       ``'<n>/<n>'`` where ``n`` is the number of categories.

    An empty frame, or one with none of those columns, is returned as an
    unfiltered copy. The input frame is never modified.
    """
    if df.empty:
        return df.copy()

    required = len(_CATEGORY_SCORE_COLUMNS)

    if all(column in df.columns for column in _CATEGORY_SCORE_COLUMNS):
        has_all_scores = df[_CATEGORY_SCORE_COLUMNS].notna().all(axis=1)
        return df[has_all_scores].copy()

    if 'Categories Completed' in df.columns:
        completed = pd.to_numeric(df['Categories Completed'], errors='coerce')
        return df[completed >= required].copy()

    if 'Categories Attempted' in df.columns:
        all_attempted = f'{required}/{required}'
        return df[df['Categories Attempted'] == all_attempted].copy()

    return df.copy()


def build_page():
    """Lay out the main page inside the currently active Gradio Blocks context.

    Renders the intro paragraph, the "Overall" leaderboard for the "test"
    split, a winners-by-category panel and two charts, then wires the
    leaderboard's filter controls so the panel and charts refresh with them.
    If the leaderboard data is empty, only a "No data available." notice is
    rendered below the header.
    """
    with gr.Row(elem_id="intro-row"):
        with gr.Column(scale=1):
            gr.HTML(INTRO_PARAGRAPH, elem_id="intro-paragraph")

    # --- Leaderboard Display Section ---
    gr.Markdown("---")
    CATEGORY_NAME = "Overall"
    # NOTE(review): section titles are emitted as <h2> headings; the exact
    # heading tag/styling is an assumption and should be confirmed.
    gr.HTML(
        f'<h2>OpenHands Index {CATEGORY_NAME} Leaderboard (Aggregate)</h2>',
        elem_id="main-header",
    )

    test_df, test_tag_map = get_full_leaderboard_data("test")
    if test_df.empty:
        gr.Markdown("No data available.")
        return

    show_incomplete_checkbox, show_open_only_checkbox, mark_by_dropdown = create_leaderboard_display(
        full_df=test_df,
        tag_map=test_tag_map,
        category_name=CATEGORY_NAME,
        split_name="test"
    )

    # Pre-compute the four (all/complete) x (any/open) views once so the
    # change handler only has to pick between them.
    test_df_complete = filter_complete_entries(test_df)
    has_complete_entries = len(test_df_complete) > 0

    if 'Openness' in test_df.columns:
        test_df_open = test_df[test_df['Openness'].str.lower() == 'open'].copy()
    else:
        test_df_open = test_df.copy()
    test_df_complete_open = filter_complete_entries(test_df_open)

    # Start from complete entries only, unless there are none at all.
    initial_df = test_df_complete if has_complete_entries else test_df

    # --- Winners by Category Section ---
    gr.Markdown("---")
    gr.HTML('<h2>Winners by Category</h2>', elem_id="winners-header")
    gr.Markdown("Top 5 performing systems in each benchmark category.")

    winners_component = gr.HTML(
        create_winners_by_category_html(initial_df, top_n=5),
        elem_id="winners-by-category",
    )

    # --- New Visualization Sections ---
    gr.Markdown("---")

    # Evolution Over Time Section
    gr.HTML('<h2>Evolution Over Time</h2>', elem_id="evolution-header")
    gr.Markdown("Track how model performance has improved over time based on release dates.")

    evolution_component = gr.Plot(
        value=create_evolution_over_time_chart(initial_df, MARK_BY_DEFAULT),
        elem_id="evolution-chart",
    )

    gr.Markdown("---")

    # Open Model Accuracy by Size Section (always shows open models only by design)
    gr.HTML('<h2>Open Model Accuracy by Size</h2>', elem_id="size-accuracy-header")
    gr.Markdown("Compare open-weights model performance against their parameter count.")

    size_component = gr.Plot(
        value=create_accuracy_by_size_chart(initial_df, MARK_BY_DEFAULT),
        elem_id="size-accuracy-chart",
    )

    def update_extra_sections(show_incomplete, show_open_only, mark_by):
        """Recompute the winners HTML and both figures for the given filters."""
        # With no complete entries at all, fall back to showing everything.
        include_incomplete = show_incomplete or not has_complete_entries

        base_df = test_df if include_incomplete else test_df_complete
        base_df_open = test_df_open if include_incomplete else test_df_complete_open
        winners_df = base_df_open if show_open_only else base_df

        winners_html = create_winners_by_category_html(winners_df, top_n=5)
        evolution_fig = create_evolution_over_time_chart(winners_df, mark_by)
        # The size chart ignores the open-only toggle and is fed the unfiltered base view.
        size_fig = create_accuracy_by_size_chart(base_df, mark_by)

        return winners_html, evolution_fig, size_fig

    # Any control the leaderboard did not create is replaced by a constant
    # gr.State so the handler always receives three valid inputs.
    show_incomplete_input = show_incomplete_checkbox if show_incomplete_checkbox is not None else gr.State(value=True)
    show_open_only_input = show_open_only_checkbox if show_open_only_checkbox is not None else gr.State(value=False)
    mark_by_input = mark_by_dropdown if mark_by_dropdown is not None else gr.State(value=MARK_BY_DEFAULT)

    extra_section_inputs = [show_incomplete_input, show_open_only_input, mark_by_input]
    extra_section_outputs = [winners_component, evolution_component, size_component]

    # Every control that actually exists triggers the same refresh.
    for control in (show_incomplete_checkbox, show_open_only_checkbox, mark_by_dropdown):
        if control is not None:
            control.change(
                fn=update_extra_sections,
                inputs=extra_section_inputs,
                outputs=extra_section_outputs,
            )


if __name__ == "__main__":
    # Build the page inside a Blocks context so there is a `demo` app to
    # launch when this module is run directly.
    with gr.Blocks() as demo:
        build_page()
    demo.launch()