| |
|
|
| |
| |
|
|
import json
import random
from urllib.parse import quote

import pandas as pd
import streamlit as st
from datasets import load_dataset
from datasets import get_dataset_config_names
|
|
st.title("Code:blue[Arena]")

# Registry of every loaded problem. The key is the identifier that the
# "?problem=" query parameter is looked up with further down the script.
problem_dict = {}

# LeetCode-style problems: keyed by their title and tagged as "leetcode".
with st.spinner("Loading Venus data...", show_time=True):
    venus_ds = load_dataset("Elfsong/leetcode_data", split='train')
    for record in venus_ds:
        record_key = record["title"]
        record['type'] = "leetcode"
        problem_dict[record_key] = record

# APPS problems: keyed by "apps_<problem_id>" and tagged as "apps".
with st.spinner("Loading APPS data...", show_time=True):
    apps_ds = load_dataset("Elfsong/APPS_Python", split='test')
    for record in apps_ds:
        record_key = f'apps_{record["problem_id"]}'
        record['type'] = "apps"
        problem_dict[record_key] = record

# Total number of problems across both datasets.
problem_count = len(problem_dict)
|
|
|
|
# ---------------------------------------------------------------------------
# Page routing.
#   * "?problem=<id>" in the URL -> detail page for a single problem.
#   * no "problem" parameter     -> tabbed overview (problems, submissions,
#                                   models, about).
# ---------------------------------------------------------------------------
if "problem" in st.query_params:
    problem_id = str(st.query_params["problem"])

    # The id comes straight from the URL, so it may be stale or mistyped.
    # Show a readable error instead of crashing with a KeyError.
    problem_instance = problem_dict.get(problem_id)
    if problem_instance is None:
        st.error(f"Unknown problem: {problem_id}")
        st.stop()

    problem_type = problem_instance['type']

    st.header(problem_id)

    # The two datasets store the statement under different field names.
    with st.expander("Problem Description"):
        if problem_type == "leetcode":
            st.markdown(problem_instance["question_content"])
        elif problem_type == "apps":
            st.markdown(problem_instance["problem_content"])

    # "test_cases" is a JSON-encoded list of {"input": ..., "output": ...}.
    with st.expander("Test Cases"):
        test_cases = json.loads(problem_instance["test_cases"])
        df = pd.DataFrame(
            {
                "input": [test_case['input'] for test_case in test_cases],
                "output": [test_case['output'] for test_case in test_cases],
            }
        )
        st.dataframe(
            df,
            column_config={
                "input": st.column_config.TextColumn("Input"),
                "output": st.column_config.TextColumn("Output"),
            },
            column_order=("input", "output"),
        )

    with st.expander("Test Case Generator"):
        if problem_type == "leetcode":
            # Only the first 20 lines of the generator are disclosed.
            prompt = "# For now, we only disclose the top 20 lines of the test case generator.\n# the full version will be released after the paper review process.\n"
            generator_lines = problem_instance["test_case_generator"].split("\n")
            test_case_generator = "\n".join(generator_lines[:20])
            st.code(prompt + test_case_generator)
        else:
            st.code("Stay tuned!")

else:
    tab_problem, tab_submission, tab_model, tab_about = st.tabs(["Problems", "Submissions", "Models", "About"])

    # ----- Problems tab: one row per loaded problem ------------------------
    with tab_problem:
        with st.spinner("Loading Framework...", show_time=True):
            link_prefix = "https://huggingface.co/spaces/Elfsong/CodeArena/?problem="
            problems = list(problem_dict.values())
            df = pd.DataFrame(
                {
                    "problem_id": [int(problem['problem_id']) for problem in problems],
                    "difficulty": [str(problem['difficulty']) for problem in problems],
                    "type": [str(problem['type']) for problem in problems],
                    # The dict key is exactly the id the detail page looks up.
                    # Percent-encode it so titles containing spaces, '&', '#'
                    # or '+' survive the round trip through the query string.
                    "problem_link": [link_prefix + quote(str(key), safe="") for key in problem_dict],
                    # Placeholder data: 20 random points per problem.
                    "acceptance_rate": [[random.randint(0, 100) for _ in range(20)] for _ in problems],
                }
            )
            st.dataframe(
                df,
                column_config={
                    "problem_id": st.column_config.NumberColumn("Problem ID", width='small'),
                    "difficulty": st.column_config.TextColumn("Difficulty", width='small'),
                    "type": st.column_config.TextColumn("Type", width='small'),
                    "acceptance_rate": st.column_config.LineChartColumn("Acceptance Rate", y_min=0, y_max=100),
                    "problem_link": st.column_config.LinkColumn("Link", display_text="Open", width='small'),
                },
                height=800,
                column_order=("problem_id", "difficulty", "type", "acceptance_rate", "problem_link"),
                hide_index=True,
            )

    # ----- Submissions tab: solutions of the selected model -----------------
    with tab_submission:
        st.header("Submissions")
        # Each config of the evaluation dataset corresponds to one model.
        models = get_dataset_config_names("Elfsong/Venus_Model_Evaluation")
        model_name = st.selectbox("Which model you are looking for?", models, placeholder="Select a model...")
        st.write("You selected:", model_name)

        with st.spinner("Loading Data...", show_time=True):
            ds = load_dataset("Elfsong/Venus_Model_Evaluation", model_name, split='train')
            df = pd.DataFrame(
                {
                    "problem_id": [int(problem['problem_id']) for problem in ds],
                    "solution": [str(problem['solution']) for problem in ds],
                }
            )
            st.dataframe(
                df,
                column_config={
                    "problem_id": st.column_config.NumberColumn("Problem ID", width='small'),
                    # Valid named widths are "small", "medium" and "large".
                    "solution": st.column_config.TextColumn("Solution", width='large'),
                },
                height=800,
                column_order=("problem_id", "solution"),
                hide_index=True,
            )

    # ----- Models tab: leaderboard (placeholder metrics for now) ------------
    with tab_model:
        model_list = [
            "deepSeek-Coder",
            "GPT-4o",
            "Claude-3-5-sonnet",
            "Gemini-1.5-flash",
            "DeepSeek-Coder-V2-Lite",
            "Claude-3-Opus",
            "Gemini-1.5-pro",
            "Llama-3.1-8B",
            "Llama-3-8B",
            "GPT-4-Turbo",
            "GPT-3.5-Turbo",
            "Mistral-Nemo",
            "CodeLlama-13b",
            "Claude-3-Haiku",
            "Mistral-7B-v0.3",
            "Codestral-22B-v0.1",
            "Claude-3-sonnet",
            "CodeLlama-34b",
            "CodeLlama-7b",
        ]
        zeros = [0] * len(model_list)

        df = pd.DataFrame(
            {
                "model_name": list(model_list),
                "dynamic_point": list(zeros),
                "pass@1": list(zeros),
                "beyond@t": list(zeros),
                "beyond@m": list(zeros),
                # random.randint is inclusive on both ends; cap at
                # problem_count so the value never exceeds the progress
                # column's max_value.
                "model_progress": [random.randint(0, problem_count) for _ in model_list],
            }
        )

        st.dataframe(
            df,
            column_config={
                "model_name": st.column_config.TextColumn("Model Name"),
                "dynamic_point": st.column_config.NumberColumn("Dynamic Point"),
                "pass@1": st.column_config.NumberColumn("Pass@1"),
                "beyond@t": st.column_config.NumberColumn("Beyond@Time"),
                "beyond@m": st.column_config.NumberColumn("Beyond@Memory"),
                "model_progress": st.column_config.ProgressColumn("Progress", min_value=0, max_value=problem_count, format="compact"),
            },
            # column_order takes DataFrame column names, not display labels.
            column_order=("model_name", "dynamic_point", "pass@1", "beyond@t", "beyond@m", "model_progress"),
            height=800,
        )

    # ----- About tab --------------------------------------------------------
    with tab_about:
        st.write("Hello World!")
        st.write("This is the new version of Code Arena. Refer to [Monolith](https://github.com/Elfsong/Monolith) for instructions on how to submit code.")
        st.write("🚧 WIP: We will update real data very soon!")
|
|