| from __future__ import annotations |
|
|
| import json |
| import inspect |
| import os |
| import shutil |
| import tempfile |
| import traceback |
| from datetime import datetime, timezone |
| from pathlib import Path |
|
|
| import gradio as gr |
| import pandas as pd |
| from huggingface_hub import Repository |
| from starlette.templating import Jinja2Templates |
|
|
# Workaround for gradio_client versions that crash when a JSON schema node is a
# bare boolean (allowed by the JSON Schema spec): coerce it to "boolean".
try:
| import gradio_client.utils as gradio_client_utils |
|
|
| _original_get_type = gradio_client_utils.get_type |
|
|
| def _safe_get_type(schema): |
| if isinstance(schema, bool): |
| return "boolean" |
| return _original_get_type(schema) |
|
|
| gradio_client_utils.get_type = _safe_get_type |
| except Exception: |
| pass |
|
|
# Compatibility shim: newer Starlette expects TemplateResponse(request, name, context),
# while older callers pass (name, context) with the request inside the context dict.
# Normalize either calling convention before delegating to the original method.
try:
| _original_template_response = Jinja2Templates.TemplateResponse |
| _template_response_params = list(inspect.signature(_original_template_response).parameters.keys()) |
| _template_response_request_first = len(_template_response_params) > 1 and _template_response_params[1] == "request" |
|
|
| def _compat_template_response(self, *args, **kwargs): |
| request = kwargs.pop("request", None) |
| name = kwargs.pop("name", None) |
| context = kwargs.pop("context", None) |
|
|
| if args: |
| if len(args) == 1: |
| if isinstance(args[0], str): |
| name = args[0] |
| else: |
| request = args[0] |
| else: |
| if isinstance(args[0], str) and isinstance(args[1], dict): |
| name = args[0] |
| context = args[1] |
| request = context.get("request", request) |
| else: |
| request = args[0] |
| name = args[1] |
| if len(args) > 2: |
| context = args[2] |
|
|
| if context is None: |
| context = {} |
| if request is None and isinstance(context, dict): |
| request = context.get("request") |
| if request is None: |
| raise TypeError("TemplateResponse requires a request object") |
| if not isinstance(context, dict): |
| context = dict(context) |
| if "request" not in context: |
| context = dict(context) |
| context["request"] = request |
|
|
| if _template_response_request_first: |
| return _original_template_response(self, request, name, context, **kwargs) |
| return _original_template_response(self, name, context, **kwargs) |
|
|
| Jinja2Templates.TemplateResponse = _compat_template_response |
| except Exception: |
| pass |
|
|
| from constants import ( |
| ALL_COLUMNS, |
| CITATION, |
| EXTERNAL_LINKS, |
| GOLD_PATHS, |
| HF_TOKEN, |
| INTRODUCTION, |
| SCORE_COLUMNS, |
| SEED_LEADERBOARD_PATH, |
| SPACE_SUBTITLE, |
| SPACE_TITLE, |
| SUBMISSION_CSV_PATH, |
| SUBMISSION_REPO_ID, |
| SUBMISSION_REPO_TYPE, |
| SUBMIT_GUIDANCE, |
| ) |
| from eval import evaluate_submission |
|
|
|
|
| def _empty_leaderboard(): |
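    """Return an empty DataFrame with the full leaderboard column schema."""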
| return pd.DataFrame(columns=ALL_COLUMNS) |
|
|
|
|
| def _normalize_leaderboard_df(df): |
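    """Coerce score columns to numeric so sorting and display behave consistently."""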
| for col in SCORE_COLUMNS: |
| if col in df.columns: |
| df[col] = pd.to_numeric(df[col], errors="coerce") |
| return df |
|
|
|
|
| def _seed_leaderboard(): |
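    """Load the bundled seed leaderboard, adding any columns it is missing."""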
| if not SEED_LEADERBOARD_PATH.exists(): |
| return _empty_leaderboard() |
|
|
| df = pd.read_csv(SEED_LEADERBOARD_PATH) |
| for col in ALL_COLUMNS: |
| if col not in df.columns: |
| df[col] = "" |
| return _normalize_leaderboard_df(df[ALL_COLUMNS]) |
|
|
|
|
| def _clone_submission_repo(): |
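    """Clone the submission repo via the git-based Repository helper.

    Returns (None, Path(".")) when no SUBMISSION_REPO_ID is configured.
    """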
| if not SUBMISSION_REPO_ID: |
| return None, Path(".") |
|
|
| local_dir = Path(tempfile.mkdtemp(prefix="rpc_bench_submission_")) |
| repo = Repository( |
| local_dir=str(local_dir), |
| clone_from=SUBMISSION_REPO_ID, |
| repo_type=SUBMISSION_REPO_TYPE, |
| use_auth_token=HF_TOKEN, |
| ) |
| repo.git_pull() |
| return repo, local_dir |
|
|
|
|
def _load_leaderboard():
    """Merge the seed leaderboard with any rows stored in the submission repo."""

    def _sorted(df):
        # Sort by the "Info" column when present; fall back to the original order
        # so a missing column cannot break leaderboard loading.
        if "Info" in df.columns:
            return df.sort_values(by=["Info"], ascending=False, na_position="last")
        return df

    try:
        seed_df = _seed_leaderboard()
        repo, local_dir = _clone_submission_repo()
        if repo is None:
            return _sorted(seed_df)

        csv_path = local_dir / SUBMISSION_CSV_PATH
        if not csv_path.exists():
            return _sorted(seed_df)

        df = pd.read_csv(csv_path)
        for col in ALL_COLUMNS:
            if col not in df.columns:
                df[col] = ""
        merged = pd.concat([seed_df, _normalize_leaderboard_df(df[ALL_COLUMNS])], ignore_index=True)
        return _sorted(merged)
    except Exception:
        print(traceback.format_exc())
        return _sorted(_seed_leaderboard())
|
|
|
|
| def _validate_submission_file(file_path): |
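    """Check that the upload is a JSON/JSONL file whose records carry the required fields."""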
| path = Path(file_path) |
| if not path.exists(): |
| return False, "Uploaded file does not exist.", [] |
| if path.suffix.lower() not in {".jsonl", ".json"}: |
| return False, "Submission file must be JSONL or JSON.", [] |
|
|
| rows = [] |
| try: |
| if path.suffix.lower() == ".json": |
| loaded = json.loads(path.read_text(encoding="utf-8")) |
| if not isinstance(loaded, list): |
| return False, "JSON submissions must be a list of records.", [] |
| rows = loaded |
| else: |
| with path.open("r", encoding="utf-8") as f: |
| for line in f: |
| line = line.strip() |
| if not line: |
| continue |
| rows.append(json.loads(line)) |
| except Exception as exc: |
| return False, f"Failed to parse submission file: {exc}", [] |
|
|
    required = {"id", "part_idx", "question", "gen_answer", "category"}
    for idx, row in enumerate(rows, start=1):
        if not isinstance(row, dict):
            return False, f"Row {idx} is not a JSON object.", []
        missing = required - set(row.keys())
        if missing:
            return False, f"Row {idx} is missing fields: {sorted(missing)}", []
    return True, "Submission format is valid.", rows
|
|
|
|
| def _append_submission_record(local_dir, leaderboard, row): |
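    """Append one leaderboard row to the submission CSV and return the merged frame."""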
| csv_path = local_dir / SUBMISSION_CSV_PATH |
| merged = pd.concat([leaderboard, pd.DataFrame([row])], ignore_index=True) |
| merged = merged.reindex(columns=ALL_COLUMNS) |
| merged.to_csv(csv_path, index=False) |
| return merged |
|
|
|
|
| def submit_prediction( |
| input_file, |
| model_name: str, |
| organization: str, |
| revision: str, |
| model_link: str, |
| input_config: str, |
| split: str, |
| ): |
| if input_file is None: |
| return "Error: please upload a prediction file.", gr.update(value=_load_leaderboard()) |
|
|
| path = input_file if isinstance(input_file, str) else getattr(input_file, "name", None) |
| if not path: |
| return "Error: could not access the uploaded file.", gr.update(value=_load_leaderboard()) |
|
|
    ok, message, _ = _validate_submission_file(path)
    if not ok:
        return f"Error: {message}", gr.update(value=_load_leaderboard())

    if not model_name.strip() and not revision.strip():
        return "Error: please provide a model name.", gr.update(value=_load_leaderboard())
|
|
| try: |
| repo, local_dir = _clone_submission_repo() |
| leaderboard = _load_leaderboard() |
|
|
        now = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")
        # The revision label, when given, takes precedence for display; wrap the name
        # as a markdown link unless it already contains one.
        display_name = revision.strip() or model_name.strip()
        if model_link.strip() and "](" not in display_name:
            display_name = f"[{display_name}]({model_link.strip()})"
|
|
| status = "pending" |
| score_row = {k: "" for k in SCORE_COLUMNS} |
| split_path = GOLD_PATHS.get(split.lower()) |
|
|
        # Inline scoring runs only when an OpenAI key is configured and the gold
        # answers for the selected split ship with the Space; otherwise the upload
        # is stored and marked as pending evaluation.
        if os.environ.get("OPENAI_API_KEY") and split_path and split_path.exists():
| eval_dir = local_dir / ".eval" if repo is not None else Path(tempfile.mkdtemp(prefix="rpc_bench_eval_")) |
| try: |
| score_row = evaluate_submission(split_path, path, eval_dir) |
| status = "scored" |
| except Exception: |
| print(traceback.format_exc()) |
| status = "uploaded, evaluation failed" |
| else: |
| status = "uploaded, evaluation pending" |
|
|
| record = { |
| "Model": display_name, |
| "Organization": organization.strip(), |
| "Input Config": input_config.strip().upper(), |
| "Date": now, |
| "Status": status, |
| **{k: score_row.get(k, "") for k in SCORE_COLUMNS}, |
| } |
|
|
| if repo is None: |
| return ( |
| "Submission accepted, but no submission repository is configured. " |
| "Set `SUBMISSION_REPO_ID` to enable persistent leaderboard updates.", |
| gr.update(value=_load_leaderboard()), |
| ) |
|
|
        # Keep a timestamped copy of the raw prediction file next to the leaderboard CSV.
        submissions_dir = local_dir / "submissions"
| submissions_dir.mkdir(parents=True, exist_ok=True) |
| stored_name = f"{datetime.now(timezone.utc).strftime('%Y%m%d_%H%M%S')}_{Path(path).name}" |
| shutil.copy2(path, submissions_dir / stored_name) |
|
|
| updated_leaderboard = _append_submission_record(local_dir, leaderboard, record) |
| repo.push_to_hub() |
|
|
| return f"OK: {message}. Status: {status}", gr.update(value=updated_leaderboard) |
| except Exception as exc: |
| print(traceback.format_exc()) |
| return f"Error: {exc}", gr.update(value=_load_leaderboard()) |
|
|
|
|
| def refresh_leaderboard(): |
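    """Reload the leaderboard from the seed file and the submission repo."""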
| return gr.update(value=_load_leaderboard()) |
|
|
|
|
| with gr.Blocks(title=SPACE_TITLE) as demo: |
| gr.Markdown(EXTERNAL_LINKS) |
| gr.Markdown(f"# {SPACE_TITLE}") |
| gr.Markdown(SPACE_SUBTITLE) |
| gr.Markdown(INTRODUCTION) |
|
|
| with gr.Tabs(): |
| with gr.TabItem("🏅 Leaderboard"): |
| with gr.Row(): |
| refresh_btn = gr.Button("Refresh") |
            # Datatypes are positional and must line up with ALL_COLUMNS: a markdown
            # model link, four metadata strings, then the numeric score columns.
            leaderboard = gr.Dataframe(
| value=_load_leaderboard(), |
| headers=ALL_COLUMNS, |
| datatype=["markdown", "str", "str", "str", "str", "number", "number", "number", "number", "number"], |
| interactive=False, |
| wrap=True, |
| ) |
| refresh_btn.click(fn=refresh_leaderboard, inputs=None, outputs=leaderboard) |
|
|
| with gr.TabItem("📝 Submit"): |
| gr.Markdown(SUBMIT_GUIDANCE) |
| with gr.Row(): |
| with gr.Column(): |
| model_name = gr.Textbox(label="Model name", placeholder="Your model name") |
| organization = gr.Textbox(label="Organization", placeholder="Your lab, company, or team name") |
| revision = gr.Textbox(label="Revision name", placeholder="Optional revision label") |
| with gr.Column(): |
| model_link = gr.Textbox(label="Model link", placeholder="https://huggingface.co/...") |
| input_config = gr.Dropdown( |
| choices=["TEXT", "VISUAL"], |
| value="TEXT", |
| label="Input config", |
| interactive=True, |
| ) |
| split = gr.Dropdown( |
| choices=["test", "dev"], |
| value="test", |
| label="Evaluation split", |
| interactive=True, |
| ) |
|
|
| input_file = gr.File(label="Upload prediction file", file_count="single", type="filepath") |
| submit_btn = gr.Button("Submit and evaluate") |
| submit_result = gr.Markdown() |
|
|
| submit_btn.click( |
| fn=submit_prediction, |
| inputs=[input_file, model_name, organization, revision, model_link, input_config, split], |
| outputs=[submit_result, leaderboard], |
| ) |
|
|
| with gr.TabItem("ℹ️ About"): |
| gr.Markdown("## Citation") |
| gr.Markdown(f"```bibtex\n{CITATION}\n```") |
|
|
| gr.Markdown( |
| "If you want inline evaluation, configure `OPENAI_API_KEY` and `OPENAI_BASE_URL` in the Space secrets." |
| ) |
|
|
|
|
| if __name__ == "__main__": |
| demo.launch(show_api=False) |
|
|