Spaces:
Running
Running
"""EgoMemReason leaderboard — Gradio Space app.

Tabs:
- Leaderboard: public, auto-refresh, toggle selected-only / show-all
- Submit: HF login required; JSON upload + metadata form
- Manage: toggle is_selected on your own past submissions
- About: paper description + citation
"""
| import os | |
| # Workaround for a long-standing gradio_client bug that hits the /info endpoint | |
| # when any component emits a JSON schema with `additionalProperties: True/False` | |
| # (a plain bool). Both get_type() and _json_schema_to_python_type() assume the | |
| # schema is a dict and crash on bools. Patch both before Gradio loads its | |
| # FastAPI routes. | |
| import gradio_client.utils as _gcu | |
# Keep a handle on the stock implementation so the wrapper can delegate to it.
_orig_get_type = _gcu.get_type


def _safe_get_type(schema):
    """Return the type name for *schema*, tolerating non-dict schemas.

    Anything that is not a dict is reported as ``"Any"``; dict schemas are
    passed through to the original ``gradio_client.utils.get_type``.
    """
    return _orig_get_type(schema) if isinstance(schema, dict) else "Any"


# Install the tolerant wrapper in place of the original.
_gcu.get_type = _safe_get_type
# Keep a handle on the stock converter so the wrapper can delegate to it.
_orig_json_schema = _gcu._json_schema_to_python_type


def _safe_json_schema(schema, defs=None):
    """Convert a JSON schema to a Python type string, tolerating non-dict schemas.

    Dict schemas go to the original converter unchanged. Any other value is
    mapped by truthiness: a truthy schema (e.g. `additionalProperties: True`,
    accepts anything) becomes ``"Any"``; a falsy one (e.g. `False`, accepts
    nothing) becomes ``"None"``.
    """
    if isinstance(schema, dict):
        return _orig_json_schema(schema, defs)
    if schema:
        return "Any"
    return "None"


# Install the tolerant wrapper in place of the original.
_gcu._json_schema_to_python_type = _safe_json_schema
| import gradio as gr | |
| import pandas as pd | |
| import auth | |
| import evaluator | |
| import ledger | |
| # --------------------------------------------------------------------------- | |
| # Boot: pull annotations_private.json from the private dataset repo. | |
| # --------------------------------------------------------------------------- | |
# BOOT_ERROR stays None on a clean boot; otherwise it holds the message of the
# RuntimeError raised while fetching the private annotations, so the app can
# still come up (e.g. in local dev without HF_TOKEN) and show a clear banner.
BOOT_ERROR = None
try:
    ledger.ensure_private_annotations()
except RuntimeError as boot_exc:
    BOOT_ERROR = str(boot_exc)
# Column headers of the public leaderboard table, in display order. The rows
# built by _row_from_submission follow exactly this order.
LEADERBOARD_COLUMNS = [
    # Rank and identity.
    "#", "Method", "Team",
    # Aggregate score followed by the six per-category scores.
    "Overall", "Cumul", "Count", "Order", "Link", "Spatial", "Activity",
    # Submission metadata.
    "Size", "Ext", "Modality",
    # Project / paper links.
    "Links",
]
| def _row_from_submission(sub, rank): | |
| m = sub["metrics"] | |
| links = [] | |
| if sub.get("project_url"): | |
| links.append(f"[project]({sub['project_url']})") | |
| if sub.get("publication_url"): | |
| links.append(f"[paper]({sub['publication_url']})") | |
| return [ | |
| rank, | |
| sub["method_name"], | |
| sub["team_name"], | |
| m["Overall"], | |
| m["Cumulative State Tracking"], | |
| m["Temporal Counting"], | |
| m["Event Ordering"], | |
| m["Event Linking"], | |
| m["Spatial Preference"], | |
| m["Activity Pattern"], | |
| sub.get("model_size") or "β", | |
| "yes" if sub.get("uses_external_data") else "no", | |
| sub.get("uses_video_frames") or "β", | |
| " Β· ".join(links) or "β", | |
| ] | |
def load_leaderboard(show_all):
    """Return the leaderboard as a DataFrame, best ``Overall`` score first.

    Args:
        show_all: When falsy, only submissions whose ``is_selected`` flag is
            truthy are kept; when truthy, every submission in the ledger is
            listed.

    Returns:
        A ``pandas.DataFrame`` with ``LEADERBOARD_COLUMNS`` as columns and a
        1-based rank in the first column.
    """
    candidates = ledger.list_submissions()
    visible = candidates if show_all else [c for c in candidates if c.get("is_selected")]
    ranked = sorted(visible, key=lambda c: c["metrics"]["Overall"], reverse=True)
    table = [_row_from_submission(entry, pos) for pos, entry in enumerate(ranked, start=1)]
    return pd.DataFrame(table, columns=LEADERBOARD_COLUMNS)
| # --------------------------------------------------------------------------- | |
| # Submit | |
| # --------------------------------------------------------------------------- | |
# Submission rate limit: at most this many scored uploads per user per window.
_MAX_SUBMISSIONS_PER_WINDOW = 5
_RATE_WINDOW_HOURS = 24


def _is_http_url(url):
    """Return True when *url* starts with an explicit http:// or https:// scheme."""
    return url.lower().startswith(("http://", "https://"))


def _format_metric(value):
    """Format a metric with two decimals, falling back to ``str`` for non-numbers."""
    try:
        return f"{value:.2f}"
    except (TypeError, ValueError):
        return str(value)


def handle_submission(file, team_name, method_name, model_size, uses_external,
                      uses_frames, method_description, project_url,
                      publication_url, profile: gr.OAuthProfile | None):
    """Validate, score and log one submission; return a Markdown status message.

    Args:
        file: Uploaded submission file (or ``None`` if nothing was uploaded).
        team_name: Required team name.
        method_name: Required method name.
        model_size: Free-text model size.
        uses_external: ``"yes"`` or ``"no"``.
        uses_frames: Selected video input modality.
        method_description: Free-text description.
        project_url: Optional project URL; must be http(s) when given.
        publication_url: Optional publication URL; must be http(s) when given.
        profile: OAuth profile of the caller. The click handler wires only the
            nine form inputs, so this is presumably injected by Gradio from
            the login session — confirm against the Gradio OAuth docs.

    Returns:
        A Markdown string: an error/rate-limit message, or a confirmation with
        the new ``submission_id`` and a table of the computed metrics. All
        failures are reported through the returned text; nothing is raised.
    """
    # The boot banner tells users that submissions are disabled when the
    # private annotations could not be fetched; enforce that here as well.
    if BOOT_ERROR:
        return f"**Error:** submissions are disabled: {BOOT_ERROR}"

    user = auth.resolve_user(profile)
    if user is None:
        return "**Error:** sign in with Hugging Face first (button at the top of the page)."

    # Normalize free-text fields so whitespace-only values count as empty.
    team_name = (team_name or "").strip()
    method_name = (method_name or "").strip()
    project_url = (project_url or "").strip()
    publication_url = (publication_url or "").strip()

    if not team_name or not method_name:
        return "**Error:** `team_name` and `method_name` are required."
    if uses_external not in ("yes", "no"):
        return "**Error:** answer `Uses external data?` (yes/no)."
    if not uses_frames:
        return "**Error:** pick a video input modality."
    # These URLs are later rendered as links on the public leaderboard.
    for label, url in (("Project URL", project_url), ("Publication URL", publication_url)):
        if url and not _is_http_url(url):
            return f"**Error:** `{label}` must start with `http://` or `https://`."
    if file is None:
        return "**Error:** upload a `.json` submission file."

    recent = ledger.count_recent(user, hours=_RATE_WINDOW_HOURS)
    if recent >= _MAX_SUBMISSIONS_PER_WINDOW:
        return (f"**Rate limit:** you have **{recent}** submissions in the last "
                f"{_RATE_WINDOW_HOURS} h (max {_MAX_SUBMISSIONS_PER_WINDOW}). Try again later.")

    # Accept both a file-like object exposing `.name` and a plain path string.
    submission_path = getattr(file, "name", file)
    try:
        metrics = evaluator.score_submission(submission_path)
    except ValueError as e:
        return f"**Validation error:**\n```\n{e}\n```"
    except Exception as e:
        return f"**Internal error scoring submission:** `{type(e).__name__}: {e}`"

    try:
        sid = ledger.append_submission(
            hf_user_id=user,
            team_name=team_name,
            method_name=method_name,
            model_size=model_size,
            uses_external_data=(uses_external == "yes"),
            uses_video_frames=uses_frames,
            method_description=method_description,
            project_url=project_url,
            publication_url=publication_url,
            metrics=metrics,
        )
    except Exception as e:
        # Scoring succeeded, so hand the metrics back even though logging failed.
        return (f"**Scored, but failed to persist to ledger:** `{type(e).__name__}: {e}`\n\n"
                f"Your metrics were:\n```\n{metrics}\n```")

    # The submission is already persisted at this point, so formatting must
    # not raise: _format_metric falls back to str() for non-numeric values.
    rows = "\n".join(f"| {k} | **{_format_metric(v)}** |" for k, v in metrics.items())
    return (
        f"✅ **Submission logged.** `submission_id = {sid}`\n\n"
        f"| Metric | Score (%) |\n|---|---|\n{rows}\n\n"
        "Go to **Manage my submissions** to mark this as your official entry."
    )
| # --------------------------------------------------------------------------- | |
| # Manage | |
| # --------------------------------------------------------------------------- | |
# Column headers of the per-user "Manage my submissions" table, in the order
# in which load_my_submissions fills each row.
MANAGE_COLUMNS = [
    "submission_id",
    "method_name",
    "Overall",
    "is_selected",
    "submitted_at_utc",
]
def load_my_submissions(profile: gr.OAuthProfile | None):
    """Return the signed-in user's submissions, newest first.

    Args:
        profile: OAuth profile of the caller, resolved to a user via
            ``auth.resolve_user``.

    Returns:
        A ``pandas.DataFrame`` with ``MANAGE_COLUMNS`` as columns. It is empty
        when no user can be resolved. Rows are ordered by descending
        ``submitted_at_utc`` (missing timestamps are treated as ``""``).
    """
    user = auth.resolve_user(profile)
    if user is None:
        return pd.DataFrame(columns=MANAGE_COLUMNS)

    # Keep only records owned by this user, projected onto MANAGE_COLUMNS.
    mine = [
        [
            record["submission_id"],
            record["method_name"],
            record["metrics"]["Overall"],
            record.get("is_selected", False),
            record.get("submitted_at_utc", ""),
        ]
        for record in ledger.list_submissions()
        if record.get("hf_user_id") == user
    ]
    # Index 4 is the submission timestamp.
    mine.sort(key=lambda entry: entry[4], reverse=True)
    return pd.DataFrame(mine, columns=MANAGE_COLUMNS)
def set_my_selected(submission_id, profile: gr.OAuthProfile | None):
    """Mark one of the caller's submissions as selected.

    Args:
        submission_id: Identifier typed by the user; surrounding whitespace is
            ignored.
        profile: OAuth profile of the caller.

    Returns:
        A ``(message, table)`` pair: a Markdown status message and the
        caller's refreshed submissions table. ``ValueError`` and
        ``PermissionError`` from the ledger are reported in the message.
    """

    def _reply(message):
        # Every outcome is paired with a freshly loaded submissions table.
        return message, load_my_submissions(profile)

    user = auth.resolve_user(profile)
    if user is None:
        return _reply("**Error:** sign in first.")

    wanted = submission_id.strip() if submission_id else ""
    if not wanted:
        return _reply("**Error:** paste a submission_id.")

    try:
        ledger.set_selected(wanted, user)
    except (ValueError, PermissionError) as e:
        return _reply(f"**Error:** {e}")
    return _reply(f"β `{wanted}` is now your selected entry.")
| # --------------------------------------------------------------------------- | |
| # About | |
| # --------------------------------------------------------------------------- | |
# Markdown shown on the "About" tab.
# NOTE(review): this text was recovered from a mis-decoded copy. Dashes,
# middle dots and the laptop/package/clapper emoji were restored from their
# byte patterns; the glyphs in front of "Project page" and "Paper" could not
# be recovered, so the globe and page emoji are stand-ins — confirm.
# Blank lines separate paragraphs, lists and headings so that the stats line
# and body text are not absorbed into the preceding block when rendered.
ABOUT_MD = """\
## EgoMemReason

**A Memory-driven Reasoning Benchmark for Long-Horizon Egocentric Video Understanding.**

EgoMemReason is a 500-question multiple-choice benchmark over week-long egocentric
videos (built on [EgoLife](https://egolife-ai.github.io/)). Models must answer
questions whose evidence is sparsely distributed across hours or days, exercising
three memory types:

- **Entity memory** — Cumulative State Tracking, Temporal Counting
- **Event memory** — Event Ordering, Event Linking
- **Behavior memory** — Spatial Preference Inference, Activity Pattern Inference

500 Qs · avg. 5.1 evidence segments / Q · avg. 25.9 h memory backtracking. The
strongest model in the paper reaches **39.6% Overall**.

### Resources

- 🌐 Project page: <https://egomemreason.github.io/>
- 📄 Paper: <https://arxiv.org/abs/2605.09874>
- 💻 Code & reference eval scripts: <https://github.com/Ziyang412/EgoMemReason>
- 📦 Public questions (no answers): <https://huggingface.co/datasets/Ted412/EgoMemReason>
- 🎬 EgoLife video frames: <https://egolife-ai.github.io/>

### Submission

Upload a JSON file with 500 entries:

```json
[
  {"example_id": 1, "predicted_answer": "A"},
  ...
]
```

Questions have 4-10 options (letters A-J) — `predicted_answer` must be a letter
that appears in that question's `options` dict. See
[SUBMISSION_FORMAT.md](https://github.com/Ziyang412/EgoMemReason/blob/main/SUBMISSION_FORMAT.md)
for the full spec.

### License

- **Annotations** (this Space + the public dataset): CC BY-NC 4.0.
- **Video frames**: governed by the [EgoLife data license](https://egolife-ai.github.io/) — you must accept their terms separately.

### Citation

```bibtex
@misc{wang2026egomemreasonmemorydrivenreasoningbenchmark,
  title={EgoMemReason: A Memory-Driven Reasoning Benchmark for Long-Horizon Egocentric Video Understanding},
  author={Ziyang Wang and Yue Zhang and Shoubin Yu and Ce Zhang and Zengqi Zhao and Jaehong Yoon and Hyunji Lee and Gedas Bertasius and Mohit Bansal},
  year={2026},
  eprint={2605.09874},
  archivePrefix={arXiv},
  primaryClass={cs.CV},
  url={https://arxiv.org/abs/2605.09874},
}
```
"""
| # --------------------------------------------------------------------------- | |
| # UI | |
| # --------------------------------------------------------------------------- | |
# Render type of each leaderboard column, aligned with LEADERBOARD_COLUMNS.
# The last column ("Links") holds Markdown link text, so it must be rendered
# as Markdown; with the default string rendering the raw `[project](...)`
# source would be displayed instead of a link.
_LEADERBOARD_DATATYPES = (
    ["number", "str", "str"]    # "#", "Method", "Team"
    + ["number"] * 7            # "Overall" and the six per-category scores
    + ["str", "str", "str"]     # "Size", "Ext", "Modality"
    + ["markdown"]              # "Links"
)

with gr.Blocks(title="EgoMemReason Leaderboard") as demo:
    gr.Markdown("# 🧠 EgoMemReason — Leaderboard")
    gr.Markdown(
        "*Memory-driven reasoning over week-long egocentric video. 500 MCQs · "
        "Entity / Event / Behavior memory.*"
    )
    if BOOT_ERROR:
        gr.Markdown(f"⚠️ **Boot warning:** {BOOT_ERROR}\n\nSubmissions are disabled.")
    login_btn = gr.LoginButton()

    with gr.Tab("About"):
        gr.Markdown(ABOUT_MD)

    with gr.Tab("Leaderboard"):
        with gr.Row():
            show_all = gr.Checkbox(
                value=False,
                label="Show all submissions (not just each team's selected entry)",
            )
            refresh_btn = gr.Button("Refresh", size="sm")
        leaderboard_df = gr.Dataframe(
            # Snapshot taken while the UI is built; it is replaced on every
            # page load by the demo.load() hook below.
            value=load_leaderboard(False),
            headers=LEADERBOARD_COLUMNS,
            datatype=_LEADERBOARD_DATATYPES,
            interactive=False,
            wrap=False,
        )
        show_all.change(load_leaderboard, inputs=[show_all], outputs=[leaderboard_df])
        refresh_btn.click(load_leaderboard, inputs=[show_all], outputs=[leaderboard_df])
        # Without this, visitors keep seeing the table computed at boot until
        # they press "Refresh".
        demo.load(load_leaderboard, inputs=[show_all], outputs=[leaderboard_df])

    with gr.Tab("Submit"):
        gr.Markdown("**Sign in with Hugging Face (button above) before submitting.** "
                    "Limit: 5 submissions per HF user per 24 h.")
        with gr.Row():
            team_name = gr.Textbox(label="Team name *", max_lines=1)
            method_name = gr.Textbox(label="Method name *", max_lines=1)
        with gr.Row():
            model_size = gr.Textbox(label="Model size (e.g. 8B, 32B, API)", max_lines=1)
            uses_external = gr.Radio(
                ["yes", "no"], label="Uses training data beyond EgoLife? *",
            )
        uses_frames = gr.Radio(
            ["frames-only", "video-only", "frames+audio", "captions-only", "other"],
            label="Video input modality *",
        )
        method_description = gr.Textbox(label="Method description", lines=3)
        with gr.Row():
            project_url = gr.Textbox(label="Project URL", max_lines=1)
            publication_url = gr.Textbox(label="Publication URL (arXiv/OpenReview)", max_lines=1)
        submission_file = gr.File(label="submission.json", file_types=[".json"])
        submit_btn = gr.Button("Score & log", variant="primary")
        result_md = gr.Markdown()
        # The handler's trailing `profile` parameter is not listed in `inputs`.
        submit_event = submit_btn.click(
            handle_submission,
            inputs=[submission_file, team_name, method_name, model_size,
                    uses_external, uses_frames, method_description,
                    project_url, publication_url],
            outputs=[result_md],
        )

    with gr.Tab("Manage my submissions"):
        gr.Markdown(
            "Toggle which of your past submissions is the official **selected** entry. "
            "Only your own submissions appear here. "
            "Only one entry per HF user can be selected at a time."
        )
        my_subs = gr.Dataframe(
            value=pd.DataFrame(columns=MANAGE_COLUMNS),
            headers=MANAGE_COLUMNS,
            interactive=False,
            wrap=False,
        )
        selected_id = gr.Textbox(label="submission_id to mark as selected", max_lines=1)
        select_btn = gr.Button("Mark as my selected entry")
        manage_msg = gr.Markdown()
        demo.load(load_my_submissions, outputs=[my_subs])
        # The submit confirmation sends users to this tab, so the new entry
        # must already be listed here without a page reload.
        submit_event.then(load_my_submissions, outputs=[my_subs])
        select_event = select_btn.click(
            set_my_selected, inputs=[selected_id], outputs=[manage_msg, my_subs],
        )
        # Changing the selected entry changes what the public table shows.
        select_event.then(load_leaderboard, inputs=[show_all], outputs=[leaderboard_df])
if __name__ == "__main__":
    # Script entry point: enable the request queue, then start the server.
    queued_app = demo.queue()
    queued_app.launch()