Spaces:
Running
Running
| """Seed the public leaderboard dataset with the paper's Table 1 baselines. | |
| Generates one JSON record per system under seeds/submissions/<uuid>.json. | |
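| UUIDs are deterministic (uuid5 of 'paper-baseline:<method name>' within a fixed | |
| namespace), so re-running this script overwrites existing seed records rather | |
| than creating duplicates. Renaming a method changes its UUID, and the file | |
| written under the old name is not removed by this script. | |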
| To publish to the public dataset, either: | |
| (a) Drag-drop the resulting folder onto the HF UI of | |
| Ted412/EgoMemReason-Leaderboard → submissions/, or | |
| (b) Run: | |
| from huggingface_hub import HfApi | |
| HfApi().upload_folder( | |
| folder_path="seeds/submissions", | |
| path_in_repo="submissions", | |
| repo_id="Ted412/EgoMemReason-Leaderboard", | |
| repo_type="dataset", | |
| ) | |
| """ | |
| import json | |
| import pathlib | |
| import uuid | |
# Fixed namespace: keeps every uuid5 submission id identical from run to run.
SEED_NS = uuid.UUID("5a4f2b1c-0000-4ab2-9c01-deadbeef0001")

# HF handle under which the authors uploaded the paper's numbers.
SEED_HF_USER = "Ted412"

# COLM submission date — adjust if needed.
SEED_DATE = "2026-05-01T00:00:00Z"

# Numbers are transcribed from Table 1 of the paper PDF
# (Long_Video_Memory_for_Arxiv_v24.pdf), which is the source of truth.
# Score columns, in order: Tracking, Counting, Ordering, Linking, Spatial,
# Activity, Overall.
#
# NOTE(review): in 16 of the 18 rows below, overall equals
# (tracking + counting + ordering + linking + spatial/2 + activity/2) / 5
# to one decimal. Two rows do not: Qwen-3-VL-32B (formula gives 36.6, listed
# 36.8) and LongVA-7B (formula gives 20.2, listed 20.6). Re-check those two
# rows against the PDF before publishing.
PAPER_TABLE = [
    # Row layout:
    #   (method_name, category, model_size, uses_external_data, modality,
    #    tracking, counting, ordering, linking, spatial, activity, overall)

    # Baseline
    ("Random", "Baseline", "—", False, "other",
     19.6, 16.7, 11.1, 17.3, 19.3, 19.2, 16.8),

    # General MLLM
    ("InternVL3.5-8B", "General MLLM", "8B", False, "frames-only",
     23.0, 29.0, 23.0, 27.0, 34.0, 42.0, 28.0),
    ("Qwen-3-VL-8B", "General MLLM", "8B", False, "frames-only",
     35.0, 28.0, 23.0, 21.0, 40.0, 42.0, 29.6),
    ("InternVL3.5-38B", "General MLLM", "38B", False, "frames-only",
     33.0, 40.0, 27.0, 24.0, 46.0, 32.0, 32.6),
    ("Qwen-3-VL-30B-A3B", "General MLLM", "30B MoE", False, "frames-only",
     36.0, 48.0, 25.0, 26.0, 40.0, 30.0, 34.0),
    ("Qwen-3-VL-32B", "General MLLM", "32B", False, "frames-only",
     35.0, 46.0, 27.0, 27.0, 50.0, 46.0, 36.8),
    ("GPT-5", "General MLLM", "API", False, "frames-only",
     29.0, 42.0, 20.0, 18.0, 32.0, 28.0, 27.8),
    ("Gemini-3-Flash", "General MLLM", "API", False, "frames-only",
     46.0, 28.0, 36.0, 44.0, 44.0, 44.0, 39.6),
    ("Gemini-3.1-Pro", "General MLLM", "API", False, "frames-only",
     40.0, 26.0, 44.0, 33.0, 40.0, 48.0, 37.4),

    # Video-specific MLLM
    ("LongVA-7B", "Video-specific MLLM", "7B", False, "frames-only",
     22.0, 18.0, 20.0, 20.0, 20.0, 22.0, 20.6),
    ("StreamingVLM", "Video-specific MLLM", "—", False, "video-only",
     25.0, 29.0, 21.0, 20.0, 20.0, 32.0, 24.2),
    ("InternVideo-2.5-8B", "Video-specific MLLM", "8B", False, "frames-only",
     29.0, 27.0, 25.0, 15.0, 32.0, 32.0, 25.6),
    ("VideoLLaMA3-8B", "Video-specific MLLM", "8B", False, "frames-only",
     23.0, 31.0, 27.0, 32.0, 38.0, 36.0, 30.0),
    ("Molmo2-8B", "Video-specific MLLM", "8B", False, "frames-only",
     36.0, 50.0, 27.0, 25.0, 34.0, 22.0, 33.2),

    # Agentic Video Framework
    ("SiLVR", "Agentic Video Framework", "API + 8B", False, "captions-only",
     31.0, 14.0, 27.0, 17.0, 18.0, 28.0, 22.4),
    ("Ego-R1", "Agentic Video Framework", "API", False, "frames-only",
     30.0, 18.0, 23.0, 18.0, 48.0, 32.0, 25.8),
    ("WorldMM", "Agentic Video Framework", "API", False, "frames-only",
     32.0, 44.0, 21.0, 21.0, 34.0, 36.0, 30.6),
    ("AVP", "Agentic Video Framework", "API", False, "frames-only",
     34.0, 42.0, 31.0, 27.0, 38.0, 34.0, 34.0),
]

# Links attached to every seed record.
PROJECT_URL = "https://github.com/Ziyang412/EgoMemReason"
PUBLICATION_URL = "https://arxiv.org/abs/2605.09874"
def build_record(row):
    """Turn one PAPER_TABLE row into a leaderboard submission record.

    Args:
        row: 12-item iterable laid out as (method_name, category, model_size,
            uses_external_data, modality, tracking, counting, ordering,
            linking, spatial, activity, overall).

    Returns:
        A dict holding the submission metadata plus a nested "metrics" dict.
        "submission_id" is the uuid5 of "paper-baseline:<method_name>" under
        SEED_NS, so the same method name always maps to the same id.

    Raises:
        ValueError: if row does not unpack into exactly 12 items.
    """
    (method, group, params, external_data, input_kind,
     trk, cnt, ordr, lnk, spt, act, ovr) = row

    # Pair each display label with its score; insertion order fixes the key
    # order of the serialized record.
    metric_labels = (
        "Cumulative State Tracking",
        "Temporal Counting",
        "Event Ordering",
        "Event Linking",
        "Spatial Preference",
        "Activity Pattern",
        "Overall",
    )
    scores = dict(zip(metric_labels, (trk, cnt, ordr, lnk, spt, act, ovr)))

    seed_id = uuid.uuid5(SEED_NS, f"paper-baseline:{method}")
    blurb = f"Baseline from Table 1 of the EgoMemReason paper ({group})."

    return {
        "submission_id": str(seed_id),
        "submitted_at_utc": SEED_DATE,
        "hf_user_id": SEED_HF_USER,
        "team_name": "EgoMemReason",
        "method_name": method,
        "model_size": params,
        "uses_external_data": external_data,
        # The record schema stores the modality string under this key.
        "uses_video_frames": input_kind,
        "method_description": blurb,
        "project_url": PROJECT_URL,
        "publication_url": PUBLICATION_URL,
        "is_selected": True,
        "metrics": scores,
    }
def main():
    """Write one seed JSON file per PAPER_TABLE row and print upload instructions.

    Files go to ``<parent of this script's directory>/seeds/submissions/`` and
    are named ``<submission_id>.json``. The directory is created if missing,
    and a file that already exists under the same name is overwritten.
    """
    out_dir = pathlib.Path(__file__).resolve().parents[1] / "seeds" / "submissions"
    out_dir.mkdir(parents=True, exist_ok=True)
    for row in PAPER_TABLE:
        record = build_record(row)
        path = out_dir / f"{record['submission_id']}.json"
        # Explicit encoding keeps the bytes on disk independent of the
        # platform's locale default.
        path.write_text(json.dumps(record, indent=2), encoding="utf-8")
    print(f"Wrote {len(PAPER_TABLE)} seed records to {out_dir}/")
    print("Next: upload to Ted412/EgoMemReason-Leaderboard via HF UI or:")
    # Point the suggested command at the directory that was actually written.
    # out_dir is absolute, so the command works from any working directory,
    # whereas a relative 'seeds/submissions' only works from the repo root.
    folder = out_dir.as_posix()
    print(" python -c \"from huggingface_hub import HfApi; "
          "HfApi().upload_folder("
          f"folder_path='{folder}', "
          "path_in_repo='submissions', "
          "repo_id='Ted412/EgoMemReason-Leaderboard', "
          "repo_type='dataset')\"")


if __name__ == "__main__":
    main()