Spaces:
Running
Running
File size: 5,849 Bytes
"""Seed the public leaderboard dataset with the paper's Table 1 baselines.
Generates one JSON record per system under seeds/submissions/<uuid>.json.
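UUIDs are deterministic (uuid5 of 'paper-baseline:<method name>' in a fixed
namespace), so re-running this script overwrites existing seed records rather
than creating duplicates. Renaming a method produces a new id; the file written
under the old name is not removed automatically.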
To publish to the public dataset, either:
(a) Drag-drop the resulting folder onto the HF UI of
Ted412/EgoMemReason-Leaderboard → submissions/, or
(b) Run:
from huggingface_hub import HfApi
HfApi().upload_folder(
folder_path="seeds/submissions",
path_in_repo="submissions",
repo_id="Ted412/EgoMemReason-Leaderboard",
repo_type="dataset",
)
"""
import json
import pathlib
import uuid
# Stable seed namespace so uuid5 ids are reproducible across runs.
# Changing this value changes every generated submission_id (and therefore
# every output filename), so previously written seed files would no longer be
# overwritten on re-run.
SEED_NS = uuid.UUID("5a4f2b1c-0000-4ab2-9c01-deadbeef0001")
# Authors uploaded the paper's numbers under this HF handle.
# Stored in the "hf_user_id" field of every seed record.
SEED_HF_USER = "Ted412"
# Stored in the "submitted_at_utc" field of every seed record.
SEED_DATE = "2026-05-01T00:00:00Z" # COLM submission date — adjust if needed.
# Source of truth: Table 1 of the paper PDF (Long_Video_Memory_for_Arxiv_v24.pdf).
# Columns: Tracking, Counting, Ordering, Linking, Spatial, Activity, Overall.
#
# Each row is a 12-item tuple that build_record() unpacks positionally, so the
# field order here must stay in sync with that function:
#   * method_name also seeds the uuid5 submission id (and thus the output
#     filename); two rows sharing a name would write to the same file.
#   * category only appears inside the generated "method_description" text.
#   * modality is stored in the record under the "uses_video_frames" key.
PAPER_TABLE = [
# (method_name, category, model_size, uses_external_data, modality,
# tracking, counting, ordering, linking, spatial, activity, overall)
# --- category: "Baseline" ---
("Random", "Baseline", "—", False, "other",
19.6, 16.7, 11.1, 17.3, 19.3, 19.2, 16.8),
# --- category: "General MLLM" ---
("InternVL3.5-8B", "General MLLM", "8B", False, "frames-only",
23.0, 29.0, 23.0, 27.0, 34.0, 42.0, 28.0),
("Qwen-3-VL-8B", "General MLLM", "8B", False, "frames-only",
35.0, 28.0, 23.0, 21.0, 40.0, 42.0, 29.6),
("InternVL3.5-38B", "General MLLM", "38B", False, "frames-only",
33.0, 40.0, 27.0, 24.0, 46.0, 32.0, 32.6),
("Qwen-3-VL-30B-A3B", "General MLLM", "30B MoE", False, "frames-only",
36.0, 48.0, 25.0, 26.0, 40.0, 30.0, 34.0),
("Qwen-3-VL-32B", "General MLLM", "32B", False, "frames-only",
35.0, 46.0, 27.0, 27.0, 50.0, 46.0, 36.8),
("GPT-5", "General MLLM", "API", False, "frames-only",
29.0, 42.0, 20.0, 18.0, 32.0, 28.0, 27.8),
("Gemini-3-Flash", "General MLLM", "API", False, "frames-only",
46.0, 28.0, 36.0, 44.0, 44.0, 44.0, 39.6),
("Gemini-3.1-Pro", "General MLLM", "API", False, "frames-only",
40.0, 26.0, 44.0, 33.0, 40.0, 48.0, 37.4),
# --- category: "Video-specific MLLM" ---
("LongVA-7B", "Video-specific MLLM", "7B", False, "frames-only",
22.0, 18.0, 20.0, 20.0, 20.0, 22.0, 20.6),
("StreamingVLM", "Video-specific MLLM", "—", False, "video-only",
25.0, 29.0, 21.0, 20.0, 20.0, 32.0, 24.2),
("InternVideo-2.5-8B", "Video-specific MLLM", "8B", False, "frames-only",
29.0, 27.0, 25.0, 15.0, 32.0, 32.0, 25.6),
("VideoLLaMA3-8B", "Video-specific MLLM", "8B", False, "frames-only",
23.0, 31.0, 27.0, 32.0, 38.0, 36.0, 30.0),
("Molmo2-8B", "Video-specific MLLM", "8B", False, "frames-only",
36.0, 50.0, 27.0, 25.0, 34.0, 22.0, 33.2),
# --- category: "Agentic Video Framework" ---
("SiLVR", "Agentic Video Framework", "API + 8B", False, "captions-only",
31.0, 14.0, 27.0, 17.0, 18.0, 28.0, 22.4),
("Ego-R1", "Agentic Video Framework", "API", False, "frames-only",
30.0, 18.0, 23.0, 18.0, 48.0, 32.0, 25.8),
("WorldMM", "Agentic Video Framework", "API", False, "frames-only",
32.0, 44.0, 21.0, 21.0, 34.0, 36.0, 30.6),
("AVP", "Agentic Video Framework", "API", False, "frames-only",
34.0, 42.0, 31.0, 27.0, 38.0, 34.0, 34.0),
]
# Links attached to every seed record as "project_url" and "publication_url".
PROJECT_URL = "https://github.com/Ziyang412/EgoMemReason"
PUBLICATION_URL = "https://arxiv.org/abs/2605.09874"
def build_record(row):
    """Turn one ``PAPER_TABLE`` row into a leaderboard submission record.

    Args:
        row: A 12-item sequence laid out as (method name, category, model
            size, uses-external-data flag, modality, tracking, counting,
            ordering, linking, spatial, activity, overall).

    Returns:
        A dict describing the submission. Its ``submission_id`` is the uuid5
        of ``paper-baseline:<method name>`` under ``SEED_NS``, so the same
        method name always yields the same id.

    Raises:
        ValueError: If ``row`` does not contain exactly 12 items.
    """
    (name, category, model_size, external_data, modality,
     trk, cnt, ordr, lnk, spa, act, ovr) = row

    # Metric labels in the order they appear in the serialized record.
    metric_labels = (
        "Cumulative State Tracking",
        "Temporal Counting",
        "Event Ordering",
        "Event Linking",
        "Spatial Preference",
        "Activity Pattern",
        "Overall",
    )
    metrics = dict(zip(metric_labels, (trk, cnt, ordr, lnk, spa, act, ovr)))

    # Deterministic id: identical method names map to identical uuids.
    submission_id = str(uuid.uuid5(SEED_NS, f"paper-baseline:{name}"))
    description = (
        f"Baseline from Table 1 of the EgoMemReason paper ({category})."
    )

    record = {
        "submission_id": submission_id,
        "submitted_at_utc": SEED_DATE,
        "hf_user_id": SEED_HF_USER,
        "team_name": "EgoMemReason",
        "method_name": name,
        "model_size": model_size,
        "uses_external_data": external_data,
        # The modality string is stored under this key as-is.
        "uses_video_frames": modality,
        "method_description": description,
        "project_url": PROJECT_URL,
        "publication_url": PUBLICATION_URL,
        "is_selected": True,
        "metrics": metrics,
    }
    return record
def main():
    """Write one seed JSON file per ``PAPER_TABLE`` row and print next steps.

    Output goes to ``seeds/submissions/`` under the parent of this script's
    directory. The folder is created when missing, and each file is named
    ``<submission_id>.json``, so a file with the same id is overwritten.
    """
    script_path = pathlib.Path(__file__).resolve()
    out_dir = script_path.parents[1].joinpath("seeds", "submissions")
    out_dir.mkdir(parents=True, exist_ok=True)

    # map() is lazy, so each record is built and written before the next one.
    for record in map(build_record, PAPER_TABLE):
        target = out_dir / f"{record['submission_id']}.json"
        payload = json.dumps(record, indent=2)
        target.write_text(payload)

    upload_cmd = (
        " python -c \"from huggingface_hub import HfApi; "
        "HfApi().upload_folder("
        "folder_path='seeds/submissions', "
        "path_in_repo='submissions', "
        "repo_id='Ted412/EgoMemReason-Leaderboard', "
        "repo_type='dataset')\""
    )
    print(f"Wrote {len(PAPER_TABLE)} seed records to {out_dir}/")
    print("Next: upload to Ted412/EgoMemReason-Leaderboard via HF UI or:")
    print(upload_cmd)


# Run the seeding only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|