File size: 5,849 Bytes
6375fde
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9cf02ac
1b38b03
6375fde
 
 
 
 
 
 
 
 
 
ed7ed80
6375fde
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
"""Seed the public leaderboard dataset with the paper's Table 1 baselines.

Generates one JSON record per system under seeds/submissions/<uuid>.json.
UUIDs are deterministic (uuid5 of "paper-baseline:<method name>" in a fixed
namespace), so re-running this script overwrites existing seed records rather
than creating duplicates.

To publish to the public dataset, either:
  (a) Drag-drop the resulting folder onto the HF UI of
      Ted412/EgoMemReason-Leaderboard → submissions/, or
  (b) Run:
        from huggingface_hub import HfApi
        HfApi().upload_folder(
            folder_path="seeds/submissions",
            path_in_repo="submissions",
            repo_id="Ted412/EgoMemReason-Leaderboard",
            repo_type="dataset",
        )
"""

import json
import pathlib
import uuid

# Stable seed namespace so uuid5 ids are reproducible across runs.
# build_record() hashes "paper-baseline:<method name>" under this namespace;
# changing it changes every generated submission_id (and thus every filename).
SEED_NS = uuid.UUID("5a4f2b1c-0000-4ab2-9c01-deadbeef0001")

# Authors uploaded the paper's numbers under this HF handle.
# Written verbatim into each record's "hf_user_id" field.
SEED_HF_USER = "Ted412"
# Written verbatim into each record's "submitted_at_utc" field.
SEED_DATE = "2026-05-01T00:00:00Z"  # COLM submission date — adjust if needed.

# Source of truth: Table 1 of the paper PDF (Long_Video_Memory_for_Arxiv_v24.pdf).
# Columns: Tracking, Counting, Ordering, Linking, Spatial, Activity, Overall.
#
# Each row is a 12-tuple that build_record() unpacks positionally, so the
# field order below must not change:
#   - method_name is also the input to the deterministic submission id.
#   - category only appears inside the generated "method_description" text.
#   - modality is stored under the record key "uses_video_frames".
#   - the seven trailing numbers are copied as-is into the "metrics" dict
#     (presumably accuracy percentages — confirm against the paper).
# A model_size of "—" presumably means "not applicable / not reported".
PAPER_TABLE = [
    # (method_name, category, model_size, uses_external_data, modality,
    #  tracking, counting, ordering, linking, spatial, activity, overall)
    ("Random",              "Baseline",                  "—",      False, "other",
     19.6, 16.7, 11.1, 17.3, 19.3, 19.2, 16.8),

    # --- category: General MLLM ---
    ("InternVL3.5-8B",      "General MLLM",              "8B",     False, "frames-only",
     23.0, 29.0, 23.0, 27.0, 34.0, 42.0, 28.0),
    ("Qwen-3-VL-8B",        "General MLLM",              "8B",     False, "frames-only",
     35.0, 28.0, 23.0, 21.0, 40.0, 42.0, 29.6),
    ("InternVL3.5-38B",     "General MLLM",              "38B",    False, "frames-only",
     33.0, 40.0, 27.0, 24.0, 46.0, 32.0, 32.6),
    ("Qwen-3-VL-30B-A3B",   "General MLLM",              "30B MoE", False, "frames-only",
     36.0, 48.0, 25.0, 26.0, 40.0, 30.0, 34.0),
    ("Qwen-3-VL-32B",       "General MLLM",              "32B",    False, "frames-only",
     35.0, 46.0, 27.0, 27.0, 50.0, 46.0, 36.8),
    ("GPT-5",               "General MLLM",              "API",    False, "frames-only",
     29.0, 42.0, 20.0, 18.0, 32.0, 28.0, 27.8),
    ("Gemini-3-Flash",      "General MLLM",              "API",    False, "frames-only",
     46.0, 28.0, 36.0, 44.0, 44.0, 44.0, 39.6),
    ("Gemini-3.1-Pro",      "General MLLM",              "API",    False, "frames-only",
     40.0, 26.0, 44.0, 33.0, 40.0, 48.0, 37.4),

    # --- category: Video-specific MLLM ---
    ("LongVA-7B",           "Video-specific MLLM",       "7B",     False, "frames-only",
     22.0, 18.0, 20.0, 20.0, 20.0, 22.0, 20.6),
    ("StreamingVLM",        "Video-specific MLLM",       "—",      False, "video-only",
     25.0, 29.0, 21.0, 20.0, 20.0, 32.0, 24.2),
    ("InternVideo-2.5-8B",  "Video-specific MLLM",       "8B",     False, "frames-only",
     29.0, 27.0, 25.0, 15.0, 32.0, 32.0, 25.6),
    ("VideoLLaMA3-8B",      "Video-specific MLLM",       "8B",     False, "frames-only",
     23.0, 31.0, 27.0, 32.0, 38.0, 36.0, 30.0),
    ("Molmo2-8B",           "Video-specific MLLM",       "8B",     False, "frames-only",
     36.0, 50.0, 27.0, 25.0, 34.0, 22.0, 33.2),

    # --- category: Agentic Video Framework ---
    ("SiLVR",               "Agentic Video Framework",   "API + 8B", False, "captions-only",
     31.0, 14.0, 27.0, 17.0, 18.0, 28.0, 22.4),
    ("Ego-R1",              "Agentic Video Framework",   "API",    False, "frames-only",
     30.0, 18.0, 23.0, 18.0, 48.0, 32.0, 25.8),
    ("WorldMM",             "Agentic Video Framework",   "API",    False, "frames-only",
     32.0, 44.0, 21.0, 21.0, 34.0, 36.0, 30.6),
    ("AVP",                 "Agentic Video Framework",   "API",    False, "frames-only",
     34.0, 42.0, 31.0, 27.0, 38.0, 34.0, 34.0),
]

# Links attached to every seed record by build_record() under the
# "project_url" and "publication_url" keys respectively.
PROJECT_URL     = "https://github.com/Ziyang412/EgoMemReason"
PUBLICATION_URL = "https://arxiv.org/abs/2605.09874"


def build_record(row):
    """Turn one ``PAPER_TABLE`` row into a leaderboard submission dict.

    Args:
        row: A 12-item sequence laid out as (method name, category, model
            size, uses-external-data flag, modality, then the seven scores
            tracking, counting, ordering, linking, spatial, activity,
            overall).

    Returns:
        A JSON-serialisable dict. Its ``submission_id`` is the uuid5 of
        ``"paper-baseline:<method name>"`` under ``SEED_NS``, so the same
        method name always yields the same id.

    Raises:
        ValueError: If ``row`` does not contain exactly 12 items.
    """
    # Strict positional unpack: a malformed row fails loudly here.
    (method, family, param_count, external_data, input_modality,
     s_tracking, s_counting, s_ordering, s_linking, s_spatial, s_activity,
     s_overall) = row

    # Pair each leaderboard metric label with its score, keeping table order.
    metric_labels = (
        "Cumulative State Tracking",
        "Temporal Counting",
        "Event Ordering",
        "Event Linking",
        "Spatial Preference",
        "Activity Pattern",
        "Overall",
    )
    metric_values = (
        s_tracking, s_counting, s_ordering, s_linking,
        s_spatial, s_activity, s_overall,
    )
    metrics = dict(zip(metric_labels, metric_values))

    record_id = uuid.uuid5(SEED_NS, f"paper-baseline:{method}")
    description = (
        f"Baseline from Table 1 of the EgoMemReason paper ({family})."
    )

    record = {
        "submission_id": str(record_id),
        "submitted_at_utc": SEED_DATE,
        "hf_user_id": SEED_HF_USER,
        "team_name": "EgoMemReason",
        "method_name": method,
        "model_size": param_count,
        "uses_external_data": external_data,
        # The modality string is stored under this key as-is.
        "uses_video_frames": input_modality,
        "method_description": description,
        "project_url": PROJECT_URL,
        "publication_url": PUBLICATION_URL,
        "is_selected": True,
    }
    record["metrics"] = metrics
    return record


def main():
    """Write one seed JSON file per ``PAPER_TABLE`` row and print upload hints.

    Files are written to ``<parent of this script's directory>/seeds/submissions``
    as ``<submission_id>.json``. The directory is created when missing, and a
    file with the same name is overwritten.
    """
    out_dir = pathlib.Path(__file__).resolve().parents[1] / "seeds" / "submissions"
    out_dir.mkdir(parents=True, exist_ok=True)

    for row in PAPER_TABLE:
        record = build_record(row)
        path = out_dir / f"{record['submission_id']}.json"
        # json.dumps escapes non-ASCII by default, so the payload is plain
        # ASCII; pinning the encoding keeps the write locale-independent.
        path.write_text(json.dumps(record, indent=2), encoding="utf-8")

    print(f"Wrote {len(PAPER_TABLE)} seed records to {out_dir}/")
    print("Next: upload to Ted412/EgoMemReason-Leaderboard via HF UI or:")
    # Point the hint at the directory that was actually written. A hard-coded
    # relative 'seeds/submissions' is only right when the command is run from
    # the repository root, whereas out_dir is anchored at this script's
    # location. Forward slashes keep the path valid in a Python literal on
    # Windows as well.
    upload_dir = out_dir.as_posix()
    print("  python -c \"from huggingface_hub import HfApi; "
          "HfApi().upload_folder("
          f"folder_path={upload_dir!r}, "
          "path_in_repo='submissions', "
          "repo_id='Ted412/EgoMemReason-Leaderboard', "
          "repo_type='dataset')\"")


if __name__ == "__main__":
    main()