Spaces:

Ted412
/

EgoMemReason

Running

File size: 5,849 Bytes

"""Seed the public leaderboard dataset with the paper's Table 1 baselines.

Generates one JSON record per system under seeds/submissions/<uuid>.json.
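UUIDs are deterministic (uuid5 of "paper-baseline:<method name>" under a fixed
namespace), so re-running this script overwrites existing seed records rather
than creating duplicates. Renaming or removing a method does not delete its
previously written file.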

To publish to the public dataset, either:
  (a) Drag-drop the resulting folder onto the HF UI of
      Ted412/EgoMemReason-Leaderboard → submissions/, or
  (b) Run:
        from huggingface_hub import HfApi
        HfApi().upload_folder(
            folder_path="seeds/submissions",
            path_in_repo="submissions",
            repo_id="Ted412/EgoMemReason-Leaderboard",
            repo_type="dataset",
        )
"""

import json
import pathlib
import uuid

# Stable seed namespace so uuid5 ids are reproducible across runs.
# build_record() derives every submission id from this namespace, so changing
# the value changes the id (and therefore the file name) of every seed record.
SEED_NS = uuid.UUID("5a4f2b1c-0000-4ab2-9c01-deadbeef0001")

# Authors uploaded the paper's numbers under this HF handle.
# Written verbatim into each record's "hf_user_id" field.
SEED_HF_USER = "Ted412"
# Timestamp string stored as "submitted_at_utc" on every seed record.
SEED_DATE = "2026-05-01T00:00:00Z"  # COLM submission date — adjust if needed.

# Source of truth: Table 1 of the paper PDF (Long_Video_Memory_for_Arxiv_v24.pdf).
# Columns: Tracking, Counting, Ordering, Linking, Spatial, Activity, Overall.
# Each row is a 12-tuple that build_record() unpacks positionally, so the field
# order below must stay in sync with that function.
# NOTE(review): for most rows Overall equals
#   (tracking + counting + ordering + linking + (spatial + activity) / 2) / 5,
# but Qwen-3-VL-32B (36.6 by that formula vs. 36.8 listed) and LongVA-7B
# (20.2 vs. 20.6 listed) do not fit — confirm both against the paper.
PAPER_TABLE = [
    # (method_name, category, model_size, uses_external_data, modality,
    #  tracking, counting, ordering, linking, spatial, activity, overall)
    ("Random",              "Baseline",                  "—",      False, "other",
     19.6, 16.7, 11.1, 17.3, 19.3, 19.2, 16.8),

    # General-purpose multimodal LLMs.
    ("InternVL3.5-8B",      "General MLLM",              "8B",     False, "frames-only",
     23.0, 29.0, 23.0, 27.0, 34.0, 42.0, 28.0),
    ("Qwen-3-VL-8B",        "General MLLM",              "8B",     False, "frames-only",
     35.0, 28.0, 23.0, 21.0, 40.0, 42.0, 29.6),
    ("InternVL3.5-38B",     "General MLLM",              "38B",    False, "frames-only",
     33.0, 40.0, 27.0, 24.0, 46.0, 32.0, 32.6),
    ("Qwen-3-VL-30B-A3B",   "General MLLM",              "30B MoE", False, "frames-only",
     36.0, 48.0, 25.0, 26.0, 40.0, 30.0, 34.0),
    ("Qwen-3-VL-32B",       "General MLLM",              "32B",    False, "frames-only",
     35.0, 46.0, 27.0, 27.0, 50.0, 46.0, 36.8),
    ("GPT-5",               "General MLLM",              "API",    False, "frames-only",
     29.0, 42.0, 20.0, 18.0, 32.0, 28.0, 27.8),
    ("Gemini-3-Flash",      "General MLLM",              "API",    False, "frames-only",
     46.0, 28.0, 36.0, 44.0, 44.0, 44.0, 39.6),
    ("Gemini-3.1-Pro",      "General MLLM",              "API",    False, "frames-only",
     40.0, 26.0, 44.0, 33.0, 40.0, 48.0, 37.4),

    # Video-specific multimodal LLMs.
    ("LongVA-7B",           "Video-specific MLLM",       "7B",     False, "frames-only",
     22.0, 18.0, 20.0, 20.0, 20.0, 22.0, 20.6),
    ("StreamingVLM",        "Video-specific MLLM",       "—",      False, "video-only",
     25.0, 29.0, 21.0, 20.0, 20.0, 32.0, 24.2),
    ("InternVideo-2.5-8B",  "Video-specific MLLM",       "8B",     False, "frames-only",
     29.0, 27.0, 25.0, 15.0, 32.0, 32.0, 25.6),
    ("VideoLLaMA3-8B",      "Video-specific MLLM",       "8B",     False, "frames-only",
     23.0, 31.0, 27.0, 32.0, 38.0, 36.0, 30.0),
    ("Molmo2-8B",           "Video-specific MLLM",       "8B",     False, "frames-only",
     36.0, 50.0, 27.0, 25.0, 34.0, 22.0, 33.2),

    # Agentic video frameworks.
    ("SiLVR",               "Agentic Video Framework",   "API + 8B", False, "captions-only",
     31.0, 14.0, 27.0, 17.0, 18.0, 28.0, 22.4),
    ("Ego-R1",              "Agentic Video Framework",   "API",    False, "frames-only",
     30.0, 18.0, 23.0, 18.0, 48.0, 32.0, 25.8),
    ("WorldMM",             "Agentic Video Framework",   "API",    False, "frames-only",
     32.0, 44.0, 21.0, 21.0, 34.0, 36.0, 30.6),
    ("AVP",                 "Agentic Video Framework",   "API",    False, "frames-only",
     34.0, 42.0, 31.0, 27.0, 38.0, 34.0, 34.0),
]

# Links copied into every seed record as "project_url" / "publication_url".
PROJECT_URL     = "https://github.com/Ziyang412/EgoMemReason"
PUBLICATION_URL = "https://arxiv.org/abs/2605.09874"


def build_record(row):
    """Turn one ``PAPER_TABLE`` row into a leaderboard submission record.

    Args:
        row: A 12-item sequence laid out as ``(method_name, category,
            model_size, uses_external_data, modality, tracking, counting,
            ordering, linking, spatial, activity, overall)``.

    Returns:
        A JSON-serialisable dict describing the submission. Its
        ``submission_id`` is a uuid5 derived from ``SEED_NS`` and the method
        name, so the same method always maps to the same id.

    Raises:
        ValueError: If ``row`` does not contain exactly 12 items.
    """
    method, group, params, external, modality, *scores = row
    # Exactly seven scores must follow the five descriptive fields.
    trk, cnt, order, link, spat, act, total = scores

    per_task = {
        "Cumulative State Tracking": trk,
        "Temporal Counting":         cnt,
        "Event Ordering":            order,
        "Event Linking":             link,
        "Spatial Preference":        spat,
        "Activity Pattern":          act,
        "Overall":                   total,
    }
    seed_id = uuid.uuid5(SEED_NS, f"paper-baseline:{method}")
    blurb = f"Baseline from Table 1 of the EgoMemReason paper ({group})."

    record = {
        "submission_id": str(seed_id),
        "submitted_at_utc": SEED_DATE,
        "hf_user_id": SEED_HF_USER,
        "team_name": "EgoMemReason",
        "method_name": method,
        "model_size": params,
        "uses_external_data": external,
        # The record schema stores the modality label under this key.
        "uses_video_frames": modality,
        "method_description": blurb,
        "project_url": PROJECT_URL,
        "publication_url": PUBLICATION_URL,
        "is_selected": True,
        "metrics": per_task,
    }
    return record


def main():
    """Write one seed JSON file per ``PAPER_TABLE`` row and print upload hints.

    Files are written to ``seeds/submissions/<submission_id>.json`` under the
    directory one level above the directory that contains this script. The
    output folder is created if it is missing, and a file that already exists
    for the same submission id is overwritten.
    """
    out_dir = pathlib.Path(__file__).resolve().parents[1] / "seeds" / "submissions"
    out_dir.mkdir(parents=True, exist_ok=True)

    for row in PAPER_TABLE:
        record = build_record(row)
        path = out_dir / f"{record['submission_id']}.json"
        # json.dumps escapes non-ASCII characters by default, so the payload is
        # plain ASCII; the encoding is pinned anyway so the bytes on disk never
        # depend on the platform's locale encoding.
        path.write_text(json.dumps(record, indent=2), encoding="utf-8")

    print(f"Wrote {len(PAPER_TABLE)} seed records to {out_dir}/")
    print("Next: upload to Ted412/EgoMemReason-Leaderboard via HF UI or:")
    # The hinted folder_path is relative, so the command only works when run
    # from the directory that contains seeds/.
    print("  python -c \"from huggingface_hub import HfApi; "
          "HfApi().upload_folder("
          "folder_path='seeds/submissions', "
          "path_in_repo='submissions', "
          "repo_id='Ted412/EgoMemReason-Leaderboard', "
          "repo_type='dataset')\"")


# Generate the seed files only when executed as a script, not on import.
if __name__ == "__main__":
    main()