#!/usr/bin/env python3
"""Merge sharded eval_results.shard*.jsonl files and recompute metrics.
Usage:
python merge_shards.py --bench videomme \
--label-dir /home/ubuntu/eval_results/videomme/vmme_minicpmo_4_5
The script finds all `eval_results.shard*.jsonl` under `--label-dir`,
concatenates them into `eval_results.jsonl` (deduping by a bench-specific
primary key), then re-runs the bench's `compute_metrics` + `print_summary`.
Final outputs: `eval_results.jsonl`, `metrics.json`, `summary.txt`.
"""
from __future__ import annotations

import _common  # noqa: F401
import argparse
import contextlib
import io
import json
import sys
from pathlib import Path

# Primary key per bench (must match the field written by each eval script).
PK = {
    "videomme": "question_id",
    "lvbench": "uid",
    "worldsense": "question_id",
    "daily_omni": "question_id",
    "dpo_sync": "video",
    "vggsoundsync": "uid",
}
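
# A shard line is any JSON object carrying the bench's primary key; the other
# fields are whatever the eval script wrote (names below are illustrative
# only, not part of this script's contract):
#   {"question_id": "001-1", "prediction": "A", "answer": "B"}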

# Extra label used when printing the summary.
LABEL_HINT = {
    "videomme": "Video-MME",
    "lvbench": "LVBench",
    "worldsense": "WorldSense",
    "daily_omni": "Daily-Omni",
    "dpo_sync": "Sync",
    "vggsoundsync": "VGGSoundSync",
}
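# NOTE: main() below labels the summary with the label-dir name and never
# reads LABEL_HINT; a caller wanting the pretty bench name could use
# LABEL_HINT.get(args.bench, args.bench) (illustrative, not wired in).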


def main() -> int:
    p = argparse.ArgumentParser()
    p.add_argument("--bench", required=True,
                   choices=list(PK.keys()),
                   help="Which benchmark this label-dir belongs to.")
    p.add_argument("--label-dir", type=Path, required=True,
                   help="Eval output dir containing eval_results.shard*.jsonl.")
    args = p.parse_args()
    # Bench-specific helper (from _common) exposing compute_metrics
    # and print_summary.
    ch = _common.ch(args.bench)
    pk = PK[args.bench]

    shard_files = sorted(args.label_dir.glob("eval_results.shard*.jsonl"))
    if not shard_files:
        print(f"[merge] ERROR: no eval_results.shard*.jsonl in {args.label_dir}",
              file=sys.stderr)
        return 1

    print(f"[merge] Found {len(shard_files)} shard file(s):")
    for sf in shard_files:
        print(f"  - {sf.name}")

    merged_path = args.label_dir / "eval_results.jsonl"
    all_results = []
    seen: set = set()
    n_dup = 0
    with open(merged_path, "w", encoding="utf-8") as out:
        for sf in shard_files:
            with open(sf, encoding="utf-8") as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        continue
                    obj = json.loads(line)
                    key = obj.get(pk)
                    # First occurrence wins; later shards with the same key
                    # are counted as duplicates and skipped.
                    if key in seen:
                        n_dup += 1
                        continue
                    seen.add(key)
                    out.write(line + "\n")
                    all_results.append(obj)

    print(f"[merge] Merged {len(all_results)} unique results "
          f"({n_dup} duplicates skipped) -> {merged_path}")

    metrics = ch.compute_metrics(all_results)

    # Preserve eval_config from any shard if present (first shard that
    # carries one wins).
    for sf in shard_files:
        try:
            with open(sf, encoding="utf-8") as f:
                first = f.readline().strip()
                if first:
                    obj = json.loads(first)
                    if "eval_config" in obj:
                        metrics["eval_config"] = obj["eval_config"]
                        break
        except Exception:
            pass

    metrics_json = args.label_dir / "metrics.json"
    summary_txt = args.label_dir / "summary.txt"
    with open(metrics_json, "w", encoding="utf-8") as f:
        json.dump(metrics, f, indent=2, ensure_ascii=False)

    label = args.label_dir.name
    # Print the summary to stdout, then run it again with stdout redirected
    # so the same text is captured into summary.txt.
    ch.print_summary(metrics, label)
    buf = io.StringIO()
    with contextlib.redirect_stdout(buf):
        ch.print_summary(metrics, label)
    with open(summary_txt, "w", encoding="utf-8") as f:
        f.write(buf.getvalue())

    print("\n[merge] Done.")
    print(f"  Results: {merged_path}")
    print(f"  Metrics: {metrics_json}")
    print(f"  Summary: {summary_txt}")
    return 0


if __name__ == "__main__":
    sys.exit(main())