import json
import os
import re
|
|
| |
# Names of the evaluation datasets to report. Each name is the prefix of its
# per-checkpoint score file (``<name>_score.json``) and is also used as a
# row/column label in the printed tables, so the order here is the output order.
datasets = [
    "ImageNet-1K", "N24News", "HatefulMemes", "VOC2007", "SUN397", "Place365", "ImageNet-A", "ImageNet-R", "ObjectNet", "Country211",
    "OK-VQA", "A-OKVQA", "DocVQA", "InfographicsVQA", "ChartQA", "Visual7W", "ScienceQA", "VizWiz", "GQA", "TextVQA",
    "VisDial", "CIRR", "VisualNews_t2i", "VisualNews_i2t", "MSCOCO_t2i", "MSCOCO_i2t", "NIGHTS", "WebQA", "FashionIQ", "Wiki-SS-NQ", "OVEN", "EDIS",
    "MSCOCO", "RefCOCO", "RefCOCO-Matching", "Visual7W-Pointing",
]
|
|
|
|
| |
# Directories to scan for ``<dataset>_score.json`` files, one entry per
# evaluated checkpoint. Entries are written with a trailing slash; the
# experiment name reported for an entry is the directory that contains its
# final path component (``vlm2vec-qwen2vl-v2.0-2b`` for the entry below).
checkpoint_paths = [
    "checkpoint_dir/vlm2vec-qwen2vl-v2.0-2b/image/",
]
|
|
|
|
| |
def extract_step(checkpoint_name: str):
    """Return the step number encoded in a ``checkpoint-<N>`` path.

    Args:
        checkpoint_name: Path or directory name to search.

    Returns:
        ``N`` as an ``int`` for the first ``checkpoint-<N>`` occurrence in
        ``checkpoint_name``, or ``float('inf')`` when there is none.
    """
    match = re.search(r'checkpoint-(\d+)', checkpoint_name)
    return int(match.group(1)) if match else float('inf')
|
|
|
|
| |
def _read_score(score_file, dataset):
    """Load one ``<dataset>_score.json`` file and return its reported metric.

    ``acc`` is used when present, otherwise ``precision@1``.

    Args:
        score_file: Path of the JSON score file to read.
        dataset: Dataset name, used only in the error message.

    Raises:
        ValueError: If the file contains neither ``acc`` nor ``precision@1``.
    """
    with open(score_file, "r", encoding="utf-8") as f:
        score_data = json.load(f)
    for metric in ("acc", "precision@1"):
        if metric in score_data:
            return score_data[metric]
    raise ValueError(f'no valid metric (acc or precision@1) found in the {dataset}_score.json')


# Maps a column label (normally the experiment name) to that checkpoint's
# scores: {"experiment": ..., "checkpoint": ..., <dataset>: score or "N/A"}.
gathered_scores_by_exp = {}

for checkpoint_path in checkpoint_paths:
    print(checkpoint_path)
    step = extract_step(checkpoint_path)

    # The experiment is the directory holding the final path component.
    # Trailing slashes are dropped first so "exp/run/" and "exp/run" agree,
    # and a one-component path falls back to itself instead of raising
    # IndexError.
    parts = checkpoint_path.rstrip("/").split("/")
    experiment_dir = parts[-2] if len(parts) >= 2 else parts[-1]

    # extract_step yields an int only when the path names a checkpoint-<N>;
    # anything else is reported under the "default" label.
    checkpoint_label = str(step) if isinstance(step, int) else "default"
    checkpoint_scores = {"experiment": experiment_dir, "checkpoint": checkpoint_label}

    for dataset in datasets:
        score_file = os.path.join(checkpoint_path, f"{dataset}_score.json")
        if os.path.isfile(score_file):
            checkpoint_scores[dataset] = _read_score(score_file, dataset)
        else:
            # No score file for this dataset: keep the cell so every entry
            # has a value for every dataset.
            checkpoint_scores[dataset] = "N/A"
    print(checkpoint_scores)

    # Two entries that resolve to the same experiment name would otherwise
    # overwrite each other; on a clash the later one is keyed by its full
    # path so that no result is silently dropped.
    key = experiment_dir if experiment_dir not in gathered_scores_by_exp else checkpoint_path
    gathered_scores_by_exp[key] = checkpoint_scores
|
|
|
|
|
|
# Wide table: one comma-separated row per gathered entry, one column per
# dataset. The run of blank lines separates it from the output printed above.
print('\n' * 5)
header = ["experiment", "checkpoint"] + datasets
print(",".join(header))

# Only the score dicts are needed here; each one already carries its own
# "experiment" and "checkpoint" fields.
for scores in gathered_scores_by_exp.values():
    row = [scores["experiment"], scores["checkpoint"]]
    row.extend(str(scores[dataset]) for dataset in datasets)
    print(",".join(row))
|
|
|
|
|
|
# Transposed table: one comma-separated row per dataset, one column per
# gathered entry. ``header`` is kept as a module-level name, as before, for
# any later code that reads it.
header = ["dataset"] + list(gathered_scores_by_exp.keys())
print(",".join(header))

for dataset in datasets:
    row = [str(scores[dataset]) for scores in gathered_scores_by_exp.values()]
    print(",".join([dataset] + row))
|
|
|
|
import pandas as pd

# Write the dataset-by-entry table to disk. The column list is built here
# rather than taken from a variable left over from the printing code, so this
# section depends only on ``datasets`` and ``gathered_scores_by_exp``.
columns = ["dataset"] + list(gathered_scores_by_exp)

# One row per dataset; values are stored as loaded (numbers or "N/A"), not
# stringified.
rows = [
    [dataset] + [scores[dataset] for scores in gathered_scores_by_exp.values()]
    for dataset in datasets
]

df = pd.DataFrame(rows, columns=columns)
df.to_csv("output_scores.csv", index=False)
print("CSV saved to output_scores.csv")
|
|
|
|
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|