| import glob | |
| import os | |
| import json | |
| import tqdm | |
| import numpy as np | |
| from multiprocessing import Pool | |
| from autofaiss import build_index | |
def load_file(file):
    """Read a single ``.npy`` file from disk and return the array it holds.

    Defined at module level so that it can be handed to ``Pool.map`` and
    executed inside worker processes.
    """
    array = np.load(file)
    return array
in_dir = '/data0/ImageBindFeatures/cc3m/'
save_path = "cc3m_imagebind.npy"


def main():
    """Merge per-sample feature files into one array and build a KNN index.

    Steps:
      1. Collect every ``*/*.npy`` file below ``in_dir``.
      2. Load them in parallel and stack them along a new leading axis.
      3. Save the stacked array to ``save_path`` and write the matching list
         of relative file names to ``cc3m_imagebind_files.json`` (same order
         as the rows of the array).
      4. Build an autofaiss inner-product index from the ``.npy`` files in the
         current directory.

    Raises:
        FileNotFoundError: if no feature file is found under ``in_dir``.
    """
    # glob returns files in arbitrary order; sort so the row order of the
    # saved array (and of the file-name list) is reproducible across runs.
    files = sorted(glob.glob(in_dir + '*/*.npy'))
    if not files:
        # Fail early with a clear message instead of letting np.stack choke
        # on an empty list.
        raise FileNotFoundError("no .npy files found under " + in_dir)

    # The context manager shuts the workers down once loading is finished;
    # never start more workers than there are files to read.
    with Pool(min(256, len(files))) as pool:
        results = pool.map(load_file, files)
    results = np.stack(results, axis=0)
    np.save(save_path, results)

    # Row i of the saved array corresponds to entry i of this list.
    with open('cc3m_imagebind_files.json', 'w') as f:
        json.dump([x.replace(in_dir, '') for x in files], f)

    # build index
    # "./" is the directory save_path was just written to.
    # NOTE(review): any other .npy file in the working directory would
    # presumably be ingested as embeddings too -- confirm the directory is
    # otherwise clean, or point this at a dedicated folder.
    build_index(
        embeddings="./",
        index_path="knn.index",
        index_infos_path="infos.json",
        max_index_memory_usage="32G",
        current_memory_available="100G",
        metric_type='ip',
    )


# Guard the entry point: with the "spawn" start method every worker process
# re-imports this module, and must not re-run the whole pipeline.
if __name__ == "__main__":
    main()