"""Pack per-file ``.npy`` feature arrays into one matrix and build a KNN index.

Steps performed when run as a script:

1. Collect every ``<in_dir>/*/*.npy`` file (one directory level deep).
2. Load them in parallel and stack them into a single array saved at
   ``save_path``.
3. Write the list of source files (relative to ``in_dir``) to
   ``cc3m_imagebind_files.json``; entry ``i`` corresponds to row ``i`` of the
   stacked array.
4. Build an autofaiss index over the ``.npy`` data in the current directory.

The directory and file names suggest the inputs are ImageBind embeddings of
the CC3M dataset -- TODO confirm.
"""
import glob
import json
import os
from multiprocessing import Pool

import numpy as np
import tqdm
from autofaiss import build_index


def load_file(file):
    """Load a single ``.npy`` file and return the array it contains."""
    return np.load(file)


in_dir = '/data0/ImageBindFeatures/cc3m/'
save_path = "cc3m_imagebind.npy"

# Number of worker processes used to read feature files in parallel.
NUM_WORKERS = 256


def main():
    """Stack all feature files, save the matrix and file list, build the index.

    Raises:
        FileNotFoundError: if no ``.npy`` file matches ``in_dir + '*/*.npy'``.
    """
    pattern = in_dir + '*/*.npy'
    # glob returns entries in arbitrary filesystem order; sorting makes the
    # row order of the saved matrix (and the JSON list) reproducible.
    files = sorted(glob.glob(pattern))
    if not files:
        # Fail early with a clear message instead of spawning workers and
        # then hitting np.stack's "need at least one array to stack".
        raise FileNotFoundError(f"no .npy files match {pattern!r}")

    # The context manager shuts the workers down as soon as loading is done,
    # so they are not left alive while the index is being built.
    with Pool(NUM_WORKERS) as pool:
        arrays = pool.map(load_file, files)

    # np.stack requires every loaded array to have the same shape; the result
    # gains a new leading axis with one entry per file.
    stacked = np.stack(arrays, axis=0)
    np.save(save_path, stacked)

    # Store paths relative to in_dir. Every glob result starts with in_dir,
    # so slicing removes exactly that prefix (str.replace would also remove
    # any later occurrence of the same substring).
    relative_files = [path[len(in_dir):] for path in files]
    with open('cc3m_imagebind_files.json', 'w') as f:
        json.dump(relative_files, f)

    # build index
    # NOTE(review): embeddings="./" points autofaiss at the current working
    # directory, which is where save_path was just written. Presumably any
    # other .npy file in that directory would be ingested as well -- run the
    # script from a clean directory, or confirm against the autofaiss docs.
    build_index(
        embeddings="./",
        index_path="knn.index",
        index_infos_path="infos.json",
        max_index_memory_usage="32G",
        current_memory_available="100G",
        metric_type='ip',
    )


# Guard the entry point: with the "spawn" start method, multiprocessing
# re-imports this module in each worker, which must not re-run the script.
if __name__ == "__main__":
    main()