import glob
import json
import os
from multiprocessing import Pool

import numpy as np
import tqdm
from autofaiss import build_index
def load_file(file: str) -> np.ndarray:
    """Load one saved NumPy array from disk.

    Kept as a module-level function so it can be pickled and dispatched to
    ``multiprocessing.Pool`` workers.

    Args:
        file: Path to a ``.npy`` file.

    Returns:
        The array stored in ``file``, as returned by ``np.load``.
    """
    return np.load(file)
# Root directory holding one sub-directory of per-sample ``.npy`` files.
in_dir = '/data0/ImageBindFeatures/cc3m/'
# Destination of the stacked feature matrix.
save_path = "cc3m_imagebind.npy"


def main():
    """Stack the per-sample feature files into one array and build a kNN index.

    Steps:
      1. Collect every ``<in_dir>/*/*.npy`` file.
      2. Load them in parallel and stack them along a new leading axis.
      3. Save the stacked array to ``save_path`` and write the matching list
         of file paths (relative to ``in_dir``) to
         ``cc3m_imagebind_files.json``; entry ``i`` of the list corresponds
         to row ``i`` of the array.
      4. Build an inner-product index with autofaiss.

    Raises:
        ValueError: If no feature file is found under ``in_dir``, or if the
            loaded arrays cannot be stacked (e.g. mismatching shapes).
    """
    # glob yields files in arbitrary filesystem order; sorting makes the row
    # order (and therefore the ids stored in the index) reproducible.
    files = sorted(glob.glob(in_dir + '*/*.npy'))
    if not files:
        # Fail early with a clear message instead of np.stack's generic
        # "need at least one array to stack".
        raise ValueError(f"no .npy files found under {in_dir!r}")

    # The context manager releases the worker processes even if loading
    # fails; never start more workers than there are files to read.
    with Pool(min(256, len(files))) as pool:
        results = pool.map(load_file, files)
    results = np.stack(results, axis=0)
    np.save(save_path, results)

    with open('cc3m_imagebind_files.json', 'w') as f:
        # relpath strips only the leading in_dir, unlike str.replace which
        # would drop every occurrence of that substring.
        json.dump([os.path.relpath(x, in_dir) for x in files], f)

    # build index
    # NOTE(review): a string `embeddings` is presumably treated by autofaiss
    # as a directory of .npy files, so any other .npy file in the current
    # working directory would be indexed too -- confirm the CWD only
    # contains `save_path`.
    build_index(
        embeddings="./",
        index_path="knn.index",
        index_infos_path="infos.json",
        max_index_memory_usage="32G",
        current_memory_available="100G",
        metric_type='ip',
    )


# Guard the entry point: worker processes started with the "spawn" method
# re-import this module and must not re-run the pipeline.
if __name__ == "__main__":
    main()