Upload with huggingface_hub
Browse files- .gitattributes +2 -0
- build_cc3m_index.py +34 -0
- cc3m_imagebind_files.json +3 -0
- faiss_cc3m_search.py +12 -0
- infos.json +1 -0
- knn.index +3 -0
.gitattributes
CHANGED
@@ -32,3 +32,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
32 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
33 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
34 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
32 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
33 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
34 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
35 |
+
cc3m_imagebind_files.json filter=lfs diff=lfs merge=lfs -text
|
36 |
+
knn.index filter=lfs diff=lfs merge=lfs -text
|
build_cc3m_index.py
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import glob
|
2 |
+
import os
|
3 |
+
import json
|
4 |
+
import tqdm
|
5 |
+
import numpy as np
|
6 |
+
from multiprocessing import Pool
|
7 |
+
from autofaiss import build_index
|
8 |
+
|
9 |
+
def load_file(file):
    """Load the NumPy array stored at ``file`` and return it.

    Kept as a module-level function so it can be pickled and dispatched to
    ``multiprocessing`` worker processes.
    """
    return np.load(file)
|
11 |
+
|
12 |
+
|
13 |
+
in_dir = '/data0/ImageBindFeatures/cc3m/'
save_path = "cc3m_imagebind.npy"

# Number of worker processes used to read the per-sample .npy files.
NUM_WORKERS = 256


def main():
    """Stack the per-file arrays under ``in_dir`` and build a faiss index.

    Steps:
      1. Collect every ``<in_dir>/*/*.npy`` file.
      2. Load them in parallel and stack them along a new first axis.
      3. Save the stacked array to ``save_path``.
      4. Write the file paths (relative to ``in_dir``) to
         ``cc3m_imagebind_files.json`` in the same order as the stacked
         rows, so row ``i`` corresponds to entry ``i`` of the list.
      5. Call autofaiss ``build_index`` on the current directory.

    Raises:
        FileNotFoundError: if no ``.npy`` file is found under ``in_dir``.
    """
    # Sort so the row order is reproducible across runs; glob gives no
    # ordering guarantee.
    files = sorted(glob.glob(in_dir + '*/*.npy'))
    if not files:
        raise FileNotFoundError(
            "no .npy files found under {!r}".format(in_dir))

    # The context manager tears the worker pool down once map() returns,
    # instead of leaving the processes alive until interpreter exit.
    with Pool(NUM_WORKERS) as pool:
        results = pool.map(load_file, files)
    # np.stack requires every loaded array to have the same shape.
    results = np.stack(results, axis=0)

    np.save(save_path, results)

    # Every glob result starts with in_dir, so strip exactly that prefix
    # (str.replace would also remove later occurrences inside the path).
    prefix_len = len(in_dir)
    with open('cc3m_imagebind_files.json', 'w') as f:
        json.dump([x[prefix_len:] for x in files], f)

    # build index
    # NOTE(review): embeddings="./" points at the current directory, where
    # save_path was just written; presumably autofaiss picks up every .npy
    # file found there, so the directory should contain no others - confirm.
    build_index(embeddings="./",
                index_path="knn.index",
                index_infos_path="infos.json",
                max_index_memory_usage="32G",
                current_memory_available="100G",
                metric_type='ip')


# Guard the entry point: with the "spawn" start method each worker
# re-imports this module and must not re-run the whole pipeline.
if __name__ == "__main__":
    main()
|
cc3m_imagebind_files.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ed9ea276a776b47c47868fc13bab2c1813f167c3155b6aab58d37dc2939e8df8
|
3 |
+
size 129571307
|
faiss_cc3m_search.py
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import faiss
|
2 |
+
import torch
|
3 |
+
|
4 |
+
# Load the prebuilt index from the working directory.
my_index = faiss.read_index("knn.index")

# Random stand-in queries; take the width from the index itself rather than
# hard-coding it, so the script keeps working if the index is rebuilt with
# a different embedding size.
query = torch.rand(3, my_index.d)

k = 5
# The faiss Python API is documented for float32 NumPy arrays, so hand it
# the tensor's NumPy view (torch.rand yields a contiguous float32 CPU tensor).
distances, indices = my_index.search(query.numpy(), k)

# Show the results instead of dropping into a debugger, so the script also
# runs non-interactively. Both arrays have shape (number of queries, k).
print("distances:\n", distances)
print("indices:\n", indices)
|
infos.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"index_key": "HNSW32", "index_param": "efSearch=16", "index_path": "/data1/llama_adapter_project/extract_ImageBind_features/autofaiss_cc3m/autofaiss/knn.index", "size in bytes": 13264998158, "avg_search_speed_ms": 36.55575554355653, "99p_search_speed_ms": 46.38748399913309, "reconstruction error %": 0.0, "nb vectors": 3036761, "vectors dimension": 1024, "compression ratio": 0.9376988151708419}
|
knn.index
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c39db2288566cebf9aca85306567ce54dd5d2459df8f568717be726af376560b
|
3 |
+
size 13264998158
|