csuhan committed on
Commit
decbc4d
1 Parent(s): 919748f

Upload with huggingface_hub

Browse files
.gitattributes CHANGED
@@ -32,3 +32,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
+ cc3m_imagebind_files.json filter=lfs diff=lfs merge=lfs -text
36
+ knn.index filter=lfs diff=lfs merge=lfs -text
build_cc3m_index.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import glob
2
+ import os
3
+ import json
4
+ import tqdm
5
+ import numpy as np
6
+ from multiprocessing import Pool
7
+ from autofaiss import build_index
8
+
9
def load_file(file):
    """Load one saved NumPy array from disk.

    Defined at module level so it can be handed to
    ``multiprocessing.Pool.map`` and run in worker processes.

    Args:
        file: Path to a ``.npy`` file.

    Returns:
        The array stored in ``file``, exactly as ``np.load`` returns it.
    """
    return np.load(file)
11
+
12
+
13
in_dir = '/data0/ImageBindFeatures/cc3m/'
save_path = "cc3m_imagebind.npy"

# Number of worker processes used to read the per-sample .npy files.
NUM_WORKERS = 256


def relative_names(files, root):
    """Return every path in ``files`` expressed relative to ``root``.

    Only the leading ``root`` component is removed; a later occurrence of
    the same text inside a path is left untouched (a plain
    ``str.replace`` would strip those as well).

    Args:
        files: Iterable of file paths located under ``root``.
        root: Directory the returned paths should be relative to.

    Returns:
        A list of relative paths, in the same order as ``files``.
    """
    return [os.path.relpath(path, root) for path in files]


def main():
    """Stack all per-sample features into one array and build a KNN index.

    Steps:
      1. Collect every ``<in_dir>/*/*.npy`` file (sorted, so that row ``i``
         of the stacked array maps to the same file on every run).
      2. Load them in parallel and stack them along a new first axis.
      3. Save the stacked array to ``save_path`` and the matching list of
         relative file names to ``cc3m_imagebind_files.json``.
      4. Build an inner-product autofaiss index from the current directory.

    Raises:
        FileNotFoundError: If no ``.npy`` file is found under ``in_dir``.
    """
    files = sorted(glob.glob(in_dir + '*/*.npy'))
    if not files:
        raise FileNotFoundError(
            "no .npy feature files found under " + in_dir
        )

    # The context manager guarantees the worker processes are released
    # even if loading one of the files fails.
    with Pool(NUM_WORKERS) as pool:
        results = pool.map(load_file, files)
    results = np.stack(results, axis=0)

    np.save(save_path, results)

    # Row i of the saved array corresponds to entry i of this list.
    with open('cc3m_imagebind_files.json', 'w') as f:
        json.dump(relative_names(files, in_dir), f)

    # build index
    # NOTE(review): embeddings="./" makes autofaiss read the .npy files in
    # the current directory, which is where save_path was just written.
    # Presumably no other .npy files should live there -- confirm.
    build_index(
        embeddings="./",
        index_path="knn.index",
        index_infos_path="infos.json",
        max_index_memory_usage="32G",
        current_memory_available="100G",
        metric_type='ip',
    )


# Guard the entry point: creating a multiprocessing Pool at import time
# breaks under the "spawn" start method, where workers re-import this module.
if __name__ == "__main__":
    main()
cc3m_imagebind_files.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed9ea276a776b47c47868fc13bab2c1813f167c3155b6aab58d37dc2939e8df8
3
+ size 129571307
faiss_cc3m_search.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import faiss
2
+ import torch
3
+
4
def to_faiss_query(query):
    """Convert a torch tensor into the array layout faiss searches expect.

    ``Index.search`` is documented for C-contiguous float32 NumPy arrays;
    whether a raw torch tensor is accepted appears to depend on the faiss
    version, so the conversion is done explicitly.

    Args:
        query: Tensor of shape ``(num_queries, dim)``.

    Returns:
        A C-contiguous float32 NumPy array with the same shape and values.
    """
    return query.detach().cpu().to(torch.float32).contiguous().numpy()


def main():
    """Run a smoke-test search against ``knn.index`` with random queries."""
    my_index = faiss.read_index("knn.index")

    # Three random queries; 1024 is the "vectors dimension" recorded in
    # infos.json for this index.
    query = torch.rand(3, 1024)

    k = 5
    distances, indices = my_index.search(to_faiss_query(query), k)

    # Print the results instead of dropping into a debugger, so the script
    # also works when run non-interactively.
    print(distances)
    print(indices)


if __name__ == "__main__":
    main()
infos.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"index_key": "HNSW32", "index_param": "efSearch=16", "index_path": "/data1/llama_adapter_project/extract_ImageBind_features/autofaiss_cc3m/autofaiss/knn.index", "size in bytes": 13264998158, "avg_search_speed_ms": 36.55575554355653, "99p_search_speed_ms": 46.38748399913309, "reconstruction error %": 0.0, "nb vectors": 3036761, "vectors dimension": 1024, "compression ratio": 0.9376988151708419}
knn.index ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c39db2288566cebf9aca85306567ce54dd5d2459df8f568717be726af376560b
3
+ size 13264998158