Ana Sanchez committed on
Commit
5bd2a17
1 Parent(s): 66c0de1

Add data folder

Browse files
app.py CHANGED
@@ -27,17 +27,16 @@ from rdkit.Chem import AllChem
27
  from rdkit.Chem import DataStructs
28
 
29
 
30
-
31
-
32
  basepath = os.path.dirname(__file__)
 
33
 
34
- MODEL_PATH = os.path.join(basepath, "epoch_55.pt")
35
  CLOOME_PATH = "/home/ana/gitrepos/hti-cloob"
36
- npzs = os.path.join(basepath, "npzs")
 
 
 
 
37
  imgname = "I1"
38
- molecule_features = "all_molecule_cellpainting_features.pkl"
39
- image_features = "subset_image_cellpainting_features.pkl"
40
- images_arr = "subset_npzs_dict_.npz"
41
 
42
  device = "cuda" if torch.cuda.is_available() else "cpu"
43
  model_type = "RN50"
@@ -113,7 +112,6 @@ def get_features(dataset, model, device):
113
 
114
  all_ids.append(ids)
115
 
116
-
117
  all_ids = list(chain.from_iterable(all_ids))
118
 
119
  if imgs is not None and mols is not None:
@@ -158,9 +156,6 @@ def main(df, model_path, model, img_path=None, mol_path=None, image_resolution=N
158
  val_img_features, val_ids = result
159
  return val_img_features, val_ids
160
 
161
- #val_img_features, val_ids = get_features(val, model, device)
162
-
163
- #return val_img_features, val_text_features, val_ids
164
 
165
  def img_to_numpy(file):
166
  img = Image.open(file)
@@ -305,9 +300,6 @@ def reshape_image(arr):
305
 
306
  # missing functions: save morgan to to_hdf, create index, load features, calculate similarities
307
 
308
-
309
- #model = load(MODEL_PATH, device, model_type, image_resolution)
310
-
311
  ##### STREAMLIT FUNCTIONS ######
312
  st.title('CLOOME: Contrastive Learning for Molecule Representation with Microscopy Images and Chemical Structures')
313
 
@@ -375,22 +367,22 @@ def molecules_from_image():
375
  morgan = [morgan_from_smiles(s) for s in smiles]
376
  molnames = [f"M{i}" for i in range(len(morgan))]
377
  mol_index_fname = "mol_index.csv"
378
- mol_index = create_index(basepath, molnames, mol_index_fname)
379
- molpath = os.path.join(basepath, "mols.hdf")
380
  fps_fname = save_hdf(morgan, molnames, molpath)
381
  mol_imgs = draw_molecules(smiles)
382
  mol_features, mol_ids = main(mol_index, MODEL_PATH, model_type, mol_path=molpath, image_resolution=image_resolution)
383
  predefined_features = False
384
  else:
385
  mol_index = pd.read_csv("cellpainting-unique-molecule.csv")
386
- mol_features_torch = torch.load("all_molecule_cellpainting_features.pkl", map_location=device)
387
  mol_features = mol_features_torch["mol_features"]
388
  mol_ids = mol_features_torch["mol_ids"]
389
  print(len(mol_ids))
390
  predefined_features = True
391
 
392
  img_index_fname = "img_index.csv"
393
- img_index = create_index(basepath, imgname, img_index_fname)
394
  img_features, img_ids = main(img_index, MODEL_PATH, model_type, img_path=npzs, image_resolution=image_resolution)
395
 
396
  print(img_features.shape)
@@ -434,8 +426,8 @@ def images_from_molecule():
434
  morgan = [morgan_from_smiles(s) for s in smiles]
435
  molnames = [f"M{i}" for i in range(len(morgan))]
436
  mol_index_fname = "mol_index.csv"
437
- mol_index = create_index(basepath, molnames, mol_index_fname)
438
- molpath = os.path.join(basepath, "mols.hdf")
439
  fps_fname = save_hdf(morgan, molnames, molpath)
440
  mol_imgs = draw_molecules(smiles)
441
 
@@ -493,6 +485,3 @@ page_names_to_funcs = {
493
 
494
  selected_page = st.sidebar.selectbox("What would you like to retrieve?", page_names_to_funcs.keys())
495
  page_names_to_funcs[selected_page]()
496
-
497
- # print(img_features.shape)
498
- # print(img_ids)
 
27
  from rdkit.Chem import DataStructs
28
 
29
 
 
 
30
  basepath = os.path.dirname(__file__)
31
+ datapath = os.path.join(basepath, "data")
32
 
 
33
  CLOOME_PATH = "/home/ana/gitrepos/hti-cloob"
34
+ MODEL_PATH = os.path.join(datapath, "epoch_55.pt")
35
+ npzs = os.path.join(datapath, "npzs")
36
+ molecule_features = os.path.join(datapath, "all_molecule_cellpainting_features.pkl")
37
+ image_features = os.path.join(datapath, "subset_image_cellpainting_features.pkl")
38
+ images_arr = os.path.join(datapath, "subset_npzs_dict_.npz")
39
  imgname = "I1"
 
 
 
40
 
41
  device = "cuda" if torch.cuda.is_available() else "cpu"
42
  model_type = "RN50"
 
112
 
113
  all_ids.append(ids)
114
 
 
115
  all_ids = list(chain.from_iterable(all_ids))
116
 
117
  if imgs is not None and mols is not None:
 
156
  val_img_features, val_ids = result
157
  return val_img_features, val_ids
158
 
 
 
 
159
 
160
  def img_to_numpy(file):
161
  img = Image.open(file)
 
300
 
301
  # missing functions: save morgan to to_hdf, create index, load features, calculate similarities
302
 
 
 
 
303
  ##### STREAMLIT FUNCTIONS ######
304
  st.title('CLOOME: Contrastive Learning for Molecule Representation with Microscopy Images and Chemical Structures')
305
 
 
367
  morgan = [morgan_from_smiles(s) for s in smiles]
368
  molnames = [f"M{i}" for i in range(len(morgan))]
369
  mol_index_fname = "mol_index.csv"
370
+ mol_index = create_index(datapath, molnames, mol_index_fname)
371
+ molpath = os.path.join(datapath, "mols.hdf")
372
  fps_fname = save_hdf(morgan, molnames, molpath)
373
  mol_imgs = draw_molecules(smiles)
374
  mol_features, mol_ids = main(mol_index, MODEL_PATH, model_type, mol_path=molpath, image_resolution=image_resolution)
375
  predefined_features = False
376
  else:
377
  mol_index = pd.read_csv("cellpainting-unique-molecule.csv")
378
+ mol_features_torch = torch.load(molecule_features, map_location=device)
379
  mol_features = mol_features_torch["mol_features"]
380
  mol_ids = mol_features_torch["mol_ids"]
381
  print(len(mol_ids))
382
  predefined_features = True
383
 
384
  img_index_fname = "img_index.csv"
385
+ img_index = create_index(datapath, imgname, img_index_fname)
386
  img_features, img_ids = main(img_index, MODEL_PATH, model_type, img_path=npzs, image_resolution=image_resolution)
387
 
388
  print(img_features.shape)
 
426
  morgan = [morgan_from_smiles(s) for s in smiles]
427
  molnames = [f"M{i}" for i in range(len(morgan))]
428
  mol_index_fname = "mol_index.csv"
429
+ mol_index = create_index(datapath, molnames, mol_index_fname)
430
+ molpath = os.path.join(datapath, "mols.hdf")
431
  fps_fname = save_hdf(morgan, molnames, molpath)
432
  mol_imgs = draw_molecules(smiles)
433
 
 
485
 
486
  selected_page = st.sidebar.selectbox("What would you like to retrieve?", page_names_to_funcs.keys())
487
  page_names_to_funcs[selected_page]()
 
 
 
data/all_molecule_cellpainting_features.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8979250025350c6ff67f986c657c14a881710cfe73e315ef5d126abaecf50b4b
3
+ size 62906027
data/cellpainting-all-imgpermol.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a10cc3285a7b1c3275c30b2aa3654d00651ae4211d5e057118f32c40725e09ff
3
+ size 14270985
data/cellpainting-unique-molecule.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65592c0ee09203c2ba5be15c159c7944c49feea65a24cfb7862bb49af7cd112a
3
+ size 14265091
data/epoch_55.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c612c6da6f943caac839b9102fe98ba944838600942897aaa035f552d9a535bd
3
+ size 352013623
data/subset_image_cellpainting_features.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a7cbfdf80d0ee6197f4e9118be2d7569399601d6c55c4db7fbc2dcbeadd9d6a
3
+ size 62906027
data/subset_npzs_dict_.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ddb3451d5d46a1eed6613c914777e013a6d9a392cfc4f9448d0b9488d099da1
3
+ size 3656596390