Spaces:

anasanchezf
/

cloome

Sleeping

App Files Community

Ana Sanchez committed on Sep 9, 2022

Commit

5bd2a17

•

1 Parent(s): 66c0de1

Add data folder

Browse files

Files changed (7) hide show

app.py +12 -23
data/all_molecule_cellpainting_features.pkl +3 -0
data/cellpainting-all-imgpermol.csv +3 -0
data/cellpainting-unique-molecule.csv +3 -0
data/epoch_55.pt +3 -0
data/subset_image_cellpainting_features.pkl +3 -0
data/subset_npzs_dict_.npz +3 -0

app.py CHANGED Viewed

@@ -27,17 +27,16 @@ from rdkit.Chem import AllChem
 from rdkit.Chem import DataStructs
 basepath = os.path.dirname(__file__)
-MODEL_PATH = os.path.join(basepath, "epoch_55.pt")
 CLOOME_PATH = "/home/ana/gitrepos/hti-cloob"
-npzs = os.path.join(basepath, "npzs")
 imgname = "I1"
-molecule_features = "all_molecule_cellpainting_features.pkl"
-image_features = "subset_image_cellpainting_features.pkl"
-images_arr = "subset_npzs_dict_.npz"
 device = "cuda" if torch.cuda.is_available() else "cpu"
 model_type = "RN50"
@@ -113,7 +112,6 @@ def get_features(dataset, model, device):
                 all_ids.append(ids)
         all_ids = list(chain.from_iterable(all_ids))
     if imgs is not None and mols is not None:
@@ -158,9 +156,6 @@ def main(df, model_path, model, img_path=None, mol_path=None, image_resolution=N
         val_img_features, val_ids = result
         return val_img_features, val_ids
-    #val_img_features, val_ids = get_features(val, model, device)
-    #return val_img_features, val_text_features, val_ids
 def img_to_numpy(file):
     img = Image.open(file)
@@ -305,9 +300,6 @@ def reshape_image(arr):
 # missing functions: save morgan to to_hdf, create index, load features, calculate similarities
-#model = load(MODEL_PATH, device, model_type, image_resolution)
 ##### STREAMLIT FUNCTIONS ######
 st.title('CLOOME: Contrastive Learning for Molecule Representation with Microscopy Images and Chemical Structures')
@@ -375,22 +367,22 @@ def molecules_from_image():
             morgan = [morgan_from_smiles(s) for s in smiles]
             molnames = [f"M{i}" for i in range(len(morgan))]
             mol_index_fname = "mol_index.csv"
-            mol_index = create_index(basepath, molnames, mol_index_fname)
-            molpath = os.path.join(basepath, "mols.hdf")
             fps_fname = save_hdf(morgan, molnames, molpath)
             mol_imgs = draw_molecules(smiles)
             mol_features, mol_ids = main(mol_index, MODEL_PATH, model_type, mol_path=molpath, image_resolution=image_resolution)
             predefined_features = False
         else:
             mol_index = pd.read_csv("cellpainting-unique-molecule.csv")
-            mol_features_torch = torch.load("all_molecule_cellpainting_features.pkl", map_location=device)
             mol_features = mol_features_torch["mol_features"]
             mol_ids = mol_features_torch["mol_ids"]
             print(len(mol_ids))
             predefined_features = True
         img_index_fname = "img_index.csv"
-        img_index = create_index(basepath, imgname, img_index_fname)
         img_features, img_ids = main(img_index, MODEL_PATH, model_type, img_path=npzs, image_resolution=image_resolution)
         print(img_features.shape)
@@ -434,8 +426,8 @@ def images_from_molecule():
         morgan = [morgan_from_smiles(s) for s in smiles]
         molnames = [f"M{i}" for i in range(len(morgan))]
         mol_index_fname = "mol_index.csv"
-        mol_index = create_index(basepath, molnames, mol_index_fname)
-        molpath = os.path.join(basepath, "mols.hdf")
         fps_fname = save_hdf(morgan, molnames, molpath)
         mol_imgs = draw_molecules(smiles)
@@ -493,6 +485,3 @@ page_names_to_funcs = {
 selected_page = st.sidebar.selectbox("What would you like to retrieve?", page_names_to_funcs.keys())
 page_names_to_funcs[selected_page]()
-# print(img_features.shape)
-# print(img_ids)

 from rdkit.Chem import DataStructs
 basepath = os.path.dirname(__file__)
+datapath = os.path.join(basepath, "data")
 CLOOME_PATH = "/home/ana/gitrepos/hti-cloob"
+MODEL_PATH = os.path.join(datapath, "epoch_55.pt")
+npzs = os.path.join(datapath, "npzs")
+molecule_features = os.path.join(datapath, "all_molecule_cellpainting_features.pkl")
+image_features = os.path.join(datapath, "subset_image_cellpainting_features.pkl")
+images_arr = os.path.join(datapath, "subset_npzs_dict_.npz")
 imgname = "I1"
 device = "cuda" if torch.cuda.is_available() else "cpu"
 model_type = "RN50"
                 all_ids.append(ids)
         all_ids = list(chain.from_iterable(all_ids))
     if imgs is not None and mols is not None:
         val_img_features, val_ids = result
         return val_img_features, val_ids
 def img_to_numpy(file):
     img = Image.open(file)
 # missing functions: save morgan to to_hdf, create index, load features, calculate similarities
 ##### STREAMLIT FUNCTIONS ######
 st.title('CLOOME: Contrastive Learning for Molecule Representation with Microscopy Images and Chemical Structures')
             morgan = [morgan_from_smiles(s) for s in smiles]
             molnames = [f"M{i}" for i in range(len(morgan))]
             mol_index_fname = "mol_index.csv"
+            mol_index = create_index(datapath, molnames, mol_index_fname)
+            molpath = os.path.join(datapath, "mols.hdf")
             fps_fname = save_hdf(morgan, molnames, molpath)
             mol_imgs = draw_molecules(smiles)
             mol_features, mol_ids = main(mol_index, MODEL_PATH, model_type, mol_path=molpath, image_resolution=image_resolution)
             predefined_features = False
         else:
             mol_index = pd.read_csv("cellpainting-unique-molecule.csv")
+            mol_features_torch = torch.load(molecule_features, map_location=device)
             mol_features = mol_features_torch["mol_features"]
             mol_ids = mol_features_torch["mol_ids"]
             print(len(mol_ids))
             predefined_features = True
         img_index_fname = "img_index.csv"
+        img_index = create_index(datapath, imgname, img_index_fname)
         img_features, img_ids = main(img_index, MODEL_PATH, model_type, img_path=npzs, image_resolution=image_resolution)
         print(img_features.shape)
         morgan = [morgan_from_smiles(s) for s in smiles]
         molnames = [f"M{i}" for i in range(len(morgan))]
         mol_index_fname = "mol_index.csv"
+        mol_index = create_index(datapath, molnames, mol_index_fname)
+        molpath = os.path.join(datapath, "mols.hdf")
         fps_fname = save_hdf(morgan, molnames, molpath)
         mol_imgs = draw_molecules(smiles)
 selected_page = st.sidebar.selectbox("What would you like to retrieve?", page_names_to_funcs.keys())
 page_names_to_funcs[selected_page]()

data/all_molecule_cellpainting_features.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8979250025350c6ff67f986c657c14a881710cfe73e315ef5d126abaecf50b4b
+size 62906027

data/cellpainting-all-imgpermol.csv ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a10cc3285a7b1c3275c30b2aa3654d00651ae4211d5e057118f32c40725e09ff
+size 14270985

data/cellpainting-unique-molecule.csv ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:65592c0ee09203c2ba5be15c159c7944c49feea65a24cfb7862bb49af7cd112a
+size 14265091

data/epoch_55.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c612c6da6f943caac839b9102fe98ba944838600942897aaa035f552d9a535bd
+size 352013623

data/subset_image_cellpainting_features.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7a7cbfdf80d0ee6197f4e9118be2d7569399601d6c55c4db7fbc2dcbeadd9d6a
+size 62906027

data/subset_npzs_dict_.npz ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0ddb3451d5d46a1eed6613c914777e013a6d9a392cfc4f9448d0b9488d099da1
+size 3656596390