Commit d911050
divimund95 committed
Parent(s): cc8944c

use original PyTorch model for inference
Files changed:
- .gitignore +3 -0
- LaMa.mlpackage/Data/com.apple.CoreML/model.mlmodel +0 -3
- LaMa.mlpackage/Data/com.apple.CoreML/weights/weight.bin +0 -3
- LaMa.mlpackage/Manifest.json +0 -18
- app.py +90 -18
- default.yaml +24 -0
- enter_env.sh +11 -0
- requirements.txt +19 -2
- setup.sh +14 -0
.gitignore
ADDED
@@ -0,0 +1,3 @@
+lama/
+big-lama/
+
LaMa.mlpackage/Data/com.apple.CoreML/model.mlmodel
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:289f2c611bd3e52805ee3e686e290981d96d3b9674db93fe6bf30962f7e60d87
-size 1166404
LaMa.mlpackage/Data/com.apple.CoreML/weights/weight.bin
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:aae26da8deca02ead81120f1d683b6c38361cd593c5a685e543c4b84726500e1
-size 204086656
LaMa.mlpackage/Manifest.json
DELETED
@@ -1,18 +0,0 @@
-{
-  "fileFormatVersion": "1.0.0",
-  "itemInfoEntries": {
-    "058403EC-D454-47EC-9C08-D1149DC8311C": {
-      "author": "com.apple.CoreML",
-      "description": "CoreML Model Specification",
-      "name": "model.mlmodel",
-      "path": "com.apple.CoreML/model.mlmodel"
-    },
-    "BCCB46DC-D6B9-4B28-8D24-B59CF8160E49": {
-      "author": "com.apple.CoreML",
-      "description": "CoreML Model Weights",
-      "name": "weights",
-      "path": "com.apple.CoreML/weights"
-    }
-  },
-  "rootModelIdentifier": "058403EC-D454-47EC-9C08-D1149DC8311C"
-}
app.py
CHANGED
@@ -1,30 +1,101 @@
 import gradio as gr
-import coremltools as ct
 import numpy as np
+import torch
 from PIL import Image
 import io
+from omegaconf import OmegaConf
+
+import sys
+import os
+sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'lama'))
+
+from lama.saicinpainting.evaluation.refinement import refine_predict
+from lama.saicinpainting.training.trainers import load_checkpoint
+
 
 # Load the model
-
-
+def get_inpaint_model():
+    """
+    Loads and initializes the inpainting model.
+    Returns: Tuple of (model, predict_config)
+    """
+    predict_config = OmegaConf.load('./default.yaml')
+    predict_config.model.path = './big-lama/models/'
+    predict_config.refiner.gpu_ids = '0'
+
+    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+    # Instead of setting device directly, we'll use it when loading the model
+    predict_config.device = str(device)  # Store as string in config
+    train_config_path = './big-lama/config.yaml'
+
+    train_config = OmegaConf.load(train_config_path)
+    train_config.training_model.predict_only = True
+    train_config.visualizer.kind = 'noop'
+
+    checkpoint_path = os.path.join(predict_config.model.path,
+                                   predict_config.model.checkpoint)
+
+    model = load_checkpoint(train_config, checkpoint_path, strict=False, map_location=device)
+    model.freeze()
+    model.to(device)
+    return model, predict_config
 
 def inpaint(input_dict):
-
-
-
+    """
+    Performs image inpainting on the input image using the provided mask.
+    Args: input_dict containing 'background' (image) and 'layers' (mask)
+    Returns: output_image (PIL.Image)
+    """
+    input_image = input_dict["background"].convert("RGB")
+    input_mask = pil_to_binary_mask(input_dict['layers'][0])
 
-    #
-
+    # TODO: check if this is correct; (C,H,W) or (H,W,C)
+
+    # batch = dict(image=input_image, mask=input_mask[None, ...])
+    np_input_image = np.transpose(np.array(input_image), (2, 0, 1))
+    np_input_mask = np.array(input_mask)[None, :, :]  # Add channel dimension for grayscale images
+    batch = dict(image=np_input_image, mask=np_input_mask)
 
-
-
+    print('lol', batch['image'].shape)
+    print('lol', batch['mask'].shape)
 
-
-
+    inpaint_model, predict_config = get_inpaint_model()
+    device = torch.device(predict_config.device)
 
-
+    batch['unpad_to_size'] = [torch.tensor([batch['image'].shape[1]]), torch.tensor([batch['image'].shape[2]])]
+    batch['image'] = torch.tensor(pad_img_to_modulo(batch['image'], predict_config.dataset.pad_out_to_modulo))[None].to(device)
+    batch['mask'] = torch.tensor(pad_img_to_modulo(batch['mask'], predict_config.dataset.pad_out_to_modulo))[None].float().to(device)
+
+    cur_res = refine_predict(batch, inpaint_model, **predict_config.refiner)
+    cur_res = cur_res[0].permute(1,2,0).detach().cpu().numpy()
+
+    cur_res = np.clip(cur_res * 255, 0, 255).astype('uint8')
+    output_image = Image.fromarray(cur_res)
+
+    return output_image
+
+def ceil_modulo(x, mod):
+    if x % mod == 0:
+        return x
+    return (x // mod + 1) * mod
+
+def pad_img_to_modulo(img, mod):
+    channels, height, width = img.shape
+    out_height = ceil_modulo(height, mod)
+    out_width = ceil_modulo(width, mod)
+    return np.pad(img, ((0, 0), (0, out_height - height), (0, out_width - width)), mode='symmetric')
 
 def pil_to_binary_mask(pil_image, threshold=0):
+    """
+    Converts a PIL image to a binary mask.
+
+    Args:
+        pil_image (PIL.Image): The input PIL image.
+        threshold (int, optional): The threshold value for binarization. Defaults to 0.
+
+    Returns:
+        PIL.Image: A grayscale PIL image representing the binary mask.
+    """
     np_image = np.array(pil_image)
     grayscale_image = Image.fromarray(np_image).convert("L")
    binary_mask = np.array(grayscale_image) > threshold
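
The TODO in the hunk above can be answered from the committed helpers themselves: pad_img_to_modulo unpacks img.shape as (channels, height, width), so the batch uses channel-first (C,H,W) arrays, which is why the image is transposed with np.transpose(..., (2, 0, 1)) and the mask gets a leading channel axis. A minimal standalone sketch of the padding behaviour, reusing the two helpers exactly as committed:

# Standalone sketch: (C, H, W) layout and modulo-8 padding as used in inpaint().
import numpy as np

def ceil_modulo(x, mod):
    if x % mod == 0:
        return x
    return (x // mod + 1) * mod

def pad_img_to_modulo(img, mod):
    channels, height, width = img.shape  # channel-first, as in this commit
    out_height = ceil_modulo(height, mod)
    out_width = ceil_modulo(width, mod)
    return np.pad(img, ((0, 0), (0, out_height - height), (0, out_width - width)), mode='symmetric')

rgb = np.zeros((3, 501, 333))    # image after np.transpose(..., (2, 0, 1))
mask = np.zeros((1, 501, 333))   # mask after [None, :, :]
print(pad_img_to_modulo(rgb, 8).shape)   # (3, 504, 336): H and W rounded up to multiples of 8
print(pad_img_to_modulo(mask, 8).shape)  # (1, 504, 336)
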
@@ -35,7 +106,8 @@ def pil_to_binary_mask(pil_image, threshold=0):
             mask[i,j] = 1
     mask = (mask*255).astype(np.uint8)
     output_mask = Image.fromarray(mask)
-
+    # Convert mask to grayscale
+    return output_mask.convert("L")
 
 # Create Gradio interface
 with gr.Blocks() as demo:
@@ -43,13 +115,13 @@ with gr.Blocks() as demo:
     gr.Markdown("Upload an image and draw a mask to remove unwanted objects.")
 
     with gr.Row():
-        input_image = gr.ImageEditor(type="pil", label='Input image & Mask', interactive=True)
+        input_image = gr.ImageEditor(type="pil", label='Input image & Mask', interactive=True, height="auto", width="auto")
         output_image = gr.Image(type="pil", label="Output Image")
-        with gr.Column():
-
+        # with gr.Column():
+        #     masked_image = gr.Image(label="Masked image", type="pil")
 
     inpaint_button = gr.Button("Inpaint")
-    inpaint_button.click(fn=inpaint, inputs=[input_image], outputs=[output_image
+    inpaint_button.click(fn=inpaint, inputs=[input_image], outputs=[output_image])
 
 # Launch the interface
 if __name__ == "__main__":
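
One side effect of the new inference path: inpaint() calls get_inpaint_model() on every button click, so the config and checkpoint are re-read per request. A minimal sketch, not part of this commit, of memoizing the load with functools.lru_cache (assuming the loaded model is only read, never mutated, across requests):

# Hypothetical caching wrapper (not in this commit): load the LaMa
# checkpoint once, then reuse the (model, config) tuple on later calls.
from functools import lru_cache

@lru_cache(maxsize=1)
def get_cached_inpaint_model():
    return get_inpaint_model()  # delegates to the loader defined in app.py
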
default.yaml
ADDED
@@ -0,0 +1,24 @@
+indir: no  # to be overridden in CLI
+outdir: no  # to be overridden in CLI
+
+model:
+  path: no  # to be overridden in CLI
+  checkpoint: best.ckpt
+
+dataset:
+  kind: default
+  img_suffix: .png
+  pad_out_to_modulo: 8
+
+device: cuda
+out_key: inpainted
+
+refine: False  # refiner will only run if this is True
+refiner:
+  gpu_ids: 0,1  # the GPU ids of the machine to use. If only single GPU, use: "0,"
+  modulo: ${dataset.pad_out_to_modulo}
+  n_iters: 15  # number of iterations of refinement for each scale
+  lr: 0.002  # learning rate
+  min_side: 512  # all sides of image on all scales should be >= min_side / sqrt(2)
+  max_scales: 3  # max number of downscaling scales for the image-mask pyramid
+  px_budget: 1800000  # pixels budget. Any image will be resized to satisfy height*width <= px_budget
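
app.py reads this file with OmegaConf, which resolves the ${dataset.pad_out_to_modulo} interpolation when the value is accessed. A minimal sketch of the load-and-override pattern used by get_inpaint_model (paths as in this repo):

# Sketch: load default.yaml and override fields in code, mirroring app.py.
from omegaconf import OmegaConf

cfg = OmegaConf.load('./default.yaml')
cfg.model.path = './big-lama/models/'  # same override as get_inpaint_model
cfg.refiner.gpu_ids = '0'

print(cfg.refiner.modulo)      # 8, resolved from ${dataset.pad_out_to_modulo}
print(cfg.model.checkpoint)    # best.ckpt
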
enter_env.sh
ADDED
@@ -0,0 +1,11 @@
+#!/bin/bash
+
+# Initialize conda
+eval "$(conda shell.bash hook)"
+
+# Activate the cleanup environment
+conda activate cleanup
+
+# Additional commands or environment setup can be added here
+
+export TORCH_HOME=$(pwd) && export PYTHONPATH=$(pwd)
requirements.txt
CHANGED
@@ -1,4 +1,21 @@
 gradio
-coremltools
 numpy
-pillow
+pillow
+pyyaml
+tqdm
+easydict==1.9.0
+scikit-image
+scikit-learn
+opencv-python
+tensorflow
+joblib
+matplotlib
+pandas
+albumentations==0.5.2
+hydra-core==1.1.0
+pytorch-lightning==1.2.9
+tabulate
+kornia==0.5.0
+webdataset
+packaging
+wldhx.yadisk-direct
setup.sh
ADDED
@@ -0,0 +1,14 @@
+conda create -n cleanup python=3.10 -y
+conda activate cleanup
+# conda install pytorch==1.9.0 torchvision==0.10.0 cudatoolkit=11.1 -c pytorch -c nvidia
+conda install pytorch torchvision -c pytorch -y
+
+pip install -r requirements.txt
+
+
+# Clone dependency repos
+git clone https://github.com/advimman/lama.git
+
+# Download big-lama model
+curl -LJO https://huggingface.co/smartywu/big-lama/resolve/main/big-lama.zip
+unzip big-lama.zip
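
After setup.sh completes, app.py expects ./lama/ (the cloned repo) alongside ./big-lama/config.yaml and ./big-lama/models/best.ckpt from the unzipped archive. A small post-setup sanity check; the archive layout is inferred from the paths used in app.py and default.yaml, not verified here:

# Hypothetical post-setup check: verify the files app.py will look for.
import os

expected = [
    'lama',                       # cloned advimman/lama repo
    'big-lama/config.yaml',       # train config read by get_inpaint_model
    'big-lama/models/best.ckpt',  # model.path + model.checkpoint
]
for path in expected:
    print(('ok      ' if os.path.exists(path) else 'MISSING ') + path)
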