|
import os |
|
import base64 |
|
import io |
|
import uuid |
|
from ultralytics import YOLO |
|
import cv2 |
|
import torch |
|
import numpy as np |
|
from PIL import Image |
|
from torchvision import transforms |
|
import imageio.v2 as imageio |
|
from trainer import Trainer |
|
from utils.tools import get_config |
|
import torch.nn.functional as F |
|
from iopaint.single_processing import batch_inpaint_cv2 |
|
from pathlib import Path |
|
|
|
|
|
# Point both TORCH_HOME and HUGGINGFACE_HUB_CACHE at the project-local
# "./pretrained-model" directory.
# NOTE(review): these are set after the third-party imports above; confirm
# that none of those libraries read the variables at import time.
os.environ.update(
    {
        "TORCH_HOME": "./pretrained-model",
        "HUGGINGFACE_HUB_CACHE": "./pretrained-model",
    }
)
|
|
|
def resize_image(input_image_path, width=640, height=640):
    """Load an image from disk and letterbox it to ``width`` x ``height``.

    The image is scaled uniformly (aspect ratio preserved) so that it fits
    inside the target size, then padded with a constant gray border
    (114, 114, 114), split between opposite sides, to reach the target size.

    Args:
        input_image_path: Path of the image file; read with ``cv2.imread``
            in color mode.
        width: Target width in pixels.
        height: Target height in pixels.

    Returns:
        The letterboxed image (``height`` rows by ``width`` columns), or
        ``None`` if the image could not be read or resized. The failure
        reason is printed rather than raised.
    """
    try:
        img = cv2.imread(input_image_path, cv2.IMREAD_COLOR)
        if img is None:
            # cv2.imread reports a missing/unreadable file by returning None.
            raise ValueError(f"could not read image: {input_image_path}")

        src_h, src_w = img.shape[:2]

        # A single scale factor keeps the aspect ratio; taking the smaller
        # of the two ratios guarantees the scaled image fits the target.
        # Height is compared with height and width with width, so non-square
        # targets are no longer transposed.
        scale = min(height / src_h, width / src_w)
        new_w = int(round(src_w * scale))
        new_h = int(round(src_h * scale))

        # cv2.resize takes its destination size as (width, height).
        resized = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_LINEAR)

        # Leftover space per axis, shared between the two opposite sides.
        pad_w = (width - new_w) / 2
        pad_h = (height - new_h) / 2

        # The -0.1 / +0.1 nudges push an odd leftover pixel to the
        # bottom/right side instead of relying on tie rounding.
        top, bottom = int(round(pad_h - 0.1)), int(round(pad_h + 0.1))
        left, right = int(round(pad_w - 0.1)), int(round(pad_w + 0.1))

        return cv2.copyMakeBorder(
            resized, top, bottom, left, right,
            cv2.BORDER_CONSTANT, value=(114, 114, 114),
        )

    except Exception as e:
        # Best-effort contract: callers check for None instead of catching.
        print(f"Error resizing image: {e}")
        return None
|
|
|
|
|
def load_weights(path, device):
    """Load a saved mapping of weights and place every entry on ``device``.

    Args:
        path: Location of the checkpoint file, passed to ``torch.load``.
        device: Target ``torch.device`` (or device string) for the weights.

    Returns:
        A new dict with the same keys as the checkpoint, each value moved
        to ``device`` via ``.to(device)``.
    """
    # map_location makes torch.load deserialize straight onto the target
    # device, so a checkpoint saved from a GPU also loads on a CPU-only host.
    # NOTE(review): torch.load unpickles the file; only load checkpoints
    # from a trusted source.
    model_weights = torch.load(path, map_location=device)
    return {
        name: tensor.to(device)
        for name, tensor in model_weights.items()
    }
|
|
|
|
|
|
|
def convert_image_to_base64(image):
    """Encode an image as PNG and return the bytes as base64 text.

    Args:
        image: Image accepted by ``cv2.imencode``.

    Returns:
        The PNG-encoded image as a UTF-8 decoded base64 string.
    """
    # imencode returns (success_flag, encoded_buffer); only the buffer is used.
    png_buffer = cv2.imencode('.png', image)[1]
    return base64.b64encode(png_buffer).decode('utf-8')
|
|
|
|
|
def convert_to_base64(image):
    """Read a binary file-like object to its end and base64-encode the bytes.

    Args:
        image: Object exposing ``read()`` that returns bytes; reading starts
            at the object's current position.

    Returns:
        The bytes that were read, as a UTF-8 decoded base64 string.
    """
    return base64.b64encode(image.read()).decode('utf-8')
|
|
|
def convert_to_base64_file(image):
    """PNG-encode an image and return the encoded bytes as base64 text.

    Args:
        image: Image accepted by ``cv2.imencode``.

    Returns:
        The PNG bytes as a UTF-8 decoded base64 string.
    """
    # imencode returns (success_flag, encoded_buffer); the flag is not checked.
    _flag, encoded = cv2.imencode('.png', image)
    png_bytes = encoded.tobytes()
    return base64.b64encode(png_bytes).decode('utf-8')
|
|
|
|
|
def process_images(input_image, append_image, default_class="chair"):
    """Replace the first detected ``default_class`` object with an overlay.

    The input image is letterboxed with ``resize_image`` (default size), run
    through the YOLO segmentation model, and the first detection whose class
    name equals ``default_class`` is turned into a dilated mask. That mask
    and the bounding box of its largest region are handed to
    ``repaitingAndMerge``, which inpaints the object away and pastes the
    overlay into the box.

    Args:
        input_image: Path of the image to edit (passed to ``resize_image``).
        append_image: Path of the overlay image (passed to
            ``repaitingAndMerge``).
        default_class: Class name, as listed in ``model.names``, to replace.

    Returns:
        * the value returned by ``repaitingAndMerge`` when a matching object
          is found;
        * ``({'error': ...}, 419)`` when the input image cannot be loaded;
        * ``({'message': ...}, 200)`` when no matching object is detected.
    """
    config_path = Path('configs/config.yaml')
    model_path = Path('pretrained-model/torch_model.p')

    img = resize_image(input_image)
    if img is None:
        return {'error': 'Failed to decode resized image'}, 419

    H, W = img.shape[:2]

    # NOTE(review): the segmentation model is re-loaded on every call.
    model = YOLO('pretrained-model/yolov8m-seg.pt')

    # Ultralytics expects imgsz as (height, width).
    results = model(img, imgsz=(H, W), conf=0.5)
    names = model.names

    kernel = np.ones((5, 5), np.uint8)

    for result in results:
        for i, label in enumerate(result.boxes.cls):
            if names[int(label)] != default_class:
                continue

            # .cpu() keeps the conversion valid when inference ran on a GPU;
            # it is a no-op for tensors that already live on the CPU.
            mask = result.masks.data[i].cpu().numpy() * 255

            # Grow the mask so the inpainting also covers the object's edge.
            dilated_mask = cv2.dilate(mask, kernel, iterations=2)
            resized_mask = cv2.resize(dilated_mask, (W, H))

            # Measure the box on the mask that matches the image size, so
            # the paste position is expressed in image coordinates.
            x_point, y_point, width, height = _largest_region_box(resized_mask)

            return repaitingAndMerge(
                append_image, str(model_path), str(config_path),
                width, height, x_point, y_point, img, resized_mask,
            )

    return {'message': f'{default_class} object not found in the image'}, 200


def _largest_region_box(mask):
    """Return (x, y, w, h) of the largest connected region in a 0-255 mask.

    Falls back to ``(0, 0, 1, 1)`` when the mask has no region at all.
    """
    binary = (mask > 127).astype(np.uint8)
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        return 0, 0, 1, 1
    # Pick the biggest region deliberately instead of whichever contour
    # happens to be listed last.
    return cv2.boundingRect(max(contours, key=cv2.contourArea))
|
|
|
def repaitingAndMerge(append_image_path, model_path, config_path, width, height, xposition, yposition, input_base, mask_base):
    """Inpaint the masked area of an image, then paste an overlay onto it.

    Args:
        append_image_path: Path of the overlay image; read with
            ``cv2.imread(..., cv2.IMREAD_UNCHANGED)``.
        model_path: Path of the weights loaded into ``Trainer``.
        config_path: Path of the config passed to ``get_config``.
        width: Width in pixels the overlay is resized to.
        height: Height in pixels the overlay is resized to.
        xposition: Left coordinate at which the overlay is pasted.
        yposition: Top coordinate at which the overlay is pasted.
        input_base: Image passed to ``batch_inpaint_cv2`` for inpainting.
        mask_base: Mask passed to ``batch_inpaint_cv2``.

    Returns:
        The merged image as a NumPy array.

    Raises:
        ValueError: If the overlay image cannot be read.
    """
    config = get_config(config_path)
    device = torch.device("cpu")
    # NOTE(review): nothing below uses `trainer`; the inpainting goes through
    # batch_inpaint_cv2. The load is kept so existing behavior is unchanged;
    # confirm whether it can be dropped.
    trainer = Trainer(config)
    trainer.load_state_dict(load_weights(model_path, device), strict=False)
    trainer.eval()

    print("lama inpainting start")
    inpaint_result_np = batch_inpaint_cv2('lama', 'cpu', input_base, mask_base)
    print("lama inpainting end")

    final_image = Image.fromarray(inpaint_result_np)

    print("merge start")

    append_image = cv2.imread(append_image_path, cv2.IMREAD_UNCHANGED)
    if append_image is None:
        # cv2.imread reports a missing/unreadable file by returning None.
        raise ValueError(f"could not read overlay image: {append_image_path}")

    # NOTE(review): writes a copy of the overlay to the working directory on
    # every call; looks like a debugging leftover - confirm before removing.
    cv2.imwrite('appneded-image.png', append_image)

    resized_image = cv2.resize(append_image, (width, height), interpolation=cv2.INTER_AREA)

    # IMREAD_UNCHANGED keeps the file's own channel layout, so the overlay
    # may have no alpha channel. Convert every layout to RGBA, because the
    # paste below uses the overlay itself as its transparency mask.
    if resized_image.ndim == 2:
        resized_image = cv2.cvtColor(resized_image, cv2.COLOR_GRAY2RGBA)
    elif resized_image.shape[2] == 3:
        resized_image = cv2.cvtColor(resized_image, cv2.COLOR_BGR2RGBA)
    else:
        resized_image = cv2.cvtColor(resized_image, cv2.COLOR_BGRA2RGBA)

    append_image_pil = Image.fromarray(resized_image)

    # Passing the overlay as the third argument makes PIL use its alpha
    # channel as the paste mask.
    final_image.paste(append_image_pil, (xposition, yposition), append_image_pil)

    print("merge end")

    return np.array(final_image)
|
|