deathCertReader

Sleeping

App Files Files Community

Alealejandrooo committed on May 2, 2023

Commit

d1b3545

•

1 Parent(s): f7610e7

Changes to Main Script

Browse files

Files changed (1) hide show

app.py +70 -157

app.py CHANGED Viewed

@@ -1,4 +1,3 @@
-# from alessandro
 import re
 import cv2
 import numpy as np
@@ -7,43 +6,34 @@ from PIL import Image
 import matplotlib.pyplot as plt
 import pandas as pd
 import matplotlib.pyplot as plt
-ocr = PaddleOCR(lang='sl')
-# def convert_to_image(document):
-#     '''
-#     Function: converts the pdf to image
-#     Input: pdf document
-#     Output: image
-#     '''
-#     # reads PDFs
-#     # reads only first page of PDF documents
-#     # os.path.join(document.name, 'sample.pdf')
-#     pdf_document = load_from_file(document)
-#     page_1 = pdf_document.create_page(0)
-#     images = renderer.render_page(page_1)
-#     image_data = image.data
-#     # convert the image to numpy array
-#     image = np.array(images)
-#     # handles non-PDF formats (e.g., .tif)
-#     # else:
-#     #     images = Image.open(document)
-#     #     # convert the image to RGB
-#     #     image = images.convert('RGB')
-#     #     # convert the image to numpy array
-#     #     image = np.array(image)
-#     #     # TODO: change to dynamic scaling
-#     #     # downscale the image
-#     #     scale = 1.494
-#     #     width = int(image.shape[1] / scale)
-#     #     height = int(image.shape[0] / scale)
-#     #     dim = (width, height)
-#     #     image = cv2.resize(image, dim, interpolation = cv2.INTER_AREA)
-#     # fig, ax = plt.subplots(figsize=(15, 10))
-#     # ax.imshow(image, cmap = 'gray')
-#     return image
 def deskew(image, model):
@@ -52,7 +42,6 @@ def deskew(image, model):
     Input: takes an image as an array
     Output: deskewed image
     '''
     # map the model classes to the actual degree of skew
     map = { 0: '-1', 1: '-10', 2: '-11', 3: '-12', 4: '-13',
             5: '-14',6: '-15', 7: '-2',  8: '-3',  9: '-4',
@@ -102,7 +91,6 @@ def prepare_image_to_autoencoder(image):
     Input: image (_type_): deskewed image
     Output: resized image to be passed to the autoencoder
     '''
     height, width = image.shape[:2]
     target_height = 600
     target_width = 600
@@ -123,68 +111,39 @@ def autoencode_ONNX(image, model):
     Input: image and autoencoder model
     Output: image
     '''
     image = image.astype(np.float32).reshape(1, 600, 600, 1)
     image = model.run(None, {'input_2': image})
     image = image[0]
     image = image.squeeze()
     image = image * 255
     image = image.astype('uint8')
-    # fig, ax = plt.subplots(figsize=(8, 5))
-    # ax.imshow(image, cmap = 'gray')
     return image
-def detect_entries_ONNX(denoised, model):
-    '''
-    Function: detect boxes Priimek, Ime and Datum boxes
-    Priimek: lastname
-    Ime: firstname
-    Datum smrti: date of death
-    Input: image
-    Output: boxes and confidence scores
-    '''
-    # the object detection model requires a tensor(1, h, w, 3)
-    autoencoded_RGB = cv2.cvtColor(denoised, cv2.COLOR_GRAY2RGB)
-    # adds the 1 to the tensor
-    autoencoded_expanded = np.expand_dims(autoencoded_RGB, axis=0)
-    detections = model.run(None, {'input_tensor': autoencoded_expanded})
-    boxes = detections[1]
-    confidence = detections[4]  # returns a ndarray in a list of list
-    boxes = np.array(boxes[0])
-    confidence = np.array(confidence).reshape(5, 1)
-    boxes_and_confidence = np.append(boxes, confidence, axis=1)
-    # reshapes the boxes to be sorted
-    boxes_and_confidence = boxes_and_confidence.reshape(5, 5)
-    # sorts
-    boxes_and_confidence = \
-        boxes_and_confidence[boxes_and_confidence[:, 0].argsort()]
-    # boxes (expressed in image %)
-    boxes = boxes_and_confidence[:, :-1]
-    # boxes (expressed in actual pixels: ymin, xmin, ymax, xmax)
-    boxes = boxes * 600
-    # confidence boxes
-    confidence_boxes = boxes_and_confidence[:, -1].tolist()
-    for box in boxes:
-      ymin, xmin, ymax, xmax = box.astype(int)
-      cv2.rectangle(autoencoded_RGB, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)
-    plt.figure()
-    plt.imshow(cv2.cvtColor(autoencoded_RGB, cv2.COLOR_BGR2RGB))
-    plt.title("Detected Boxes")
-    plt.savefig("test.jpg")
-    img = cv2.imread("test.jpg")
-    return Image.fromarray(img), confidence_boxes
 def extract_detected_entries_pdl(image):
-    result = ocr.ocr(image, cls=False)
-    # boxes = [line[0] for line in result]
-    # txts = [line[1][0] for line in result]
-    # scores = [line[1][1] for line in result]
-    # im_show = draw_ocr(image, boxes, txts, scores, font_path ='/usr/share/fonts/truetype/liberation/LiberationMono-Regular.ttf')
     txt = []
     scores = []
     boxes = []
@@ -193,7 +152,7 @@ def extract_detected_entries_pdl(image):
       scores.append(r[-1][1])
       boxes.append(r[0])
-    return pd.DataFrame(np.transpose([txt,scores, boxes]),columns = ["Text","Score", "Boundary Box"])
 def cleanString_basic(word):
   word = word.replace("$", "s")
@@ -233,82 +192,41 @@ def clean_dates(date: 'str'):
     string = re.sub(r'[a-zA-Z!\[\|]', '', date)
     return string, date_flags
-def regex_string(string):
-    '''
-    Function: swaps the characters with the "hat" (Č, Š, Ž) for the regular ones
-    Input: string
-    Output: cleaned string
-    '''
-    map = {'Č': 'C',
-       'č': 'c',
-       'Š': 'S',
-       'š': 's',
-       'Ž': 'Z',
-       'ž':'z'}
-    for x in string:
-        if x in map:
-            string = string.replace(x, map[x])
-    return string
-import onnxruntime
-def pdf_deskew_gr (document):
-  img = convert_to_image(document)
-  model = onnxruntime.InferenceSession("./models/CNN_deskew_v0.0.2.onnx")
-  deskewed_image, angle, skew_confidence = deskew(img, model)
-  return deskewed_image, angle, skew_confidence
-def pdf_clean_gr(document):
-  img = convert_to_image(document)
-  model = onnxruntime.InferenceSession("./models/CNN_deskew_v0.0.2.onnx")
-  deskewed_image, angle, skew_confidence = deskew(img, model)
-  img = prepare_image_to_autoencoder(img)
-  model = onnxruntime.InferenceSession("./models/autoencoder_denoise_v0.0.2.onnx")
-  img = autoencode_ONNX(img, model)
-  return img
-def pdf_resnet_gr(document):
-  img = convert_to_image(document)
-  model = onnxruntime.InferenceSession("/content/drive/MyDrive/cpo/Alessandro/ai_models/Latest/CNN_deskew_v0.0.2.onnx")
-  deskewed_image, angle, skew_confidence = deskew(img, model)
-  img = prepare_image_to_autoencoder(img)
-  model = onnxruntime.InferenceSession("/content/drive/MyDrive/cpo/Alessandro/ai_models/Latest/autoencoder_denoise_v0.0.2.onnx")
-  img = autoencode_ONNX(img, model)
-  model = onnxruntime.InferenceSession("/content/drive/MyDrive/cpo/Alessandro/ai_models/Latest/ResNet_od_v0.0.2.onnx")
-  boxes, confidence_boxes = detect_entries_ONNX(img, model)
-  return boxes, confidence_boxes
-def pdf_extract_gr(extractimg):
-  # extractimg = convert_to_image(document)
-  extractimg = np.array(extractimg)
-  model = onnxruntime.InferenceSession("./models/CNN_deskew_v0.0.2.onnx")
-  deskewed_image, angle, skew_confidence = deskew(extractimg, model)
   cleanimg = prepare_image_to_autoencoder(deskewed_image)
-  model = onnxruntime.InferenceSession("./models/autoencoder_denoise_v0.0.2.onnx")
-  img = autoencode_ONNX(cleanimg, model)
-  # model = onnxruntime.InferenceSession("./models/ResNet_od_v0.0.2.onnx")
-  # boxes, confidence_boxes = detect_entries_ONNX(img, model)
-  # confidence_entries, lastname, firstname, death_date = extract_detected_entries_pdl(img, boxes)
   df = extract_detected_entries_pdl(img)
   firstnamerow = df.iloc[0]
   firstname = firstnamerow[0]
   firstnameconfidence = round(float(firstnamerow[1]) * 100,3)
   firstnameconfidence = f"{firstnameconfidence}%"
   surnamerow = df.iloc[1]
   surname = surnamerow[0]
   surnameconfidence = round(float(surnamerow[1]) * 100,3)
   surnameconfidence = f"{surnameconfidence}%"
   dodrow = df.iloc[2]
   dodname = dodrow[0]
   dodconfidence = round(float(dodrow[1]) * 100,3)
   dodconfidence = f"{dodconfidence}%"
   return df, deskewed_image, angle, skew_confidence, img, firstname, firstnameconfidence, surname, surnameconfidence, dodname, dodconfidence
 css = """
 .run_container {
   display: flex;
@@ -316,7 +234,6 @@ css = """
   align-items: center;
   gap: 10px;
 }
 .run_btn {
   margin: auto;
   width: 50%;
@@ -326,19 +243,15 @@ css = """
   margin: auto;
   display: flex;
 }
 .results_container {
   display: flex;
   justify-content: space-evenly;
 }
 .results_cell {
 }
 """
-import gradio as gr
 with gr.Blocks(css = css) as demo:
   gr.Markdown("""

 import re
 import cv2
 import numpy as np
 import matplotlib.pyplot as plt
 import pandas as pd
 import matplotlib.pyplot as plt
+import onnxruntime
+import gradio as gr
+# initialize the OCR
+ocr = PaddleOCR(lang='sl',
+                enable_mkldnn=True,
+                cls=False,
+                show_log= False)
+# initialize the models
+model_deskew = onnxruntime.InferenceSession("/content/CNN_deskew_v0.0.2.onnx")
+model_denoise = onnxruntime.InferenceSession("/content/autoencoder_denoise_v0.0.2.onnx")
+##### All Functions #####
+def preprocess_image(image):
+    '''
+    Function: downscale the image by a fixed factor (1.494) to make it lighter to work on
+    Input: image
+    Output: resized image (numpy array)
+    '''
+    image = np.array(image)
+    scale = 1.494
+    width = int(image.shape[1] / scale)
+    height = int(image.shape[0] / scale)
+    dim = (width, height)
+    image = cv2.resize(image, dim, interpolation = cv2.INTER_AREA)
+    return image
 def deskew(image, model):
     Input: takes an image as an array
     Output: deskewed image
     '''
     # map the model classes to the actual degree of skew
     map = { 0: '-1', 1: '-10', 2: '-11', 3: '-12', 4: '-13',
             5: '-14',6: '-15', 7: '-2',  8: '-3',  9: '-4',
     Input: image (_type_): deskewed image
     Output: resized image to be passed to the autoencoder
     '''
     height, width = image.shape[:2]
     target_height = 600
     target_width = 600
     Input: image and autoencoder model
     Output: image
     '''
     image = image.astype(np.float32).reshape(1, 600, 600, 1)
     image = model.run(None, {'input_2': image})
     image = image[0]
     image = image.squeeze()
     image = image * 255
     image = image.astype('uint8')
     return image
 def extract_detected_entries_pdl(image):
+    """
+    Extracts text, scores, and boundary boxes from an image using OCR and returns a DataFrame.
+    This function takes an input image, applies OCR to detect text in the image, and then extracts
+    the detected text, confidence scores, and boundary boxes for each text entry. The extracted
+    information is returned in a DataFrame with columns "Text", "Score", and "Boundary Box".
+    Parameters
+    ----------
+    image : numpy.ndarray
+        The input image to be processed.
+    Returns
+    -------
+    pandas.DataFrame
+        A DataFrame containing the extracted text, confidence scores, and boundary boxes
+        for each detected text entry. The DataFrame has the following columns:
+        - "Text": The detected text.
+        - "Score": The confidence score for the detected text.
+        - "Boundary Box": The coordinates of the boundary box for the detected text.
+    """
+    # run the OCR
+    result = ocr.ocr(image)
+    # creates the Pandas dataframe
     txt = []
     scores = []
     boxes = []
       scores.append(r[-1][1])
       boxes.append(r[0])
+    return pd.DataFrame(np.transpose([txt, scores, boxes]),columns = ["Text","Score", "Boundary Box"])
 def cleanString_basic(word):
   word = word.replace("$", "s")
     string = re.sub(r'[a-zA-Z!\[\|]', '', date)
     return string, date_flags
+##### Main Function #####
+def pdf_extract_gr(image):
+  extractimg = preprocess_image(image)
+  #extractimg = np.array(image)
+  # deskew the image
+  deskewed_image, angle, skew_confidence = deskew(extractimg, model_deskew)
+  # prepare the image for the autoencoder
   cleanimg = prepare_image_to_autoencoder(deskewed_image)
+  # clean the image
+  img = autoencode_ONNX(cleanimg, model_denoise)
+  # extract the entries from the image
   df = extract_detected_entries_pdl(img)
+  # first name
   firstnamerow = df.iloc[0]
   firstname = firstnamerow[0]
   firstnameconfidence = round(float(firstnamerow[1]) * 100,3)
   firstnameconfidence = f"{firstnameconfidence}%"
+  # surname
   surnamerow = df.iloc[1]
   surname = surnamerow[0]
   surnameconfidence = round(float(surnamerow[1]) * 100,3)
   surnameconfidence = f"{surnameconfidence}%"
+  # death date and its confidence
   dodrow = df.iloc[2]
   dodname = dodrow[0]
   dodconfidence = round(float(dodrow[1]) * 100,3)
   dodconfidence = f"{dodconfidence}%"
+  # return all the results
   return df, deskewed_image, angle, skew_confidence, img, firstname, firstnameconfidence, surname, surnameconfidence, dodname, dodconfidence
+##### Gradio Style #####
 css = """
 .run_container {
   display: flex;
   align-items: center;
   gap: 10px;
 }
 .run_btn {
   margin: auto;
   width: 50%;
   margin: auto;
   display: flex;
 }
 .results_container {
   display: flex;
   justify-content: space-evenly;
 }
 .results_cell {
 }
 """
+##### Gradio Blocks #####
 with gr.Blocks(css = css) as demo:
   gr.Markdown("""