to-be committed on
Commit
15e0f69
1 Parent(s): d11b5d1
Files changed (4)
  1. Sample1.jpg +0 -0
  2. Sample2.jpg +0 -0
  3. app.py +191 -0
  4. requirements.txt +4 -0
Sample1.jpg ADDED
Sample2.jpg ADDED
app.py ADDED
@@ -0,0 +1,191 @@
+ import cv2
+ from PIL import Image
+ import numpy as np
+ import time
+ import math
+ import gradio as gr
+
+
+ def find_signature_bounding_boxes(image):
+     # Start measuring time
+     start_time = time.time()
+
+     if image is None:
+         raise ValueError("Could not open or find the image")
+
+     # Binarize the image: convert to grayscale, then threshold with Otsu's method
+     gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+     _, binary_image = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
+
+     # Find connected components
+     num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(binary_image, connectivity=8, ltype=cv2.CV_32S)
+
+     # Calculate the median component area and derive a rough character width from it
+     areas = stats[1:, cv2.CC_STAT_AREA]  # Exclude background
+     median_area = np.median(areas)
+     print('median_area: ' + str(median_area))
+     median_character_width = int(math.sqrt(median_area))
+     print('median_character_width: ' + str(median_character_width))
+
+     # Define area thresholds
+     min_area_threshold = median_area * 4
+     max_area_threshold = median_area * 50
+
+     # Filter components based on area thresholds
+     possible_signatures = []
+     for i in range(1, num_labels):  # Exclude background
+         area = stats[i, cv2.CC_STAT_AREA]
+         if min_area_threshold < area < max_area_threshold:
+             left = stats[i, cv2.CC_STAT_LEFT]
+             top = stats[i, cv2.CC_STAT_TOP]
+             width = stats[i, cv2.CC_STAT_WIDTH]
+             height = stats[i, cv2.CC_STAT_HEIGHT]
+             print('Found candidate with area: ' + str(area))
+             # Filter horizontal lines
+             if height < median_character_width * 5 and width > median_character_width * 30:
+                 print(' -> candidate is horizontal line with width, height: ' + str(width) + ',' + str(height))
+                 continue
+             # Filter vertical lines
+             if width < median_character_width * 5 and height > median_character_width * 30:
+                 print(' -> candidate is vertical line with width, height: ' + str(width) + ',' + str(height))
+                 continue
+             # Filter on the ratio of ink pixels (logos, for example, have a higher ratio); 0.3 is a guesstimate for now
+             roi = binary_image[top:top + height, left:left + width]
+             num_black_pixels = cv2.countNonZero(roi)  # Number of ink (foreground) pixels in the ROI
+             total_pixels = width * height  # Total number of pixels in the ROI
+             ratio = num_black_pixels / total_pixels  # Ratio of ink pixels
+             print(' -> candidate has black pixel ratio: ' + str(ratio))
+             if ratio > 0.30:
+                 print(' -> candidate has too high black pixel ratio')
+                 continue
+             possible_signatures.append((left, top, width, height))
+
+     print('Nr of signatures found before merging: ' + str(len(possible_signatures)))
+     possible_signatures = merge_nearby_rectangles(possible_signatures, nearness=median_character_width * 4)
+
+     # End measuring time
+     end_time = time.time()
+     print(f"Function took {end_time - start_time:.2f} seconds to process the image.")
+
+     return possible_signatures
+
+ def merge_nearby_rectangles(rectangles, nearness):
+     # Merge rectangles whose borders come within `nearness` pixels of each other
+     def is_near(rect1, rect2):
+         left1, top1, width1, height1 = rect1
+         left2, top2, width2, height2 = rect2
+         right1, bottom1 = left1 + width1, top1 + height1
+         right2, bottom2 = left2 + width2, top2 + height2
+         return not (right1 < left2 - nearness or left1 > right2 + nearness or
+                     bottom1 < top2 - nearness or top1 > bottom2 + nearness)
+
+     def merge(rect1, rect2):
+         # Return the smallest rectangle enclosing both inputs
+         left1, top1, width1, height1 = rect1
+         left2, top2, width2, height2 = rect2
+         right1, bottom1 = left1 + width1, top1 + height1
+         right2, bottom2 = left2 + width2, top2 + height2
+         min_left = min(left1, left2)
+         min_top = min(top1, top2)
+         max_right = max(right1, right2)
+         max_bottom = max(bottom1, bottom2)
+         return (min_left, min_top, max_right - min_left, max_bottom - min_top)
+
+     merged = []
+     while rectangles:
+         current = rectangles.pop(0)
+         has_merged = False
+
+         # First try to merge into a rectangle that was already accepted
+         for i, other in enumerate(merged):
+             if is_near(current, other):
+                 merged[i] = merge(current, other)
+                 has_merged = True
+                 break
+
+         # Otherwise absorb any remaining rectangles that are near the current one
+         if not has_merged:
+             for i in range(len(rectangles) - 1, -1, -1):
+                 if is_near(current, rectangles[i]):
+                     current = merge(current, rectangles.pop(i))
+
+         if not has_merged:
+             merged.append(current)
+
+     return merged
+
+
+ def run_detection(input_image):
+
+     """
+     init_image = input_image.convert("RGB")
+     original_size = init_image.size
+
+     _, image_tensor = image_transform_grounding(init_image)
+     image_pil: Image = image_transform_grounding_for_vis(init_image)
+
+     # run grounding
+     boxes, logits, phrases = predict(model, image_tensor, grounding_caption, box_threshold, text_threshold, device='cpu')
+     annotated_frame = annotate(image_source=np.asarray(image_pil), boxes=boxes, logits=logits, phrases=phrases)
+     image_with_box = Image.fromarray(cv2.cvtColor(annotated_frame, cv2.COLOR_BGR2RGB))
+     """
+
+     # input_image is a PIL image in RGB; copy it into a writable numpy array
+     image = np.array(input_image.convert("RGB"))
+
+     # Find bounding boxes of possible signatures on the document
+     signatures = find_signature_bounding_boxes(image)
+     print('Nr of signatures found: ' + str(len(signatures)))
+     # Draw bounding boxes on the image
+     for (x, y, w, h) in signatures:
+         cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)
+     # The array is already RGB, so it can be handed back to PIL directly
+     image_with_box = Image.fromarray(image)
+
+     return image_with_box
+
+ if __name__ == "__main__":
+
+     css = """
+     #mkd {
+         height: 500px;
+         overflow: auto;
+         border: 1px solid #ccc;
+     }
+     """
+     block = gr.Blocks(css=css).queue()
+     with block:
+         gr.Markdown("<h1><center>Signature detection</center></h1>")
+         gr.Markdown("<h3><center>See the article <a href='https://github.com/IDEA-Research/GroundingDINO'>Grounding DINO</a></center></h3>")
+         gr.Markdown("<h3><center>Serves as an example where deep learning is not needed.</center></h3>")
+
+         with gr.Row():
+             with gr.Column():
+                 input_image = gr.Image(source='upload', type="pil")
+                 grounding_caption = gr.Textbox(label="Detection Prompt")
+                 run_button = gr.Button(label="Run")
+                 with gr.Accordion("Advanced options", open=False):
+                     box_threshold = gr.Slider(
+                         label="Box Threshold", minimum=0.0, maximum=1.0, value=0.25, step=0.001
+                     )
+                     text_threshold = gr.Slider(
+                         label="Text Threshold", minimum=0.0, maximum=1.0, value=0.25, step=0.001
+                     )
+
+             with gr.Column():
+                 gallery = gr.outputs.Image(
+                     type="pil",
+                     # label="grounding results"
+                 ).style(full_width=True, full_height=True)
+                 # gallery = gr.Gallery(label="Generated images", show_label=False).style(
+                 #     grid=[1], height="auto", container=True, full_width=True, full_height=True)
+
+         run_button.click(fn=run_detection, inputs=[input_image], outputs=[gallery])
+         gr.Examples(
+             [["Sample1.jpg"], ["Sample2.jpg"]],
+             inputs=[input_image],
+             outputs=[gallery],
+             fn=run_detection,
+             cache_examples=True,
+             label='Try this example input!'
+         )
+     block.launch(share=False, show_api=False, show_error=True)
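
For quick experimentation outside the Gradio UI, the detector can also be driven with OpenCV alone. This is only a minimal sketch, assuming app.py is importable from the working directory and Sample1.jpg sits next to it; the output filename is made up for the example:

import cv2
from app import find_signature_bounding_boxes

image = cv2.imread("Sample1.jpg")             # loaded as BGR, which the function expects
boxes = find_signature_bounding_boxes(image)  # list of (left, top, width, height) tuples
for (x, y, w, h) in boxes:
    cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)
cv2.imwrite("Sample1_detected.jpg", image)    # hypothetical output path

Because the Gradio UI sits behind the __main__ guard, importing app this way does not launch the web interface.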
requirements.txt ADDED
@@ -0,0 +1,4 @@
+ opencv-python
+ numpy
+ Pillow
+ gradio
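
To run the Space locally, the usual Gradio workflow should apply (assuming the dependencies above are installed into the active Python environment):

pip install -r requirements.txt
python app.py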