cdcvd committed on
Commit
afa3a48
1 Parent(s): 8f74b38

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +121 -46
app.py CHANGED
@@ -1,18 +1,32 @@
1
  import os
 
2
  from PIL import Image, ImageOps, ImageChops
3
  import io
4
  import fitz # PyMuPDF
5
  from docx import Document
6
  from rembg import remove
7
  import gradio as gr
8
- import os
9
- import os
10
- from io import BytesIO
11
- from docx import Document
12
- from PIL import Image
13
 
14
- # ایجاد دایرکتوری static
15
  os.makedirs("static", exist_ok=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  def trim_whitespace(image):
17
  gray_image = ImageOps.grayscale(image)
18
  inverted_image = ImageChops.invert(gray_image)
@@ -21,34 +35,20 @@ def trim_whitespace(image):
21
  return trimmed_image
22
 
23
  def convert_pdf_to_images(pdf_path, zoom=2):
24
-
25
-
26
  pdf_document = fitz.open(pdf_path)
27
-
28
  images = []
29
  for page_num in range(len(pdf_document)):
30
- page = pdf_document.load_page(page_num)
31
- matrix = fitz.Matrix(zoom, zoom)
32
- pix = page.get_pixmap(matrix=matrix)
33
- image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
34
- trimmed_image = trim_whitespace(image)
35
- images.append(trimmed_image)
36
  return images
37
 
38
-
39
  def convert_docx_to_jpeg(docx_bytes):
40
- """
41
- Convert each image in a DOCX file to a separate JPEG image and return them as a list.
42
-
43
- Args:
44
- - docx_bytes: The binary content of the DOCX file.
45
-
46
- Returns:
47
- - A list of PIL Image objects in JPEG format.
48
- """
49
  document = Document(BytesIO(docx_bytes))
50
  images = []
51
-
52
  for rel in document.part.rels.values():
53
  if "image" in rel.target_ref:
54
  image_stream = rel.target_part.blob
@@ -57,49 +57,124 @@ def convert_docx_to_jpeg(docx_bytes):
57
  image.convert('RGB').save(jpeg_image, format="JPEG")
58
  jpeg_image.seek(0)
59
  images.append(Image.open(jpeg_image))
60
-
61
  return images
62
 
63
- # Example usage:
64
- # with open("example.docx", "rb") as f:
65
- # docx_bytes = f.read()
66
- # images = convert_docx_to_jpeg(docx_bytes)
67
- # for img in images:
68
- # img.show()
69
-
70
  def remove_background_from_image(image):
71
  return remove(image)
72
 
73
-
74
-
75
-
76
-
77
  def process_file(input_file):
78
  file_extension = os.path.splitext(input_file.name)[1].lower()
 
79
 
80
  if file_extension in ['.png', '.jpeg', '.jpg', '.bmp', '.gif']:
81
  image = Image.open(input_file)
82
- image = image.convert('RGB')
83
  output_image = remove_background_from_image(image)
84
- return output_image
85
  elif file_extension == '.pdf':
86
  images = convert_pdf_to_images(input_file.name)
87
- return [remove_background_from_image(image) for image in images]
88
  elif file_extension in ['.docx', '.doc']:
89
  images = convert_docx_to_jpeg(input_file.name)
90
- return [remove_background_from_image(image) for image in images]
91
  else:
92
  return "File format not supported."
93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
  def gradio_interface(input_file):
95
- return process_file(input_file)
 
 
 
96
 
97
  iface = gr.Interface(
98
  fn=gradio_interface,
99
  inputs=gr.File(label="Upload Word, PDF, or Image"),
100
- outputs=gr.Image(type="pil", label="Processed Image(s)"),
101
- title="Document to Image Converter with Background Removal"
102
  )
103
 
104
  if __name__ == "__main__":
105
- iface.launch()
 
 
 
 
 
 
1
  import os
2
+
3
  from PIL import Image, ImageOps, ImageChops
4
  import io
5
  import fitz # PyMuPDF
6
  from docx import Document
7
  from rembg import remove
8
  import gradio as gr
9
+ from hezar.models import Model
10
+ from ultralytics import YOLO
11
+ import json
 
 
12
 
13
+ # Create the required directories
14
  os.makedirs("static", exist_ok=True)
15
+ os.makedirs("output_images", exist_ok=True)
16
+
17
+
def remove_readonly(func, path, excinfo):
    """Make *path* writable, then retry the operation that failed on it.

    The signature matches the error-handler callback that
    ``shutil.rmtree(..., onerror=...)`` invokes.

    Args:
        func: The callable that failed; it is retried with *path*.
        path: The filesystem path the callable failed on.
        excinfo: Exception details supplied by the caller (unused).
    """
    # Imported here because the module never imports ``stat`` at the top;
    # without this the handler raised NameError the first time it ran.
    import stat

    os.chmod(path, stat.S_IWRITE)
    func(path)
21
+
# Delete any "runs" directory left next to this file by a previous session.
# ``shutil`` is imported here because this statement executes at import time,
# before the module's later ``import shutil`` line has run.
import shutil

current_dir = os.path.dirname(os.path.abspath(__file__))
ultralytics_path = os.path.join(current_dir, 'runs')

if os.path.exists(ultralytics_path):
    # remove_readonly clears the read-only bit on entries that refuse deletion.
    shutil.rmtree(ultralytics_path, onerror=remove_readonly)
30
  def trim_whitespace(image):
31
  gray_image = ImageOps.grayscale(image)
32
  inverted_image = ImageChops.invert(gray_image)
 
35
  return trimmed_image
36
 
def convert_pdf_to_images(pdf_path, zoom=2):
    """Render every page of a PDF as a whitespace-trimmed PIL image.

    Args:
        pdf_path: Path of the PDF file to open with PyMuPDF.
        zoom: Scale factor applied to both axes when rasterising a page.

    Returns:
        A list with one trimmed RGB image per page, in page order.
    """
    # The same scaling matrix is used for every page, so build it once.
    matrix = fitz.Matrix(zoom, zoom)
    images = []
    # Open the document as a context manager so it is closed even when
    # rendering a page fails.
    with fitz.open(pdf_path) as pdf_document:
        for page in pdf_document:
            # alpha=False (the default) keeps the samples as packed RGB,
            # which is what Image.frombytes("RGB", ...) expects.
            pix = page.get_pixmap(matrix=matrix, alpha=False)
            image = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
            images.append(trim_whitespace(image))
    return images
48
 
 
49
  def convert_docx_to_jpeg(docx_bytes):
 
 
 
 
 
 
 
 
 
50
  document = Document(BytesIO(docx_bytes))
51
  images = []
 
52
  for rel in document.part.rels.values():
53
  if "image" in rel.target_ref:
54
  image_stream = rel.target_part.blob
 
57
  image.convert('RGB').save(jpeg_image, format="JPEG")
58
  jpeg_image.seek(0)
59
  images.append(Image.open(jpeg_image))
 
60
  return images
61
 
 
 
 
 
 
 
 
def remove_background_from_image(image):
    """Return *image* after passing it through rembg's ``remove``."""
    cutout = remove(image)
    return cutout
64
 
 
 
 
 
def process_file(input_file):
    """Convert an uploaded file to background-free images and save them.

    Images are used as-is, PDFs are rendered page by page, and Word files
    contribute their embedded pictures. Each resulting image has its
    background removed and is written to ``output_images/image_<i>.jpg``.

    Args:
        input_file: The uploaded file object; only its ``name`` attribute
            (the path on disk) is read.

    Returns:
        The list of background-removed images, or the string
        "File format not supported." for an unrecognised extension.
    """
    file_extension = os.path.splitext(input_file.name)[1].lower()

    if file_extension in ('.png', '.jpeg', '.jpg', '.bmp', '.gif'):
        # Open by path, like the other branches do.
        pages = [Image.open(input_file.name)]
    elif file_extension == '.pdf':
        pages = convert_pdf_to_images(input_file.name)
    elif file_extension in ('.docx', '.doc'):
        # convert_docx_to_jpeg wraps its argument in BytesIO, so it needs the
        # file's bytes, not its path.
        # NOTE(review): python-docx is documented for .docx only; '.doc' is
        # kept because the original accepted it - confirm it can work.
        with open(input_file.name, 'rb') as docx_file:
            pages = convert_docx_to_jpeg(docx_file.read())
    else:
        return "File format not supported."

    images = [remove_background_from_image(page) for page in pages]

    input_folder = 'output_images'
    os.makedirs(input_folder, exist_ok=True)

    # Drop images written for a previous upload; the detection step scans the
    # whole folder and would otherwise pick them up again.
    for stale_name in os.listdir(input_folder):
        if stale_name.startswith('image_') and stale_name.endswith('.jpg'):
            os.remove(os.path.join(input_folder, stale_name))

    for i, img in enumerate(images):
        # JPEG cannot store alpha or palette data, so normalise every
        # non-RGB mode (not only RGBA) before saving.
        if img.mode != 'RGB':
            img = img.convert('RGB')
        img.save(os.path.join(input_folder, f'image_{i}.jpg'))

    return images
89
+
90
+
91
+ import shutil
92
+
93
+
94
+
def _collect_crop_predictions(crop_folder, ocr_model, yolo_model_numbers):
    """Read every crop under *crop_folder* and predict its text once."""
    crops = []
    for crop_label in sorted(os.listdir(crop_folder)):
        crop_label_folder = os.path.join(crop_folder, crop_label)
        if not os.path.isdir(crop_label_folder):
            continue
        for crop_filename in sorted(os.listdir(crop_label_folder)):
            crop_image_path = os.path.join(crop_label_folder, crop_filename)
            # These three classes go to the OCR model; every other class
            # is read with the second YOLO model.
            if crop_label in ['mablagh_H', 'owner', 'vajh']:
                text_prediction = predict_text(ocr_model, crop_image_path)
            else:
                text_prediction = process_numbers(yolo_model_numbers, crop_image_path)
            crops.append({
                'crop_image_path': crop_image_path,
                'text_prediction': text_prediction,
                'class_label': crop_label
            })
    return crops


def run_detection_and_ocr():
    """Detect fields in ``output_images``, read them, and dump the result.

    Runs the field-detection YOLO model over the folder with crop saving
    enabled, predicts text for every saved crop, and writes the collected
    results to ``output.json``.

    Returns:
        The path of the JSON file that was written ('output.json').
    """
    # Load models
    ocr_model = Model.load('hezarai/crnn-fa-printed-96-long')
    yolo_model_check = YOLO("best_300_D_check.pt")
    yolo_model_numbers = YOLO("P_D_T.pt")

    input_folder = 'output_images'
    output_folder = 'runs/detect/predict'
    crop_folder = os.path.join(output_folder, 'crops')

    # Remove the previous request's output first. Ultralytics is expected to
    # write to a new numbered directory when this one already exists, which
    # would leave this function reading stale crops - confirm against the
    # installed version.
    shutil.rmtree(output_folder, ignore_errors=True)

    yolo_model_check.predict(input_folder, save=True, conf=0.5, save_crop=True)

    results = []

    if os.path.exists(crop_folder):
        # The crop scan does not depend on the input image, so do it (and
        # the model inference it triggers) once instead of once per image.
        crops = _collect_crop_predictions(crop_folder, ocr_model, yolo_model_numbers)

        # NOTE(review): as in the original, every image entry receives the
        # full crop list; crops are not attributed to their source image.
        for filename in sorted(os.listdir(input_folder)):
            if filename.endswith(('.JPEG', '.jpg')):
                results.append({
                    'image': filename,
                    'crops': list(crops)
                })

    output_json_path = 'output.json'
    with open(output_json_path, 'w', encoding='utf-8') as f:
        json.dump(results, f, ensure_ascii=False, indent=4)

    return output_json_path
139
+
def predict_text(model, image_path):
    """Run the OCR *model* on one image file and return the text it reads.

    Args:
        model: An object whose ``predict`` method accepts a PIL image.
        image_path: Path of the image to read.

    Returns:
        The ``'text'`` fields joined by spaces when the model returns a
        list, ``str(output)`` for any other result, or "N/A" when the
        image file does not exist.
    """
    try:
        # resize() returns a new, fully loaded image, so the file handle
        # can be released as soon as the block exits.
        with Image.open(image_path) as source:
            image = source.resize((320, 320))
    except FileNotFoundError:
        return "N/A"

    output = model.predict(image)
    if isinstance(output, list):
        return ' '.join([item['text'] for item in output])
    return str(output)
150
+
def process_numbers(model, image_path):
    """Read a string from an image by detecting its characters.

    Calls *model* on the image, takes the boxes of the first result, and
    concatenates their class labels ordered by each box's first ``xyxy``
    coordinate.

    Args:
        model: A callable detector exposing a ``names`` class-id mapping.
        image_path: Path of the image to analyse.

    Returns:
        The detected labels joined into a single string.
    """
    detections = model(image_path, conf=0.5, save_crop=False)

    labelled = []
    for box in detections[0].boxes:
        label = model.names[int(box.cls[0].cpu().numpy())]
        first_coord = box.xyxy[0].cpu().numpy().tolist()[0]
        labelled.append((first_coord, label))

    # list.sort is stable, so ties keep their detection order.
    labelled.sort(key=lambda pair: pair[0])
    return ''.join(label for _, label in labelled)
160
+
def gradio_interface(input_file):
    """Gradio callback: convert the upload, run detection/OCR, return JSON.

    Args:
        input_file: The uploaded file object, or None when nothing was
            uploaded.

    Returns:
        The parsed contents of the JSON file produced by
        ``run_detection_and_ocr``, or ``{"error": <message>}`` when there is
        no upload or the file type is not supported.
    """
    if input_file is None:
        return {"error": "No file uploaded."}

    processed = process_file(input_file)
    # process_file reports an unsupported extension by returning a string;
    # running detection then would only re-read images from an earlier upload.
    if isinstance(processed, str):
        return {"error": processed}

    json_output = run_detection_and_ocr()
    with open(json_output, 'r', encoding='utf-8') as f:
        return json.load(f)
166
 
# Gradio UI: a single file upload in, the detection/OCR JSON out.
iface = gr.Interface(
    title="Document to JSON Converter with Background Removal",
    fn=gradio_interface,
    inputs=gr.File(label="Upload Word, PDF, or Image"),
    outputs=gr.JSON(label="JSON Output"),
)

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()
176
+
177
+
178
+
179
+
180
+