cdcvd committed on
Commit
8a01ec0
1 Parent(s): 176abf3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -36
app.py CHANGED
@@ -6,13 +6,6 @@ from docx import Document
6
  from rembg import remove
7
  import gradio as gr
8
 
9
- def convert_image_to_jpeg(input_path):
10
- image = Image.open(input_path)
11
- image = image.convert('RGB')
12
- output_path = os.path.splitext(input_path)[0] + ".jpg"
13
- image.save(output_path, 'JPEG')
14
- return output_path
15
-
16
  def trim_whitespace(image):
17
  gray_image = ImageOps.grayscale(image)
18
  inverted_image = ImageChops.invert(gray_image)
@@ -22,60 +15,55 @@ def trim_whitespace(image):
22
 
23
  def convert_pdf_to_images(pdf_path, zoom=2):
24
  pdf_document = fitz.open(pdf_path)
25
- name_with_extension = os.path.basename(pdf_path)
26
- name = os.path.splitext(name_with_extension)[0]
27
-
28
- output_paths = []
29
  for page_num in range(len(pdf_document)):
30
  page = pdf_document.load_page(page_num)
31
  matrix = fitz.Matrix(zoom, zoom)
32
  pix = page.get_pixmap(matrix=matrix)
33
  image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
34
  trimmed_image = trim_whitespace(image)
35
- output_path = f"{name}_page_{page_num + 1}.jpg"
36
- trimmed_image.save(output_path, 'JPEG')
37
- output_paths.append(output_path)
38
- return output_paths
39
 
40
- def convert_docx_to_jpeg(docx_path):
41
  document = Document(docx_path)
42
- output_paths = []
43
- for i, image_shape in enumerate(document.inline_shapes, start=1):
44
  image_stream = image_shape.image.blob
45
  image = Image.open(io.BytesIO(image_stream))
46
- output_path = f"{os.path.splitext(os.path.basename(docx_path))[0]}_page_{i}.jpg"
47
- image.save(output_path, 'JPEG')
48
- output_paths.append(output_path)
49
- return output_paths
 
 
 
 
 
50
 
51
  def process_file(input_file):
52
  file_extension = os.path.splitext(input_file.name)[1].lower()
53
 
54
  if file_extension in ['.png', '.jpeg', '.jpg', '.bmp', '.gif']:
55
- output_path = convert_image_to_jpeg(input_file.name)
56
- return remove_background(output_path)
 
 
57
  elif file_extension == '.pdf':
58
- image_paths = convert_pdf_to_images(input_file.name)
59
- return [remove_background(path) for path in image_paths]
60
  elif file_extension in ['.docx', '.doc']:
61
- image_paths = convert_docx_to_jpeg(input_file.name)
62
- return [remove_background(path) for path in image_paths]
63
  else:
64
  return "File format not supported."
65
 
66
- def remove_background(image_path):
67
- input_image = Image.open(image_path)
68
- output_image = remove(input_image)
69
- output_path = image_path.replace('.jpg', '_no_bg.png')
70
- output_image.save(output_path, 'PNG')
71
- return output_path
72
-
73
  def gradio_interface(input_file):
74
  return process_file(input_file)
75
 
76
  iface = gr.Interface(
77
  fn=gradio_interface,
78
- inputs=gr.inputs.File(label="Upload Word, PDF, or Image"),
79
  outputs=gr.outputs.Image(type="file", label="Processed Image(s)"),
80
  title="Document to Image Converter with Background Removal"
81
  )
 
6
  from rembg import remove
7
  import gradio as gr
8
 
 
 
 
 
 
 
 
9
  def trim_whitespace(image):
10
  gray_image = ImageOps.grayscale(image)
11
  inverted_image = ImageChops.invert(gray_image)
 
15
 
16
  def convert_pdf_to_images(pdf_path, zoom=2):
17
  pdf_document = fitz.open(pdf_path)
18
+ images = []
 
 
 
19
  for page_num in range(len(pdf_document)):
20
  page = pdf_document.load_page(page_num)
21
  matrix = fitz.Matrix(zoom, zoom)
22
  pix = page.get_pixmap(matrix=matrix)
23
  image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
24
  trimmed_image = trim_whitespace(image)
25
+ images.append(trimmed_image)
26
+ return images
 
 
27
 
28
+ def convert_docx_to_images(docx_path):
29
  document = Document(docx_path)
30
+ images = []
31
+ for image_shape in document.inline_shapes:
32
  image_stream = image_shape.image.blob
33
  image = Image.open(io.BytesIO(image_stream))
34
+ images.append(image)
35
+ return images
36
+
37
+ def remove_background_from_image(image):
38
+ return remove(image)
39
+
40
+
41
+
42
+
43
 
44
  def process_file(input_file):
45
  file_extension = os.path.splitext(input_file.name)[1].lower()
46
 
47
  if file_extension in ['.png', '.jpeg', '.jpg', '.bmp', '.gif']:
48
+ image = Image.open(input_file)
49
+ image = image.convert('RGB')
50
+ output_image = remove_background_from_image(image)
51
+ return output_image
52
  elif file_extension == '.pdf':
53
+ images = convert_pdf_to_images(input_file.name)
54
+ return [remove_background_from_image(image) for image in images]
55
  elif file_extension in ['.docx', '.doc']:
56
+ images = convert_docx_to_images(input_file.name)
57
+ return [remove_background_from_image(image) for image in images]
58
  else:
59
  return "File format not supported."
60
 
 
 
 
 
 
 
 
61
  def gradio_interface(input_file):
62
  return process_file(input_file)
63
 
64
  iface = gr.Interface(
65
  fn=gradio_interface,
66
+ inputs=gr.File(label="Upload Word, PDF, or Image"),
67
  outputs=gr.outputs.Image(type="file", label="Processed Image(s)"),
68
  title="Document to Image Converter with Background Removal"
69
  )