File size: 3,078 Bytes
176abf3 e05b08a 74cc02c 176abf3 e05b08a 176abf3 74cc02c c89663f 74cc02c c89663f 8a01ec0 176abf3 c89663f 8a01ec0 c89663f 8a01ec0 c89663f 8a01ec0 176abf3 8a01ec0 176abf3 8a01ec0 176abf3 8f74b38 8a01ec0 176abf3 8a01ec0 b874e98 176abf3 8956ec9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 |
import os
from PIL import Image, ImageOps, ImageChops
import io
import fitz # PyMuPDF
from docx import Document
from rembg import remove
import gradio as gr
import os
import os
from io import BytesIO
from docx import Document
from PIL import Image
# Create the "static" directory; exist_ok=True makes this a no-op when it is already present.
os.makedirs("static", exist_ok=True)
def trim_whitespace(image):
    """Crop an image down to the bounding box of its non-white content.

    The image is converted to grayscale and inverted so that pure white
    pixels become zero; the bounding box of the remaining non-zero pixels
    is then used as the crop rectangle on the original image.

    Args:
    - image: A PIL Image object.
    Returns:
    - The result of cropping ``image`` to that bounding box.
    """
    # getbbox() measures the extent of non-zero pixels, so the grayscale
    # copy is inverted first to make white count as "empty".
    content_box = ImageChops.invert(ImageOps.grayscale(image)).getbbox()
    return image.crop(content_box)
def convert_pdf_to_images(pdf_path, zoom=2):
    """Render every page of a PDF to a whitespace-trimmed PIL image.

    Args:
    - pdf_path: Path of the PDF file, opened with PyMuPDF.
    - zoom: Scale factor applied to both axes when rasterising each page
      (default 2).
    Returns:
    - A list of PIL Image objects, one per page, in page order. Each image
      has been passed through trim_whitespace().
    """
    # The scaling matrix is identical for every page, so build it once.
    matrix = fitz.Matrix(zoom, zoom)
    images = []
    # The context manager closes the document (and its file handle) even
    # when rendering a page raises.
    with fitz.open(pdf_path) as pdf_document:
        for page in pdf_document:
            # alpha=False keeps the pixmap free of an alpha channel so the
            # sample buffer matches the "RGB" mode given to Image.frombytes.
            pix = page.get_pixmap(matrix=matrix, alpha=False)
            # Image.frombytes copies the samples, so the image stays valid
            # after the document is closed.
            image = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
            images.append(trim_whitespace(image))
    return images
def convert_docx_to_jpeg(docx_bytes):
    """
    Convert each embedded image in a DOCX file to a separate JPEG image and return them as a list.
    Args:
    - docx_bytes: The binary content of the DOCX file. A filesystem path or a
      binary file-like object is also accepted and is handed to python-docx as is.
    Returns:
    - A list of PIL Image objects, each re-encoded as JPEG in memory.
    """
    # Only raw bytes need wrapping; Document() opens paths and file-like
    # objects itself.
    if isinstance(docx_bytes, (bytes, bytearray)):
        source = BytesIO(docx_bytes)
    else:
        source = docx_bytes
    document = Document(source)

    images = []
    for rel in document.part.rels.values():
        # External relationships (for example a hyperlink whose URL happens
        # to contain "image") have no target part to read, so skip them
        # before touching rel.target_part.
        if rel.is_external or "image" not in rel.target_ref:
            continue
        picture = Image.open(BytesIO(rel.target_part.blob))
        jpeg_buffer = BytesIO()
        # JPEG cannot store palette or alpha data, hence the RGB conversion.
        picture.convert('RGB').save(jpeg_buffer, format="JPEG")
        jpeg_buffer.seek(0)
        images.append(Image.open(jpeg_buffer))
    return images
# Example usage:
# with open("example.docx", "rb") as f:
# docx_bytes = f.read()
# images = convert_docx_to_jpeg(docx_bytes)
# for img in images:
# img.show()
def remove_background_from_image(image):
    """Strip the background from an image using rembg.

    Args:
    - image: The image to process; it is passed straight to rembg's remove().
    Returns:
    - Whatever rembg's remove() returns for that input.
    """
    cutout = remove(image)
    return cutout
def process_file(input_file):
    """Remove the background from an uploaded image, PDF or DOCX file.

    Args:
    - input_file: The uploaded file, given either as a filesystem path
      (str or os.PathLike) or as an object whose ``name`` attribute holds
      the path. May be None when nothing was uploaded.
    Returns:
    - For image files: a single background-removed image.
    - For PDF and DOCX files: a list of background-removed images (one per
      PDF page, or one per image embedded in the DOCX).
    - The string "No file uploaded." when input_file is None.
    - The string "File format not supported." for any other extension.
    """
    if input_file is None:
        return "No file uploaded."

    # Accept both a plain path and a file wrapper exposing ``.name``.
    # Path objects are checked first because their own ``.name`` is only
    # the base name, not the full path.
    if isinstance(input_file, (str, os.PathLike)):
        file_path = os.fspath(input_file)
    else:
        file_path = input_file.name

    file_extension = os.path.splitext(file_path)[1].lower()

    if file_extension in ('.png', '.jpeg', '.jpg', '.bmp', '.gif'):
        # convert() produces an independent in-memory image, so the source
        # file can be closed as soon as the conversion is done.
        with Image.open(file_path) as source:
            image = source.convert('RGB')
        return remove_background_from_image(image)

    if file_extension == '.pdf':
        images = convert_pdf_to_images(file_path)
    elif file_extension == '.docx':
        # convert_docx_to_jpeg() expects the file's binary content, not its
        # path. Legacy ".doc" files are not routed here because python-docx
        # only reads the ".docx" format.
        with open(file_path, "rb") as docx_file:
            images = convert_docx_to_jpeg(docx_file.read())
    else:
        return "File format not supported."

    return [remove_background_from_image(image) for image in images]
def gradio_interface(input_file):
    """Gradio callback: process the upload and return images for a gallery.

    Args:
    - input_file: The value Gradio supplies for the ``gr.File`` input.
    Returns:
    - A list of processed images; a single result from process_file() is
      wrapped in a one-element list.
    Raises:
    - gr.Error: When process_file() returns a message string instead of
      images, so the text is reported to the user rather than being
      handed to an image component.
    """
    result = process_file(input_file)
    if isinstance(result, str):
        raise gr.Error(result)
    if isinstance(result, (list, tuple)):
        return list(result)
    return [result]


# A gallery output is used because PDF and DOCX uploads produce several
# images, which a single gr.Image component cannot display.
iface = gr.Interface(
    fn=gradio_interface,
    inputs=gr.File(label="Upload Word, PDF, or Image"),
    outputs=gr.Gallery(label="Processed Image(s)"),
    title="Document to Image Converter with Background Removal"
)
# Start the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()