File size: 6,113 Bytes
176abf3
afa3a48
176abf3
 
 
 
 
 
afa3a48
 
 
176abf3
afa3a48
e05b08a
afa3a48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
176abf3
 
 
 
 
 
 
 
74cc02c
 
 
afa3a48
 
 
 
 
 
8a01ec0
176abf3
c89663f
 
8a01ec0
c89663f
 
 
 
 
 
 
 
8a01ec0
 
 
 
 
176abf3
 
afa3a48
176abf3
 
8a01ec0
 
afa3a48
176abf3
8a01ec0
afa3a48
176abf3
8f74b38
afa3a48
176abf3
 
 
afa3a48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
176abf3
afa3a48
 
 
 
176abf3
 
 
8a01ec0
afa3a48
 
176abf3
 
 
afa3a48
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
import io
import json
import os
import shutil
import stat

import fitz  # PyMuPDF
import gradio as gr
from docx import Document
from hezar.models import Model
from PIL import Image, ImageChops, ImageOps
from rembg import remove
from ultralytics import YOLO

# Create the directories the app needs.
for required_dir in ("static", "output_images"):
    os.makedirs(required_dir, exist_ok=True)


def remove_readonly(func, path, excinfo):
    """Make *path* writable and retry the operation that failed on it.

    The ``(func, path, excinfo)`` signature is the one ``shutil.rmtree``
    uses for its ``onerror`` callback, which is how this file uses it.

    Args:
        func: The callable to retry; it is invoked as ``func(path)``.
        path: Filesystem path whose mode is changed before the retry.
        excinfo: Exception information supplied by the caller; unused.
    """
    # Requires the module-level ``import stat``.
    os.chmod(path, stat.S_IWRITE)
    func(path)

# Remove a "runs" directory left next to this script by an earlier session.
# shutil must already be imported when this module-level code executes.
current_dir = os.path.dirname(os.path.abspath(__file__))
ultralytics_path = os.path.join(current_dir, 'runs')

if os.path.exists(ultralytics_path):
    # remove_readonly makes an entry writable and retries when deletion fails.
    shutil.rmtree(ultralytics_path, onerror=remove_readonly)
def trim_whitespace(image):
    """Crop *image* to the bounding box of its non-white content.

    The image is converted to grayscale and inverted so that pure white
    becomes zero; the bounding box of the remaining non-zero pixels is then
    used as the crop region.

    Args:
        image: The PIL image to trim.

    Returns:
        A cropped copy of *image*.
    """
    inverted = ImageChops.invert(ImageOps.grayscale(image))
    return image.crop(inverted.getbbox())

def convert_pdf_to_images(pdf_path, zoom=2):
    """Render every page of a PDF as a whitespace-trimmed PIL image.

    Args:
        pdf_path: Path of the PDF file to open.
        zoom: Scale factor applied to both axes when rasterising a page.

    Returns:
        A list with one trimmed RGB image per page, in page order.
    """
    # The matrix depends only on ``zoom``, so build it once for all pages.
    matrix = fitz.Matrix(zoom, zoom)
    images = []
    # The context manager closes the document even if a page fails to render.
    with fitz.open(pdf_path) as pdf_document:
        for page in pdf_document:
            pix = page.get_pixmap(matrix=matrix)
            image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
            images.append(trim_whitespace(image))
    return images

def convert_docx_to_jpeg(docx_bytes):
    """Extract the images embedded in a Word document as JPEG-encoded images.

    Args:
        docx_bytes: Raw bytes of a ``.docx`` file. A filesystem path or a
            binary file-like object is also accepted and is handed to
            ``Document`` unchanged.

    Returns:
        A list of PIL images, one per embedded image relationship, each
        re-encoded as an RGB JPEG held in memory.
    """
    if isinstance(docx_bytes, (bytes, bytearray)):
        source = io.BytesIO(docx_bytes)
    else:
        source = docx_bytes
    document = Document(source)

    images = []
    for rel in document.part.rels.values():
        # Linked (external) relationships have no target part to read from.
        if rel.is_external or "image" not in rel.target_ref:
            continue
        jpeg_buffer = io.BytesIO()
        with Image.open(io.BytesIO(rel.target_part.blob)) as embedded:
            embedded.convert('RGB').save(jpeg_buffer, format="JPEG")
        jpeg_buffer.seek(0)
        # The returned image reads lazily from jpeg_buffer, which it keeps alive.
        images.append(Image.open(jpeg_buffer))
    return images

def remove_background_from_image(image):
    """Return the result of passing *image* to ``rembg.remove``."""
    cutout = remove(image)
    return cutout

def process_file(input_file):
    """Turn an uploaded file into background-free images saved as JPEGs.

    Images are opened directly, PDFs are rendered page by page, and Word
    documents contribute their embedded images. Each image goes through
    background removal and is written to ``output_images/image_<i>.jpg``.

    Args:
        input_file: The upload to process: either an object whose ``name``
            attribute is the file path, or the path itself.

    Returns:
        The list of background-removed images, or the string
        ``"File format not supported."`` when the extension is not handled.
    """
    # Accept both a file wrapper exposing ``.name`` and a plain path.
    file_path = getattr(input_file, "name", input_file)
    file_extension = os.path.splitext(file_path)[1].lower()

    if file_extension in ('.png', '.jpeg', '.jpg', '.bmp', '.gif'):
        images = [Image.open(file_path)]
    elif file_extension == '.pdf':
        images = convert_pdf_to_images(file_path)
    elif file_extension in ('.docx', '.doc'):
        # convert_docx_to_jpeg takes the document's bytes, not its path.
        # NOTE(review): legacy .doc is presumably not readable by python-docx - confirm.
        with open(file_path, 'rb') as docx_file:
            images = convert_docx_to_jpeg(docx_file.read())
    else:
        return "File format not supported."

    images = [remove_background_from_image(image) for image in images]

    output_folder = 'output_images'
    os.makedirs(output_folder, exist_ok=True)
    # Delete JPEGs written for a previous upload; otherwise a shorter document
    # leaves old pages behind and they are picked up again by detection.
    for stale_name in os.listdir(output_folder):
        if stale_name.startswith('image_') and stale_name.endswith('.jpg'):
            os.remove(os.path.join(output_folder, stale_name))

    for i, img in enumerate(images):
        # JPEG cannot store alpha or palette modes, so convert those to RGB.
        if img.mode not in ('RGB', 'L', 'CMYK'):
            img = img.convert('RGB')
        img.save(os.path.join(output_folder, f'image_{i}.jpg'))

    return images


import shutil



def run_detection_and_ocr():
    """Detect fields in the saved images, read each crop, and dump JSON.

    Every ``.jpg``/``.jpeg`` file in ``output_images`` is passed on its own
    to the detection model with crop saving enabled. The crops written for
    that image are then read: crops labelled ``mablagh_H``, ``owner`` or
    ``vajh`` go through the OCR model, all others through the second YOLO
    model via ``process_numbers``.

    Detection runs per image, and the crop folder is taken from the
    prediction result, so each image only lists its own crops and a repeated
    call never reads an older run's output. An image with no crops is listed
    with an empty ``crops`` list.

    Returns:
        The path of the JSON file written (``output.json``). It holds a list
        of ``{"image": ..., "crops": [...]}`` entries.
    """
    # Load models
    ocr_model = Model.load('hezarai/crnn-fa-printed-96-long')
    yolo_model_check = YOLO("best_300_D_check.pt")
    yolo_model_numbers = YOLO("P_D_T.pt")

    input_folder = 'output_images'
    results = []

    for filename in sorted(os.listdir(input_folder)):
        if not filename.lower().endswith(('.jpeg', '.jpg')):
            continue
        image_path = os.path.join(input_folder, filename)
        detections = yolo_model_check.predict(
            image_path, save=True, conf=0.5, save_crop=True
        )
        # Ultralytics picks a fresh run folder when the previous one exists,
        # so ask the result where it saved instead of assuming a fixed path.
        save_dir = getattr(detections[0], 'save_dir', None) if detections else None
        output_folder = str(save_dir) if save_dir else 'runs/detect/predict'
        crop_folder = os.path.join(output_folder, 'crops')
        results.append({
            'image': filename,
            'crops': _collect_crops(crop_folder, ocr_model, yolo_model_numbers)
        })

    output_json_path = 'output.json'
    with open(output_json_path, 'w', encoding='utf-8') as f:
        json.dump(results, f, ensure_ascii=False, indent=4)

    return output_json_path


def _collect_crops(crop_folder, ocr_model, yolo_model_numbers):
    """Read every crop under *crop_folder*, which has one sub-folder per label."""
    crops = []
    if not os.path.isdir(crop_folder):
        return crops
    for crop_label in sorted(os.listdir(crop_folder)):
        crop_label_folder = os.path.join(crop_folder, crop_label)
        if not os.path.isdir(crop_label_folder):
            continue
        for crop_filename in sorted(os.listdir(crop_label_folder)):
            crop_image_path = os.path.join(crop_label_folder, crop_filename)
            if crop_label in ('mablagh_H', 'owner', 'vajh'):
                text_prediction = predict_text(ocr_model, crop_image_path)
            else:
                text_prediction = process_numbers(yolo_model_numbers, crop_image_path)
            crops.append({
                'crop_image_path': crop_image_path,
                'text_prediction': text_prediction,
                'class_label': crop_label
            })
    return crops

def predict_text(model, image_path):
    """Run the OCR *model* on the image at *image_path* and return its text.

    Args:
        model: Object exposing ``predict(image)``.
        image_path: Path of the image to read.

    Returns:
        The ``'text'`` entries joined with spaces when the model returns a
        list, otherwise ``str()`` of the model output. Returns ``"N/A"`` when
        a ``FileNotFoundError`` is raised.
    """
    try:
        # resize() returns a new image, so the source file can be closed at once.
        with Image.open(image_path) as source:
            image = source.resize((320, 320))
        output = model.predict(image)
        if isinstance(output, list):
            return ' '.join(item['text'] for item in output)
        return str(output)
    except FileNotFoundError:
        return "N/A"

def process_numbers(model, image_path):
    """Read a left-to-right string of detected labels from an image.

    Args:
        model: Callable detector exposing a ``names`` mapping from class id
            to label.
        image_path: Path of the image to run the detector on.

    Returns:
        The labels of all boxes in the first result, concatenated in order of
        each box's first bounding-box coordinate.
    """
    detections = model(image_path, conf=0.5, save_crop=False)
    labelled = [
        (
            model.names[int(box.cls[0].cpu().numpy())],
            box.xyxy[0].cpu().numpy().tolist()[0],
        )
        for box in detections[0].boxes
    ]
    # Stable sort: boxes sharing a coordinate keep their detection order.
    labelled.sort(key=lambda pair: pair[1])
    return ''.join(label for label, _ in labelled)

def gradio_interface(input_file):
    """Gradio callback: process the upload and return the detection JSON.

    Args:
        input_file: The uploaded file as passed in by Gradio, or ``None``
            when nothing was uploaded.

    Returns:
        The parsed content of the JSON file written by
        ``run_detection_and_ocr``, or ``{"error": <message>}`` when there is
        no upload or ``process_file`` rejects the file format.
    """
    if input_file is None:
        return {"error": "No file uploaded."}

    processed = process_file(input_file)
    if isinstance(processed, str):
        # process_file signals an unsupported extension with a message string;
        # running detection then would only re-read images from an earlier upload.
        return {"error": processed}

    json_output = run_detection_and_ocr()
    with open(json_output, 'r', encoding='utf-8') as f:
        return json.load(f)

# Gradio UI: a single file upload in, a JSON viewer out.
upload_input = gr.File(label="Upload Word, PDF, or Image")
json_view = gr.JSON(label="JSON Output")
iface = gr.Interface(
    fn=gradio_interface,
    inputs=upload_input,
    outputs=json_view,
    title="Document to JSON Converter with Background Removal",
)

# Start the web server only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()