Spaces:

cdcvd
/

Ocr_check

Sleeping

File size: 6,113 Bytes

176abf3
afa3a48
176abf3
 
 
 
 
 
afa3a48
 
 
176abf3
afa3a48
e05b08a
afa3a48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
176abf3
 
 
 
 
 
 
 
74cc02c
 
 
afa3a48
 
 
 
 
 
8a01ec0
176abf3
c89663f
 
8a01ec0
c89663f
 
 
 
 
 
 
 
8a01ec0
 
 
 
 
176abf3
 
afa3a48
176abf3
 
8a01ec0
 
afa3a48
176abf3
8a01ec0
afa3a48
176abf3
8f74b38
afa3a48
176abf3
 
 
afa3a48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
176abf3
afa3a48
 
 
 
176abf3
 
 
8a01ec0
afa3a48
 
176abf3
 
 
afa3a48

import os

from PIL import Image, ImageOps, ImageChops
import io
import fitz  # PyMuPDF
from docx import Document
from rembg import remove
import gradio as gr
from hezar.models import Model
from ultralytics import YOLO
import json

# Create the working directories up front; existing ones are left untouched.
for required_dir in ("static", "output_images"):
    os.makedirs(required_dir, exist_ok=True)


def remove_readonly(func, path, excinfo):
    """Make *path* writable and retry the operation that failed on it.

    Shaped as an ``onerror`` callback for ``shutil.rmtree``, which is how
    this module uses it.

    Args:
        func: The callable that failed; it is called again as ``func(path)``.
        path: Filesystem path the failed call was operating on.
        excinfo: Exception details supplied by the caller; not used here.
    """
    # ``stat`` is not imported at file level, so the original body raised
    # NameError instead of clearing the flag. Import it where it is needed.
    import stat

    os.chmod(path, stat.S_IWRITE)
    func(path)

# Delete a ``runs`` directory left next to this script by an earlier session.
# ``shutil`` is imported right here because the file's other import of it
# appears further down, i.e. after this statement has already executed;
# without this line the cleanup raised NameError whenever ``runs`` existed.
import shutil

current_dir = os.path.dirname(os.path.abspath(__file__))
ultralytics_path = os.path.join(current_dir, 'runs')

if os.path.exists(ultralytics_path):
    # remove_readonly retries deletions that fail on read-only entries.
    shutil.rmtree(ultralytics_path, onerror=remove_readonly)


def trim_whitespace(image):
    """Crop *image* to the bounding box of its non-white content.

    The image is converted to grayscale and inverted so that white pixels
    become zero; ``getbbox()`` on that result is used as the crop box.
    Whatever ``getbbox()`` returns (including ``None``) is passed to
    ``crop`` unchanged.

    Args:
        image: A PIL image.

    Returns:
        The cropped PIL image.
    """
    content_box = ImageChops.invert(ImageOps.grayscale(image)).getbbox()
    return image.crop(content_box)

def convert_pdf_to_images(pdf_path, zoom=2):
    """Render every page of a PDF to a whitespace-trimmed PIL image.

    Args:
        pdf_path: Path of the PDF file, opened with PyMuPDF.
        zoom: Scale factor applied to both axes when rasterising a page.

    Returns:
        A list with one trimmed RGB PIL image per page, in page order.
    """
    # The scaling matrix is identical for every page, so build it once.
    matrix = fitz.Matrix(zoom, zoom)
    images = []
    # The context manager closes the document even if rendering a page
    # fails; the original never closed it.
    with fitz.open(pdf_path) as pdf_document:
        for page in pdf_document:
            pix = page.get_pixmap(matrix=matrix)
            image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
            images.append(trim_whitespace(image))
    return images

def convert_docx_to_jpeg(docx_bytes):
    """Extract the images embedded in a .docx document as JPEG-backed PIL images.

    Args:
        docx_bytes: The document as raw ``bytes``/``bytearray``. A path or
            file-like object is also accepted and handed to
            ``docx.Document`` as-is (``process_file`` in this module
            originally passed a path).

    Returns:
        A list of PIL images, one per embedded image relationship, each
        re-encoded as JPEG in memory.
    """
    # ``BytesIO`` was referenced bare although only ``io`` is imported, which
    # raised NameError; use the qualified name instead.
    if isinstance(docx_bytes, (bytes, bytearray)):
        source = io.BytesIO(docx_bytes)
    else:
        source = docx_bytes
    document = Document(source)

    images = []
    for rel in document.part.rels.values():
        # Externally linked targets have no embedded part to read a blob from.
        if rel.is_external or "image" not in rel.target_ref:
            continue
        embedded = Image.open(io.BytesIO(rel.target_part.blob))
        jpeg_buffer = io.BytesIO()
        # JPEG has no alpha channel / palette, hence the RGB conversion.
        embedded.convert('RGB').save(jpeg_buffer, format="JPEG")
        jpeg_buffer.seek(0)
        images.append(Image.open(jpeg_buffer))
    return images

def remove_background_from_image(image):
    """Pass *image* through ``rembg.remove`` and return the result."""
    cutout = remove(image)
    return cutout

def process_file(input_file):
    """Turn an uploaded file into background-removed images saved as JPEGs.

    Images are opened directly, PDFs are rendered page by page, and Word
    documents contribute their embedded images. Every resulting image goes
    through ``remove_background_from_image`` and is written to
    ``output_images/image_<i>.jpg``.

    Args:
        input_file: Either an object exposing the file path as ``.name`` or
            the path itself as a string.

    Returns:
        The list of background-removed PIL images (before the RGB conversion
        applied for saving), or the string ``"File format not supported."``
        when the extension is not recognised.
    """
    # Accept a plain path string as well as an object with ``.name``; the
    # original raised AttributeError on a string.
    file_path = getattr(input_file, "name", input_file)
    file_extension = os.path.splitext(file_path)[1].lower()

    if file_extension in ('.png', '.jpeg', '.jpg', '.bmp', '.gif'):
        images = [Image.open(file_path)]
    elif file_extension == '.pdf':
        images = convert_pdf_to_images(file_path)
    elif file_extension in ('.docx', '.doc'):
        # NOTE(review): legacy .doc files are routed to the .docx reader;
        # confirm that python-docx can actually open them.
        # The converter's parameter is the document's bytes, so read the file
        # here instead of passing the path as the original did.
        with open(file_path, 'rb') as docx_file:
            images = convert_docx_to_jpeg(docx_file.read())
    else:
        return "File format not supported."

    images = [remove_background_from_image(image) for image in images]

    output_folder = 'output_images'
    os.makedirs(output_folder, exist_ok=True)
    # Remove files written by a previous call; otherwise an earlier upload
    # with more pages leaves extra image_<i>.jpg files that would be picked
    # up again by the detection step.
    for stale_name in os.listdir(output_folder):
        if stale_name.startswith('image_') and stale_name.endswith('.jpg'):
            os.remove(os.path.join(output_folder, stale_name))

    for i, img in enumerate(images):
        # JPEG cannot store alpha or palette data; the original converted
        # only RGBA and failed on other non-RGB modes.
        if img.mode != 'RGB':
            img = img.convert('RGB')
        img.save(os.path.join(output_folder, f'image_{i}.jpg'))

    return images


import shutil



# Crop classes read with the OCR model; every other class is read with the
# number-detection model.
_TEXT_CROP_LABELS = ('mablagh_H', 'owner', 'vajh')


def _collect_crops(ocr_model, yolo_model_numbers, crop_folder):
    """Read every crop file under *crop_folder* and return one record per file."""
    crops = []
    for crop_label in os.listdir(crop_folder):
        crop_label_folder = os.path.join(crop_folder, crop_label)
        if not os.path.isdir(crop_label_folder):
            continue
        for crop_filename in os.listdir(crop_label_folder):
            crop_image_path = os.path.join(crop_label_folder, crop_filename)
            if crop_label in _TEXT_CROP_LABELS:
                text_prediction = predict_text(ocr_model, crop_image_path)
            else:
                text_prediction = process_numbers(yolo_model_numbers, crop_image_path)
            crops.append({
                'crop_image_path': crop_image_path,
                'text_prediction': text_prediction,
                'class_label': crop_label
            })
    return crops


def run_detection_and_ocr():
    """Detect fields in the saved images, read each crop, and dump the result as JSON.

    Runs the field detector over ``output_images`` with crop saving enabled,
    reads every crop under ``runs/detect/predict/crops`` (OCR for the text
    classes, the number detector for the rest) and writes the collected
    records to ``output.json``.

    Returns:
        The path of the written JSON file (``'output.json'``).
    """
    # Load models
    ocr_model = Model.load('hezarai/crnn-fa-printed-96-long')
    yolo_model_check = YOLO("best_300_D_check.pt")
    yolo_model_numbers = YOLO("P_D_T.pt")

    input_folder = 'output_images'
    output_folder = 'runs/detect/predict'
    crop_folder = os.path.join(output_folder, 'crops')

    # Ultralytics picks a new, numbered run folder when the target already
    # exists, so from the second call on this fixed path would still hold the
    # first call's crops. Clearing it first keeps the path valid and fresh.
    shutil.rmtree(output_folder, ignore_errors=True)
    yolo_model_check.predict(input_folder, save=True, conf=0.5, save_crop=True)

    results = []
    if os.path.exists(crop_folder):
        # The crop records do not depend on which input image is being
        # listed, so read them once instead of once per image.
        # NOTE(review): as in the original, every image entry receives the
        # full crop list; crops are not matched to their source image.
        crops = _collect_crops(ocr_model, yolo_model_numbers, crop_folder)
        for filename in os.listdir(input_folder):
            if filename.endswith(('.JPEG', '.jpg')):
                results.append({
                    'image': filename,
                    'crops': list(crops)
                })

    output_json_path = 'output.json'
    with open(output_json_path, 'w', encoding='utf-8') as f:
        json.dump(results, f, ensure_ascii=False, indent=4)

    return output_json_path

def predict_text(model, image_path):
    """Run *model* on the image at *image_path* and return the predicted text.

    The image is resized to 320x320 before prediction. A list result is
    flattened by joining each item's ``'text'`` value with spaces; any other
    result is converted with ``str``.

    Args:
        model: Object exposing ``predict(image)``.
        image_path: Path of the image file to read.

    Returns:
        The predicted text, or ``"N/A"`` if a FileNotFoundError is raised.
    """
    try:
        scaled = Image.open(image_path).resize((320, 320))
        prediction = model.predict(scaled)
        return (
            ' '.join(entry['text'] for entry in prediction)
            if isinstance(prediction, list)
            else str(prediction)
        )
    except FileNotFoundError:
        return "N/A"

def process_numbers(model, image_path):
    """Detect labelled boxes in an image and join their labels in box order.

    Args:
        model: Callable detector; ``model.names`` maps class ids to labels.
        image_path: Path of the image to run the detector on.

    Returns:
        The labels of all boxes in the first result, concatenated in
        ascending order of each box's first ``xyxy`` coordinate.
    """
    first_result = model(image_path, conf=0.5, save_crop=False)[0]

    labelled = []
    for box in first_result.boxes:
        label = model.names[int(box.cls[0].cpu().numpy())]
        first_coord = box.xyxy[0].cpu().numpy().tolist()[0]
        labelled.append((first_coord, label))

    # Sort on the coordinate only so ties keep detection order.
    labelled.sort(key=lambda pair: pair[0])
    return ''.join(label for _, label in labelled)

def gradio_interface(input_file):
    """Gradio callback: process the upload, run detection/OCR, return the JSON data.

    Args:
        input_file: The uploaded file as delivered by the ``gr.File`` input.

    Returns:
        The parsed content of the JSON file written by
        ``run_detection_and_ocr``, or ``{"error": <message>}`` when nothing
        was uploaded or the file type is not supported.
    """
    if input_file is None:
        return {"error": "No file uploaded."}

    processed = process_file(input_file)
    # process_file reports an unsupported extension by returning a message
    # string instead of a list of images. The original ignored it and ran
    # detection on whatever was already in the output folder.
    if isinstance(processed, str):
        return {"error": processed}

    json_output = run_detection_and_ocr()
    with open(json_output, 'r', encoding='utf-8') as f:
        return json.load(f)

# Gradio front end: a single file upload in, the callback's result shown as JSON.
upload_input = gr.File(label="Upload Word, PDF, or Image")
json_output_view = gr.JSON(label="JSON Output")

iface = gr.Interface(
    fn=gradio_interface,
    inputs=upload_input,
    outputs=json_output_view,
    title="Document to JSON Converter with Background Removal",
)

# Launch the interface only when run as a script, not on import.
if __name__ == "__main__":
    iface.launch()