# Hugging Face Space: OCR text extraction + facial-emotion detection via Gradio.
import cv2
import easyocr
import gradio as gr
import numpy as np
import requests
import os
# Inference API configuration.
# Default to "" so a missing API_KEY env var yields a 401 from the API instead
# of an opaque TypeError during string concatenation at import time.
API_KEY = os.getenv("API_KEY", "")
API_URL = "https://api-inference.huggingface.co/models/dima806/facial_emotions_image_detection"
headers = {"Authorization": f"Bearer {API_KEY}"}
# Instantiate the EasyOCR text detector once at module load (model download/init
# is expensive). English only; gpu=False because Spaces CPU hardware is assumed.
reader = easyocr.Reader(['en'], gpu=False)
def query(image):
    """Send *image* to the facial-emotion inference API and return the parsed JSON reply."""
    frame = np.asarray(image, dtype=np.uint8)
    # cv2.imencode returns (success_flag, encoded_buffer); JPEG keeps the payload small.
    _, encoded = cv2.imencode('.jpg', frame)
    payload = encoded.tobytes()
    resp = requests.post(API_URL, headers=headers, data=payload)
    return resp.json()
def text_extraction(image):
    """Run OCR and facial-emotion detection on *image*.

    Returns a tuple (annotated image, accumulated text, facial-emotion JSON).
    Boxes are drawn in place on *image* for detections above the confidence
    threshold.
    """
    global text_content  # kept: module-level side effect preserved for any external readers
    text_content = ''
    # Remote facial-expression detection (see query()).
    facial_data = query(image)
    # Each OCR detection is (bounding_box, text, confidence_score).
    detections = reader.readtext(image)
    threshold = 0.25
    for bbox, text, score in detections:
        # BUG FIX: the original used ' '.join(text), which joined the
        # *characters* of each detected string with spaces ("hi" -> "h i").
        text_content = text_content + ' ' + text
        # NOTE(review): text is accumulated regardless of score while boxes are
        # thresholded — confirm whether low-confidence text should be dropped too.
        if score > threshold:
            # bbox[0] / bbox[2] are opposite corners of the detection quad.
            cv2.rectangle(image, tuple(map(int, bbox[0])), tuple(map(int, bbox[2])), (0, 255, 0), 5)
    return image, text_content, facial_data
# Wire the pipeline into a Gradio UI: one image in, annotated image + text + JSON out.
demo_outputs = [
    gr.Image(),
    gr.Textbox(label="Text Content"),
    gr.JSON(label="Facial Data"),
]
iface = gr.Interface(fn=text_extraction, inputs=gr.Image(), outputs=demo_outputs)
# Start the web server.
iface.launch()