import cv2
import easyocr
import gradio as gr
import numpy as np
import requests
import os

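# The Hugging Face Inference API token is read from the environment; if
# API_KEY is unset, os.getenv returns None and the requests below will
# fail with an authorization error, so export API_KEY before launching.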
API_KEY = os.getenv("API_KEY")

API_URL = "https://api-inference.huggingface.co/models/dima806/facial_emotions_image_detection"
headers = {"Authorization": "Bearer "+ API_KEY+""}

# Instantiate the EasyOCR text detector (English, CPU)
reader = easyocr.Reader(['en'], gpu=False)
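# Note: EasyOCR downloads its detection and recognition models on first
# use, so the initial launch can take noticeably longer than later runs.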


def query(image):
    # Gradio supplies the image as an RGB array; OpenCV encodes BGR,
    # so convert the channel order before JPEG encoding
    image_data = cv2.cvtColor(np.array(image, dtype=np.uint8), cv2.COLOR_RGB2BGR)

    # Encode the image as JPEG and extract the raw bytes
    _, buffer = cv2.imencode('.jpg', image_data)
    binary_data = buffer.tobytes()

    # POST the bytes to the Inference API and return the parsed JSON
    response = requests.post(API_URL, headers=headers, data=binary_data)
    return response.json()
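
import time  # used only by the retry sketch below

# A minimal retry wrapper, sketched under the assumption that a cold model
# on the Inference API returns a JSON payload with an "error" key while it
# loads; the retry count and wait time are illustrative, not tuned values.
def query_with_retry(image, retries=3, wait_seconds=10):
    result = query(image)
    for _ in range(retries):
        if not (isinstance(result, dict) and "error" in result):
            break
        time.sleep(wait_seconds)
        result = query(image)
    return result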

def text_extraction(image):
    # Facial expression detection via the Inference API
    facial_data = query(image)

    # OCR: readtext returns (bounding box, text, confidence) tuples
    results = reader.readtext(image)

    text_content = ''
    threshold = 0.25

    # Collect the recognized text and draw a box around each detection
    # whose confidence clears the threshold
    for bbox, text, score in results:
        text_content += ' ' + text
        if score > threshold:
            cv2.rectangle(image, tuple(map(int, bbox[0])), tuple(map(int, bbox[2])), (0, 255, 0), 5)

    # Return the annotated image, the extracted text, and the emotion scores
    return image, text_content.strip(), facial_data
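
# Helper sketch: pick the top-scoring emotion from the API response,
# assuming the usual image-classification payload of
# [{"label": ..., "score": ...}, ...]; returns None for error payloads.
def top_emotion(facial_data):
    if isinstance(facial_data, list) and facial_data:
        best = max(facial_data, key=lambda item: item["score"])
        return best["label"], best["score"]
    return None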

# Define the Gradio interface
iface = gr.Interface(
    fn=text_extraction,
    inputs=gr.Image(),
    outputs=[gr.Image(label="Annotated Image"), gr.Textbox(label="Text Content"), gr.JSON(label="Facial Data")]
)

# Launch the Gradio interface
iface.launch()
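
# Headless smoke test, a sketch ("sample.jpg" is a placeholder path):
#
#   import cv2
#   img = cv2.cvtColor(cv2.imread("sample.jpg"), cv2.COLOR_BGR2RGB)
#   annotated, text, emotions = text_extraction(img)
#   print(text, emotions)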