File size: 3,592 Bytes
d05d4ae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3b86270
 
 
d05d4ae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38f7bb7
 
 
d05d4ae
 
 
 
3b86270
d05d4ae
3b86270
d05d4ae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
import os

import cv2
import numpy as np
from openvino import Core


class CodecCTC:
    """Greedy CTC decoder that turns per-step class scores into text.

    Index 0 of the internal table is the CTC blank symbol; the supplied
    characters occupy indices 1..len(characters) in the given order.
    """

    def __init__(self, characters):
        """Build the class-index -> character table.

        Args:
            characters: Iterable of symbols, in model class order, without
                the blank symbol (it is prepended here).
        """
        self.chars = ["[blank]"] + list(characters)

    def decode(self, preds, top_k=10):
        """Decode raw scores with best-path (greedy) CTC decoding.

        Args:
            preds: Array indexed as ``[step, batch, class]``.
            top_k: Maximum number of candidates kept per emitted character.

        Returns:
            A ``(texts, nbest)`` tuple. ``texts`` holds one decoded string
            per batch item. ``nbest`` is a flat list with one entry per
            emitted character (batch items concatenated in order); each
            entry is a list of ``{"prob": ..., "char": ...}`` dicts sorted by
            descending probability.
        """
        texts, nbest = [], []

        # Best class per step, rearranged so each row is one batch item.
        best_path: np.ndarray = np.argmax(preds, 2).transpose(1, 0)

        for batch_idx, seq in enumerate(best_path):
            # Zero-length sequences produce no text entry at all.
            if seq.shape[0] == 0:
                continue

            char_list = []
            for i, cls in enumerate(seq):
                # Drop blanks.
                if cls == 0:
                    continue

                # Collapse runs of the same class into a single character.
                if i > 0 and seq[i - 1] == cls:
                    continue

                char_list.append(self.chars[cls])

                # n-best candidates must come from the sequence currently
                # being decoded, not always from the first batch item.
                probs = self.softmax(preds[i][batch_idx])
                k_idx = np.argsort(-probs)[:top_k]
                nbest.append(
                    [{"prob": probs[j], "char": self.chars[j]} for j in k_idx]
                )

            texts.append("".join(char_list))

        return texts, nbest

    def softmax(self, x):
        """Return the softmax of ``x``, normalised along axis 0.

        The maximum is subtracted before exponentiation to avoid overflow.
        """
        e_x = np.exp(x - np.max(x))
        return e_x / np.sum(e_x, axis=0)


class Recognizer:
    """Text-line recognizer backed by an OpenVINO model and a CTC codec."""

    def __init__(self, model_path, char_list_path):
        """Load and compile the model for CPU and read the character table.

        Args:
            model_path: Path of the model file handed to ``Core.read_model``.
            char_list_path: UTF-8 text file whose characters (newlines
                removed) form the symbol table passed to ``CodecCTC``.
        """
        core = Core()
        self.model = core.read_model(model_path)
        self.compiled_model = core.compile_model(self.model, "CPU")
        self.infer_request = self.compiled_model.create_infer_request()

        # Input is unpacked as (batch_size, channel, height, width).
        _, _, self.inn_h, self.inn_w = self.model.inputs[0].shape
        self.input_tensor_name = self.model.inputs[0].get_any_name()
        self.output_tensor_name = self.model.outputs[0].get_any_name()

        with open(char_list_path, "r", encoding="utf-8") as f:
            char_list = "".join(line.strip("\n") for line in f)
        self.codec = CodecCTC(char_list)

    def __call__(self, inn_img):
        """Recognize the text in one image.

        Args:
            inn_img: Colour image array accepted by ``preprocess``.

        Returns:
            The ``(texts, nbest)`` tuple produced by ``CodecCTC.decode``.
        """
        inn_img = self.preprocess(inn_img, height=self.inn_h, width=self.inn_w)
        # Add the batch dimension: [c,h,w] -> [1,c,h,w]
        inn_img = inn_img[None, :, :, :]

        # NOTE(review): inference runs twice on the same input and only the
        # second result is used. Presumably a leftover warm-up/benchmark
        # loop; kept as-is because dropping it is only safe if the model is
        # stateless -- TODO confirm and reduce to a single call.
        for _ in range(2):
            self.infer_request.infer(inputs={self.input_tensor_name: inn_img})
            preds = self.infer_request.get_tensor(self.output_tensor_name).data[:]
        result, nbest = self.codec.decode(preds)

        return result, nbest

    def preprocess(self, image, height, width, invert=False):
        """Convert an image into a ``[1, height, width]`` float32 array.

        The image is converted to grayscale, resized to ``height`` while
        keeping its aspect ratio, and right-padded to ``width`` by repeating
        the last column. Images whose scaled width would exceed ``width``
        are squeezed horizontally to fit instead of raising.

        Args:
            image: Colour image array convertible with ``COLOR_RGBA2GRAY``.
            height: Target height in pixels.
            width: Target width in pixels.
            invert: When true, invert the grayscale values (``255 - x``).

        Returns:
            Padded ``float32`` array of shape ``[1, height, width]``.
        """
        src: np.ndarray = cv2.cvtColor(image, cv2.COLOR_RGBA2GRAY)
        src = (255 - src) if invert else src

        ratio = float(src.shape[1]) / float(src.shape[0])
        # Clamp the scaled width: above `width` the padding below would be
        # negative (np.pad raises), and 0 is rejected by cv2.resize.
        tw = max(1, min(int(height * ratio), width))
        rsz = cv2.resize(src, (tw, height), interpolation=cv2.INTER_AREA).astype(np.float32)

        # [h,w] -> [c,h,w]
        img = rsz[None, :, :]
        _, h, w = img.shape

        # right edge padding
        pad_img = np.pad(img, ((0, 0), (0, height - h), (0, width - w)), mode="edge")

        return pad_img


def main():
    """Run the recognizer on every ``.png`` file under the current directory.

    Files are visited in sorted path order and each result is printed.
    Files that OpenCV cannot read are reported and skipped.
    """
    recog = Recognizer("model/model.xml", "model/char_list.txt")

    target_dir = "."
    file_list = sorted(
        os.path.join(dn, fn)
        for dn, _, ff in os.walk(target_dir)
        for fn in ff
        if fn.endswith(".png")
    )

    for fp in file_list:
        # The recognizer works on image arrays, not on file paths.
        image = cv2.imread(fp)
        if image is None:
            print(f"skip (unreadable image): {fp}")
            continue

        # imread returns BGR; the recognizer's grayscale conversion assumes
        # RGB(A) channel order.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        print(recog(image))


# Run the batch recognition only when executed as a script, not on import.
if __name__ == "__main__":
    main()