lep1 committed on
Commit
c6c3369
1 Parent(s): 6dcb5fc

Upload 14 files

Browse files
utils/__init__.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ from utils.angelina_utils import *
2
+ from utils.dsbi_utils import *
3
+ from utils.kaggle_utils import *
4
+ from utils.braille_natural_utils import *
utils/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (381 Bytes). View file
 
utils/__pycache__/angelina_utils.cpython-311.pyc ADDED
Binary file (8.33 kB). View file
 
utils/__pycache__/braille_natural_utils.cpython-311.pyc ADDED
Binary file (1.81 kB). View file
 
utils/__pycache__/dsbi_utils.cpython-311.pyc ADDED
Binary file (6.23 kB). View file
 
utils/__pycache__/kaggle_utils.cpython-311.pyc ADDED
Binary file (692 Bytes). View file
 
utils/alpha_map.json ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "000001": "⠠",
3
+ "000010": "⠐",
4
+ "000011": "⠰",
5
+ "000100": "⠈",
6
+ "000101": "⠨",
7
+ "000110": "⠘",
8
+ "000111": "⠸",
9
+ "001000": "⠄",
10
+ "001001": "⠤",
11
+ "001010": "⠔",
12
+ "001011": "⠴",
13
+ "001100": "⠌",
14
+ "001101": "⠬",
15
+ "001110": "⠜",
16
+ "001111": "⠼",
17
+ "010000": "⠂",
18
+ "010001": "⠢",
19
+ "010010": "⠒",
20
+ "010011": "⠲",
21
+ "010100": "i",
22
+ "010101": "⠪",
23
+ "010110": "j",
24
+ "010111": "w",
25
+ "011000": "⠆",
26
+ "011001": "⠦",
27
+ "011010": "⠖",
28
+ "011011": "⠶",
29
+ "011100": "s",
30
+ "011101": "⠮",
31
+ "011110": "t",
32
+ "011111": "⠾",
33
+ "100000": "a",
34
+ "100001": "⠡",
35
+ "100010": "e",
36
+ "100011": "⠱",
37
+ "100100": "c",
38
+ "100101": "⠩",
39
+ "100110": "d",
40
+ "100111": "⠹",
41
+ "101000": "k",
42
+ "101001": "u",
43
+ "101010": "o",
44
+ "101011": "z",
45
+ "101100": "m",
46
+ "101101": "x",
47
+ "101110": "n",
48
+ "101111": "y",
49
+ "110000": "b",
50
+ "110001": "⠣",
51
+ "110010": "h",
52
+ "110011": "⠳",
53
+ "110100": "f",
54
+ "110101": "⠫",
55
+ "110110": "g",
56
+ "110111": "⠻",
57
+ "111000": "l",
58
+ "111001": "⠧",
59
+ "111010": "r",
60
+ "111011": "⠷",
61
+ "111100": "p",
62
+ "111101": "v",
63
+ "111110": "q",
64
+ "111111": "⠿"
65
+ }
utils/alphabet_map.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "a": "100000",
3
+ "b": "110000",
4
+ "c": "100100",
5
+ "d": "100110",
6
+ "e": "100010",
7
+ "f": "110100",
8
+ "g": "110110",
9
+ "h": "110010",
10
+
11
+ "i": "010100",
12
+ "j": "010110",
13
+
14
+ "k": "101000",
15
+ "l": "111000",
16
+ "m": "101100",
17
+ "n": "101110",
18
+ "o": "101010",
19
+ "p": "111100",
20
+ "q": "111110",
21
+ "r": "111010",
22
+
23
+ "s": "011100",
24
+ "t": "011110",
25
+
26
+ "u": "101001",
27
+ "v": "111001",
28
+
29
+ "w": "010111",
30
+
31
+ "x": "101101",
32
+ "y": "101111",
33
+ "z": "101011"
34
+ }
utils/angelina_utils.py ADDED
@@ -0,0 +1,387 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ # -*- coding: UTF-8 -*-
3
+ """
4
+ Braille symbols declaration
5
+ """
6
+
7
+ import numpy as np
8
+
9
+
10
def angelina_label_map():
    """Return the Braille label tables, keyed by table code.

    Every table maps a dot-number string (for example "245" means dots
    2, 4 and 5 are raised) to the text label of that cell. The returned
    dict holds one table per code: "SYM", "RU", "EN", "EN2", "GR", "LV",
    "PL", "DE", "UZ", "UZL", "NUM", "NUM_DENOMINATOR" and "MATH_RU".
    "EN" and "EN2" refer to the very same dict object.
    """
    # Labels used for the special (non-letter) cells.
    number_mark = "##"
    capital_mark = "CC"
    markout_mark = "XX"

    # Punctuation shared by several languages.
    # ("6" -> "en" and "46" -> "EN" are intentionally absent here; they
    # only appear in the math table below.)
    symbols = {
        "256": ".",
        "2": ",",
        "25": ":",
        "26": "?",
        "23": ";",
        "235": "!",
        "2356": "()",  # post-processed to "(" or ")"; labelled as ((, )), ()
        "126": "(",
        "345": ")",
        "36": "-",
        "34": "/",
        "3456": number_mark,
        "123456": markout_mark,
    }

    # Russian alphabet.
    russian = {
        "1": "а",
        "12": "б",
        "2456": "в",
        "1245": "г",
        "145": "д",
        "15": "е",
        "16": "ё",
        "245": "ж",
        "1356": "з",
        "24": "и",
        "12346": "й",
        "13": "к",
        "123": "л",
        "134": "м",
        "1345": "н",  # pre-processed to "№" when followed by a number
        "135": "о",
        "1234": "п",
        "1235": "р",
        "234": "с",
        "2345": "т",
        "136": "у",
        "124": "ф",
        "125": "х",
        "14": "ц",
        "12345": "ч",
        "156": "ш",
        "1346": "щ",
        "12356": "ъ",
        "2346": "ы",
        "23456": "ь",
        "246": "э",
        "1256": "ю",
        "1246": "я",
        "45": capital_mark,
        "236": "«",  # labelled as <<
        "356": "»",  # labelled as >>
        "4": "'",
        "456": "|",
        "346": "§",  # labelled as &&
    }

    # Uzbek (Cyrillic): the Russian table plus four extra letters.
    uzbek_cyrillic = {
        **russian,
        "1236": "ў",
        "13456": "қ",
        "12456": "ғ",
        "1456": "ҳ",
    }

    # English alphabet. Dot "6" as a capital sign is deliberately left out
    # (it would duplicate the Russian capital sign label).
    english = {
        "1": "a",
        "12": "b",
        "14": "c",
        "145": "d",
        "15": "e",
        "124": "f",
        "1245": "g",
        "125": "h",
        "24": "i",
        "245": "j",
        "13": "k",
        "123": "l",
        "134": "m",
        "1345": "n",
        "135": "o",
        "1234": "p",
        "12345": "q",
        "1235": "r",
        "234": "s",
        "2345": "t",
        "136": "u",
        "1236": "v",
        "2456": "w",
        "1346": "x",
        "13456": "y",
        "1356": "z",
        "3": "'",
        "236": "«",  # labelled as <<
        "356": "»",  # labelled as >>
    }

    # Uzbek (Latin): the English table with four cells overridden.
    uzbek_latin = {
        **english,
        "1236": "o`",
        "12456": "g`",
        "156": "sh",
        "12345": "ch",
    }

    # Greek letters and diphthongs.
    greek = {
        "1": "α",
        "12": "β",
        "1245": "γ",
        "145": "δ",
        "15": "ε",
        "1356": "ζ",
        "345": "η",
        "1456": "θ",
        "24": "ι",
        "13": "κ",
        "123": "λ",
        "134": "μ",
        "1345": "ν",
        "1346": "ξ",
        "135": "ο",
        "1234": "π",
        "1235": "ρ",
        "234": "σ",
        "2345": "τ",
        "13456": "υ",
        "124": "φ",
        "125": "χ",
        "12346": "ψ",
        "245": "ω",
        "46": capital_mark,
        "5": "'",  # stress
        "126": "αι",
        "146": "ει",
        "246": "οι",
        "12456": "υι",
        "16": "αυ",
        "156": "ευ",
        "1256": "ηυ",
        "136": "ου",
    }

    # Latvian letters.
    latvian = {
        "1": "a",
        "16": "ā",
        "12": "b",
        "14": "c",
        "146": "č",
        "145": "d",
        "15": "e",
        "156": "ē",
        "124": "f",
        "1245": "g",
        "12456": "ģ",
        "125": "h",
        "24": "i",
        "246": "ī",
        "245": "j",
        "13": "k",
        "136": "ķ",
        "123": "l",
        "1236": "ļ",
        "134": "m",
        "1345": "n",
        "13456": "ņ",
        "135": "o",
        "1234": "p",
        "1235": "r",
        "234": "s",
        "2346": "š",
        "2345": "t",
        "34": "u",
        "346": "ū",
        "2456": "v",
        "345": "z",
        "3456": "ž",
        "46": capital_mark,
    }

    # Polish: the English table plus diacritics and three punctuation cells.
    polish = {
        **english,
        "16": "ą",
        "146": "ć",
        "156": "ę",
        "126": "ł",
        "1456": "ń",
        "346": "ó",
        "246": "ś",
        "2346": "ź",
        "12346": "ż",
        "3": ".",
        "256": "/",
        "34": "\\",
    }

    # German letters, contractions and punctuation.
    german = {
        "236": '"',
        "356": '"',
        "35": "*",
        "235": "+",
        "2": ",",
        "36": "-",
        "3": ".",
        "346": "ie",
        "16": "au",
        "126": "eu",
        "146": "ei",
        "1456": "ch",
        "156": "sch",
        "1256": "ü",
        "246": "ö",
        "25": ":",
        "23": ";",
        "2356": "=",
        "26": "?",
        "345": "ä",
        "1": "a",
        "12": "b",
        "14": "c",
        "145": "d",
        "15": "e",
        "124": "f",
        "1245": "g",
        "125": "h",
        "24": "i",
        "245": "j",
        "13": "k",
        "123": "l",
        "134": "m",
        "1345": "n",
        "135": "o",
        "1234": "p",
        "12345": "q",
        "1235": "r",
        "234": "s",
        "2345": "t",
        "136": "u",
        "1236": "v",
        "2456": "w",
        "1346": "x",
        "13456": "y",
        "1356": "z",
        "34": "äu",
        "23456": "st",
        "2346": "ß",
        "46": capital_mark,
    }

    # Digits (cells that follow the number sign).
    digits = {
        "1": "1",
        "12": "2",
        "14": "3",
        "145": "4",
        "15": "5",
        "124": "6",
        "1245": "7",
        "125": "8",
        "24": "9",
        "245": "0",
    }

    # Digits used as the denominator of a fraction.
    denominator_digits = {
        "2": "/1",
        "23": "/2",
        "25": "/3",
        "256": "/4",
        "26": "/5",
        "235": "/6",
        "2356": "/7",
        "236": "/8",
        "35": "/9",
        "356": "/0",  # the pair "0" + "/0" is post-processed to "%"
    }

    # Math Braille symbols (presumably the Russian convention).
    # NOTE(review): the table used to list "246" twice ("<" then "{") and
    # "135" twice (">" then "}"). A dict literal keeps only the last value
    # for a repeated key, so "<" and ">" never reached the result. The
    # effective entries are written out once below; the intended codes for
    # "<" and ">" still need to be confirmed.
    math_russian = {
        "2": ",",  # decimal separator
        "3": "..",  # post-processed to "." between digits, otherwise to "*"
        "235": "+",
        "36": "-",
        "236": "*",
        "256": "::",  # post-processed to ":" (division)
        "246": "{",
        "135": "}",
        "2356": "=",
        "126": "(",
        "345": ")",
        "12356": "[",
        "23456": "]",
        "456": "|",
        "6": "en",
        "46": "EN",
    }

    # Table codes -> tables. "EN" and "EN2" share one dict on purpose.
    return {
        "SYM": symbols,
        "RU": russian,
        "EN": english,
        "EN2": english,
        "GR": greek,
        "LV": latvian,
        "PL": polish,
        "DE": german,
        "UZ": uzbek_cyrillic,
        "UZL": uzbek_latin,
        "NUM": digits,
        "NUM_DENOMINATOR": denominator_digits,
        "MATH_RU": math_russian,
    }
347
+
348
+
349
def reversed_mapping(angelina_map=None):
    """Flatten the per-language tables into one label -> dot-code lookup.

    :param angelina_map: dict of tables, each mapping a dot-number string
        to a label (the shape returned by ``angelina_label_map``). When
        omitted, ``angelina_label_map()`` is called at call time rather
        than once at import time.
    :return: dict mapping every label to a dot-number string. When the same
        label occurs more than once (inside one table or across tables),
        the entry seen last wins, so the order of the tables matters.
    """
    if angelina_map is None:
        angelina_map = angelina_label_map()

    concat_dict = {}
    for single_dict in angelina_map.values():
        # Writing label -> code directly gives the same result as inverting
        # each table first: a later code overwrites an earlier one.
        for code, label in single_dict.items():
            concat_dict[label] = code
    return concat_dict
360
+
361
+
362
def transform_angelina_label(russian_label, mapping=None):
    """Convert one annotation label into a Braille dot-number string.

    Lookup order:

    1. the stripped label, looked up in ``mapping``;
    2. the label with every "~" removed, returned unchanged if it is numeric;
    3. the lower-cased label, looked up in ``mapping``;
    4. a small table of special labels (capital sign, markout, quotes);
    5. otherwise the label is printed and "46" is returned.

    :param russian_label: label text from the annotation
    :param mapping: dict label -> dot-number string. When omitted, the
        result of ``reversed_mapping()`` is built on first use and reused.
    :return: dot-number string such as "145"
    """
    if mapping is None:
        # Build the default lookup lazily and keep it on the function so it
        # is computed once, not at import time and not on every call.
        mapping = getattr(transform_angelina_label, "_default_mapping", None)
        if mapping is None:
            mapping = reversed_mapping()
            transform_angelina_label._default_mapping = mapping

    label = russian_label.strip()
    if label in mapping:
        return mapping[label]

    # Drop "~" characters; a label that is numeric afterwards is returned
    # as is.
    label = label.replace("~", "")
    if label.isnumeric():
        return label

    # Capitalised letters map to the same cell as their lower-case form.
    lowered = label.lower()
    if lowered in mapping:
        return mapping[lowered]

    # Special labels. The Latin and Cyrillic spellings look identical on
    # screen, so both are listed with explicit escapes for the Cyrillic ones.
    special_labels = {
        "CC": "46",  # capital sign, Latin letters
        "\u0421\u0421": "46",  # capital sign, Cyrillic letters
        "XX": "123456",  # markout, Latin letters
        "\u0425\u0425": "123456",  # markout, Cyrillic letters
        ">>": "356",
        "<<": "236",
    }
    if label in special_labels:
        return special_labels[label]

    # Unknown label: report it and fall back to the capital-sign code,
    # as before.
    print(label)
    return "46"
utils/braille_map.json ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "000001": "⠠",
3
+ "000010": "⠐",
4
+ "000011": "⠰",
5
+ "000100": "⠈",
6
+ "000101": "⠨",
7
+ "000110": "⠘",
8
+ "000111": "⠸",
9
+ "001000": "⠄",
10
+ "001001": "⠤",
11
+ "001010": "⠔",
12
+ "001011": "⠴",
13
+ "001100": "⠌",
14
+ "001101": "⠬",
15
+ "001110": "⠜",
16
+ "001111": "⠼",
17
+ "010000": "⠂",
18
+ "010001": "⠢",
19
+ "010010": "⠒",
20
+ "010011": "⠲",
21
+ "010100": "⠊",
22
+ "010101": "⠪",
23
+ "010110": "⠚",
24
+ "010111": "⠺",
25
+ "011000": "⠆",
26
+ "011001": "⠦",
27
+ "011010": "⠖",
28
+ "011011": "⠶",
29
+ "011100": "⠎",
30
+ "011101": "⠮",
31
+ "011110": "⠞",
32
+ "011111": "⠾",
33
+ "100000": "⠁",
34
+ "100001": "⠡",
35
+ "100010": "⠑",
36
+ "100011": "⠱",
37
+ "100100": "⠉",
38
+ "100101": "⠩",
39
+ "100110": "⠙",
40
+ "100111": "⠹",
41
+ "101000": "⠅",
42
+ "101001": "⠥",
43
+ "101010": "⠕",
44
+ "101011": "⠵",
45
+ "101100": "⠍",
46
+ "101101": "⠭",
47
+ "101110": "⠝",
48
+ "101111": "⠽",
49
+ "110000": "⠃",
50
+ "110001": "⠣",
51
+ "110010": "⠓",
52
+ "110011": "⠳",
53
+ "110100": "⠋",
54
+ "110101": "⠫",
55
+ "110110": "⠛",
56
+ "110111": "⠻",
57
+ "111000": "⠇",
58
+ "111001": "⠧",
59
+ "111010": "⠗",
60
+ "111011": "⠷",
61
+ "111100": "⠏",
62
+ "111101": "⠯",
63
+ "111110": "⠟",
64
+ "111111": "⠿"
65
+ }
utils/braille_natural_utils.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from PIL import Image
2
+
3
+
4
def get_label(label_path):
    """Read a YOLO-format label file and return its rows without the class.

    Each non-blank line is split on any run of whitespace, the first field
    (the class) is dropped and the remaining fields are converted to float.
    Blank lines are skipped instead of producing empty rows.

    :param label_path: path of the label text file
    :return: list of rows, each a list of floats
    :raises ValueError: if a field after the class is not a number
    """
    label = []
    with open(label_path, "r") as f:
        for line in f:
            # split() without an argument tolerates tabs, repeated spaces
            # and the trailing newline.
            fields = line.split()
            if not fields:
                continue
            label.append([float(item) for item in fields[1:]])
    return label
15
+
16
+
17
def get_image(img_path):
    """Open the image at ``img_path`` with ``Image.open`` and return it."""
    return Image.open(img_path)
utils/dsbi_utils.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ # coding: utf-8
3
+ """
4
+ Utils for DSBI dataset (https://github.com/yeluo1994/DSBI)
5
+ Written by IlyaOvodov (https://github.com/IlyaOvodov/AngelinaDataset)
6
+ """
7
+ import collections
8
+
9
# One annotated Braille cell:
#   row, col                  - position in the symbol grid
#   left, top, right, bottom  - corner coordinates in pixels
#   label                     - either dot numbers ('246') or binary ('010101')
CellInfo = collections.namedtuple(
    "CellInfo",
    ("row", "col", "left", "top", "right", "bottom", "label"),
)
21
+
22
+
23
def _parse_dsbi_number(token):
    """Convert a numeric token to int when possible, otherwise to float."""
    try:
        return int(token)
    except ValueError:
        return float(token)


def read_txt(file_txt, binary_label=True):
    """
    Loads Braille annotation from DSBI annotation txt file
    :param file_txt: filename of txt file
    :param binary_label: return symbol label in binary format, like '010101' (if True),
        or human readable like '246' (if False)
    :return: tuple (
        angle: value from 1st line of annotation file,
        h_lines: list of horizontal lines Y-coordinates,
        v_lines: list of vertical lines X-coordinates,
        cells: symbols as list of CellInfo
        )
        None, None, None, None for empty annotation (fewer than 3 lines)
    :raises AssertionError: if a line does not have the expected number of fields
    :raises ValueError: if a numeric field cannot be parsed
    """
    with open(file_txt, "r") as f:
        lines = f.readlines()
    if len(lines) < 3:
        return None, None, None, None

    # Numbers are parsed with int()/float() rather than eval(), so text in
    # the annotation file can never be executed as code.
    angle = _parse_dsbi_number(lines[0])
    # split() without an argument ignores the trailing newline and any
    # repeated or trailing spaces.
    v_lines = [_parse_dsbi_number(tok) for tok in lines[1].split()]
    assert len(v_lines) % 2 == 0, (file_txt, len(v_lines))
    h_lines = [_parse_dsbi_number(tok) for tok in lines[2].split()]
    assert len(h_lines) % 3 == 0, (file_txt, len(h_lines))

    cells = []
    for cell_ln in lines[3:]:
        # Do not slice off the last character: the final line of a file may
        # have no newline, and slicing would then drop a real digit.
        cell_nums = cell_ln.split()
        if not cell_nums:
            continue  # tolerate blank lines
        assert len(cell_nums) == 8, (file_txt, cell_ln)
        row = int(cell_nums[0])
        col = int(cell_nums[1])
        dots = cell_nums[2:]
        if binary_label:
            label = "".join(dots)
        else:
            label = ""
            for i, c in enumerate(dots):
                if c == "1":
                    label += str(i + 1)
                else:
                    assert c == "0", (file_txt, cell_ln, i, c)
        # v_lines holds 2 entries per grid column, h_lines 3 per grid row;
        # row and col are 1-based.
        left = v_lines[(col - 1) * 2]
        right = v_lines[(col - 1) * 2 + 1]
        top = h_lines[(row - 1) * 3]
        bottom = h_lines[(row - 1) * 3 + 2]
        cells.append(
            CellInfo(
                row=row,
                col=col,
                left=left,
                top=top,
                right=right,
                bottom=bottom,
                label=label,
            )
        )
    return angle, h_lines, v_lines, cells
77
+
78
+
79
def read_DSBI_annotation(label_filename, width, height, rect_margin, get_points):
    """
    Loads Braille annotation from DSBI annotation txt file as a list of rects
    :param label_filename: filename of txt file
    :param width: image width, used to scale X coordinates in points mode
    :param height: image height, used to scale Y coordinates in points mode
    :param rect_margin: margin as a fraction of the symbol width
    :param get_points: Points (True) or Symbols (False) mode
    :return:
        Empty list if the annotation has no cells (read_txt returned None).
        Symbols mode: one tuple (left, top, right, bottom, label) per cell whose
        label is not '000000'. The cell box is grown by rect_margin*(right-left)
        on every side. Coordinates are NOT divided by width/height here, and
        label is the binary string read from the file.
        Points mode: one list [left, top, right, bottom, 0] per raised dot.
        The dot box has half-size int((right-left)*rect_margin) around the dot
        centre; X values are divided by width and Y values by height.
    """
    cells = read_txt(label_filename, binary_label=True)[3]
    if cells is None:
        return []

    if not get_points:
        boxes = []
        for cell in cells:
            if cell.label == "000000":
                continue
            pad = rect_margin * (cell.right - cell.left)
            boxes.append(
                (
                    cell.left - pad,
                    cell.top - pad,
                    cell.right + pad,
                    cell.bottom + pad,
                    cell.label,
                )
            )
        return boxes

    dots = []
    for cell in cells:
        half = int((cell.right - cell.left) * rect_margin)
        for i in range(6):
            if cell.label[i] != "1":
                continue
            # Positions 0-2 are the left column of the cell, 3-5 the right
            # column; within a column the order is top, middle, bottom.
            in_right_column, level = divmod(i, 3)
            xc = cell.right if in_right_column else cell.left
            if level == 0:
                yc = cell.top
            elif level == 1:
                yc = (cell.top + cell.bottom) // 2
            else:
                yc = cell.bottom
            dots.append(
                [
                    (xc - half) / width,
                    (yc - half) / height,
                    (xc + half) / width,
                    (yc + half) / height,
                    0,
                ]
            )
    return dots
utils/kaggle_utils.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+
3
+
4
def get_alphabet_map(path="./src/utils/alphabet_map.json"):
    """Load the JSON file at ``path`` and return the parsed content."""
    with open(path, "r") as handle:
        return json.load(handle)
utils/number_map.json ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "000001": "⠠",
3
+ "000010": "⠐",
4
+ "000011": "⠰",
5
+ "000100": "⠈",
6
+ "000101": "⠨",
7
+ "000110": "⠘",
8
+ "000111": "⠸",
9
+ "001000": "⠄",
10
+ "001001": "⠤",
11
+ "001010": "⠔",
12
+ "001011": "⠴",
13
+ "001100": "⠌",
14
+ "001101": "⠬",
15
+ "001110": "⠜",
16
+ "001111": "⠼",
17
+ "010000": "⠂",
18
+ "010001": "⠢",
19
+ "010010": "⠒",
20
+ "010011": "⠲",
21
+ "010100": "9",
22
+ "010101": "⠪",
23
+ "010110": "0",
24
+ "010111": "⠺",
25
+ "011000": "⠆",
26
+ "011001": "⠦",
27
+ "011010": "⠖",
28
+ "011011": "⠶",
29
+ "011100": "⠎",
30
+ "011101": "⠮",
31
+ "011110": "⠞",
32
+ "011111": "⠾",
33
+ "100000": "1",
34
+ "100001": "⠡",
35
+ "100010": "5",
36
+ "100011": "⠱",
37
+ "100100": "3",
38
+ "100101": "⠩",
39
+ "100110": "4",
40
+ "100111": "⠹",
41
+ "101000": "⠅",
42
+ "101001": "⠥",
43
+ "101010": "⠕",
44
+ "101011": "⠵",
45
+ "101100": "⠍",
46
+ "101101": "⠭",
47
+ "101110": "⠝",
48
+ "101111": "⠽",
49
+ "110000": "2",
50
+ "110001": "⠣",
51
+ "110010": "8",
52
+ "110011": "⠳",
53
+ "110100": "6",
54
+ "110101": "⠫",
55
+ "110110": "7",
56
+ "110111": "⠻",
57
+ "111000": "⠇",
58
+ "111001": "⠧",
59
+ "111010": "⠗",
60
+ "111011": "⠷",
61
+ "111100": "⠏",
62
+ "111101": "⠯",
63
+ "111110": "⠟",
64
+ "111111": "⠿"
65
+ }
66
+