Spaces:
Runtime error
Runtime error
Upload 14 files
Browse files- utils/__init__.py +4 -0
- utils/__pycache__/__init__.cpython-311.pyc +0 -0
- utils/__pycache__/angelina_utils.cpython-311.pyc +0 -0
- utils/__pycache__/braille_natural_utils.cpython-311.pyc +0 -0
- utils/__pycache__/dsbi_utils.cpython-311.pyc +0 -0
- utils/__pycache__/kaggle_utils.cpython-311.pyc +0 -0
- utils/alpha_map.json +65 -0
- utils/alphabet_map.json +34 -0
- utils/angelina_utils.py +387 -0
- utils/braille_map.json +65 -0
- utils/braille_natural_utils.py +19 -0
- utils/dsbi_utils.py +140 -0
- utils/kaggle_utils.py +9 -0
- utils/number_map.json +66 -0
utils/__init__.py
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from utils.angelina_utils import *
|
2 |
+
from utils.dsbi_utils import *
|
3 |
+
from utils.kaggle_utils import *
|
4 |
+
from utils.braille_natural_utils import *
|
utils/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (381 Bytes). View file
|
|
utils/__pycache__/angelina_utils.cpython-311.pyc
ADDED
Binary file (8.33 kB). View file
|
|
utils/__pycache__/braille_natural_utils.cpython-311.pyc
ADDED
Binary file (1.81 kB). View file
|
|
utils/__pycache__/dsbi_utils.cpython-311.pyc
ADDED
Binary file (6.23 kB). View file
|
|
utils/__pycache__/kaggle_utils.cpython-311.pyc
ADDED
Binary file (692 Bytes). View file
|
|
utils/alpha_map.json
ADDED
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"000001": "⠠",
|
3 |
+
"000010": "⠐",
|
4 |
+
"000011": "⠰",
|
5 |
+
"000100": "⠈",
|
6 |
+
"000101": "⠨",
|
7 |
+
"000110": "⠘",
|
8 |
+
"000111": "⠸",
|
9 |
+
"001000": "⠄",
|
10 |
+
"001001": "⠤",
|
11 |
+
"001010": "⠔",
|
12 |
+
"001011": "⠴",
|
13 |
+
"001100": "⠌",
|
14 |
+
"001101": "⠬",
|
15 |
+
"001110": "⠜",
|
16 |
+
"001111": "⠼",
|
17 |
+
"010000": "⠂",
|
18 |
+
"010001": "⠢",
|
19 |
+
"010010": "⠒",
|
20 |
+
"010011": "⠲",
|
21 |
+
"010100": "i",
|
22 |
+
"010101": "⠪",
|
23 |
+
"010110": "j",
|
24 |
+
"010111": "w",
|
25 |
+
"011000": "⠆",
|
26 |
+
"011001": "⠦",
|
27 |
+
"011010": "⠖",
|
28 |
+
"011011": "⠶",
|
29 |
+
"011100": "s",
|
30 |
+
"011101": "⠮",
|
31 |
+
"011110": "t",
|
32 |
+
"011111": "⠾",
|
33 |
+
"100000": "a",
|
34 |
+
"100001": "⠡",
|
35 |
+
"100010": "e",
|
36 |
+
"100011": "⠱",
|
37 |
+
"100100": "c",
|
38 |
+
"100101": "⠩",
|
39 |
+
"100110": "d",
|
40 |
+
"100111": "⠹",
|
41 |
+
"101000": "k",
|
42 |
+
"101001": "u",
|
43 |
+
"101010": "o",
|
44 |
+
"101011": "z",
|
45 |
+
"101100": "m",
|
46 |
+
"101101": "x",
|
47 |
+
"101110": "n",
|
48 |
+
"101111": "y",
|
49 |
+
"110000": "basement",
|
50 |
+
"110001": "⠣",
|
51 |
+
"110010": "h",
|
52 |
+
"110011": "⠳",
|
53 |
+
"110100": "f",
|
54 |
+
"110101": "⠫",
|
55 |
+
"110110": "g",
|
56 |
+
"110111": "⠻",
|
57 |
+
"111000": "l",
|
58 |
+
"111001": "⠧",
|
59 |
+
"111010": "r",
|
60 |
+
"111011": "⠷",
|
61 |
+
"111100": "p",
|
62 |
+
"111101": "v",
|
63 |
+
"111110": "q",
|
64 |
+
"111111": "⠿"
|
65 |
+
}
|
utils/alphabet_map.json
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"a": "100000",
|
3 |
+
"b": "110000",
|
4 |
+
"c": "100100",
|
5 |
+
"d": "100110",
|
6 |
+
"e": "100010",
|
7 |
+
"f": "110100",
|
8 |
+
"g": "110110",
|
9 |
+
"h": "110010",
|
10 |
+
|
11 |
+
"i": "010100",
|
12 |
+
"j": "010110",
|
13 |
+
|
14 |
+
"k": "101000",
|
15 |
+
"l": "111000",
|
16 |
+
"m": "101100",
|
17 |
+
"n": "101110",
|
18 |
+
"o": "101010",
|
19 |
+
"p": "111100",
|
20 |
+
"q": "111110",
|
21 |
+
"r": "111010",
|
22 |
+
|
23 |
+
"s": "011100",
|
24 |
+
"t": "011110",
|
25 |
+
|
26 |
+
"u": "101001",
|
27 |
+
"v": "111001",
|
28 |
+
|
29 |
+
"w": "010111",
|
30 |
+
|
31 |
+
"x": "101101",
|
32 |
+
"y": "101111",
|
33 |
+
"z": "101011"
|
34 |
+
}
|
utils/angelina_utils.py
ADDED
@@ -0,0 +1,387 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python
|
2 |
+
# -*- coding: UTF-8 -*-
|
3 |
+
"""
|
4 |
+
Braille symbols declaration
|
5 |
+
"""
|
6 |
+
|
7 |
+
import numpy as np
|
8 |
+
|
9 |
+
|
10 |
+
def angelina_label_map():
    """Return the Braille lookup tables used for Angelina-style labels.

    Every table maps a dot-number string (for example ``"1245"``, meaning
    dots 1, 2, 4 and 5 are raised) to the symbol it stands for in one
    alphabet or context.

    :return: dict whose keys are table codes (``"SYM"``, ``"RU"``, ``"EN"``,
        ``"EN2"``, ``"GR"``, ``"LV"``, ``"PL"``, ``"DE"``, ``"UZ"``,
        ``"UZL"``, ``"NUM"``, ``"NUM_DENOMINATOR"``, ``"MATH_RU"``) and whose
        values are the corresponding ``{dots: symbol}`` dicts.  ``"EN"`` and
        ``"EN2"`` refer to the same dict object.
    """
    # constants for special symbols label
    num_sign = "##"
    caps_sign = "CC"
    markout_sign = "XX"

    # general symbols common for various languages
    sym_map = {
        "256": ".",
        "2": ",",
        "25": ":",
        "26": "?",
        "23": ";",
        "235": "!",
        "2356": "()",  # postprocess to (, ). Labeled as ((, )), ()
        "126": "(",
        "345": ")",
        "36": "-",
        "34": "/",
        "3456": num_sign,
        "123456": markout_sign,
        # '6': "en",
        # '46': "EN", # TODO only for Russian ?
    }

    # RU symbols
    alpha_map_RU = {
        "1": "а",
        "12": "б",
        "2456": "в",
        "1245": "г",
        "145": "д",
        "15": "е",
        "16": "ё",
        "245": "ж",
        "1356": "з",
        "24": "и",
        "12346": "й",
        "13": "к",
        "123": "л",
        "134": "м",
        "1345": "н",  # preprocess to № if followed by number
        "135": "о",
        "1234": "п",
        "1235": "р",
        "234": "с",
        "2345": "т",
        "136": "у",
        "124": "ф",
        "125": "х",
        "14": "ц",
        "12345": "ч",
        "156": "ш",
        "1346": "щ",
        "12356": "ъ",
        "2346": "ы",
        "23456": "ь",
        "246": "э",
        "1256": "ю",
        "1246": "я",
        "45": caps_sign,
        "236": "«",  # <<
        "356": "»",  # >>
        "4": "'",
        "456": "|",
        "346": "§",  # mark as &&
    }

    # UZ symbols: the RU table plus four extra letters
    alpha_map_UZ = {
        **alpha_map_RU,
        "1236": "ў",
        "13456": "қ",
        "12456": "ғ",
        "1456": "ҳ",
    }

    # EN symbols
    alpha_map_EN = {
        "1": "a",
        "12": "b",
        "14": "c",
        "145": "d",
        "15": "e",
        "124": "f",
        "1245": "g",
        "125": "h",
        "24": "i",
        "245": "j",
        "13": "k",
        "123": "l",
        "134": "m",
        "1345": "n",
        "135": "o",
        "1234": "p",
        "12345": "q",
        "1235": "r",
        "234": "s",
        "2345": "t",
        "136": "u",
        "1236": "v",
        "2456": "w",
        "1346": "x",
        "13456": "y",
        "1356": "z",
        # '6': caps_sign, # TODO duplicate of RU caps_sign
        "3": "'",
        "236": "«",  # <<
        "356": "»",  # >>
        # '236': '"', # mark as <<
        # '356': '"', # mark as >>
    }

    # UZL symbols: the EN table with four entries overridden/added
    alpha_map_UZL = {
        **alpha_map_EN,
        "1236": "o`",
        "12456": "g`",
        "156": "sh",
        "12345": "ch",
    }

    # Greek letters
    alpha_map_GR = {
        "1": "α",
        "12": "β",
        "1245": "γ",
        "145": "δ",
        "15": "ε",
        "1356": "ζ",
        "345": "η",
        "1456": "θ",
        "24": "ι",
        "13": "κ",
        "123": "λ",
        "134": "μ",
        "1345": "ν",
        "1346": "ξ",
        "135": "ο",
        "1234": "π",
        "1235": "ρ",
        "234": "σ",
        "2345": "τ",
        "13456": "υ",
        "124": "φ",
        "125": "χ",
        "12346": "ψ",
        "245": "ω",
        "46": caps_sign,
        "5": "'",  # stress
        "126": "αι",
        "146": "ει",
        "246": "οι",
        "12456": "υι",
        "16": "αυ",
        "156": "ευ",
        "1256": "ηυ",
        "136": "ου",
    }

    # Latvian letters
    alpha_map_LV = {
        "1": "a",
        "16": "ā",
        "12": "b",
        "14": "c",
        "146": "č",
        "145": "d",
        "15": "e",
        "156": "ē",
        "124": "f",
        "1245": "g",
        "12456": "ģ",
        "125": "h",
        "24": "i",
        "246": "ī",
        "245": "j",
        "13": "k",
        "136": "ķ",
        "123": "l",
        "1236": "ļ",
        "134": "m",
        "1345": "n",
        "13456": "ņ",
        "135": "o",
        "1234": "p",
        "1235": "r",
        "234": "s",
        "2346": "š",
        "2345": "t",
        "34": "u",
        "346": "ū",
        "2456": "v",
        "345": "z",
        "3456": "ž",
        "46": caps_sign,
    }

    # PL symbols: the EN table plus Polish letters and punctuation overrides
    alpha_map_PL = {
        **alpha_map_EN,
        "16": "ą",
        "146": "ć",
        "156": "ę",
        "126": "ł",
        "1456": "ń",
        "346": "ó",
        "246": "ś",
        "2346": "ź",
        "12346": "ż",
        "3": ".",
        "256": "/",
        "34": "\\",
    }

    # DE symbols
    alpha_map_DE = {
        "236": '"',
        "356": '"',
        "35": "*",
        "235": "+",
        "2": ",",
        "36": "-",
        "3": ".",
        "346": "ie",
        "16": "au",
        "126": "eu",
        "146": "ei",
        "1456": "ch",
        "156": "sch",
        "1256": "ü",
        "246": "ö",
        "25": ":",
        "23": ";",
        "2356": "=",
        "26": "?",
        "345": "ä",
        "1": "a",
        "12": "b",
        "14": "c",
        "145": "d",
        "15": "e",
        "124": "f",
        "1245": "g",
        "125": "h",
        "24": "i",
        "245": "j",
        "13": "k",
        "123": "l",
        "134": "m",
        "1345": "n",
        "135": "o",
        "1234": "p",
        "12345": "q",
        "1235": "r",
        "234": "s",
        "2345": "t",
        "136": "u",
        "1236": "v",
        "2456": "w",
        "1346": "x",
        "13456": "y",
        "1356": "z",
        "34": "äu",
        "23456": "st",
        "2346": "ß",
        "46": caps_sign,
    }

    # Digit symbols (after num_sign)
    num_map = {
        "1": "1",
        "12": "2",
        "14": "3",
        "145": "4",
        "15": "5",
        "124": "6",
        "1245": "7",
        "125": "8",
        "24": "9",
        "245": "0",
    }

    # Digits in denominators of fraction
    num_denominator_map = {
        "2": "/1",
        "23": "/2",
        "25": "/3",
        "256": "/4",
        "26": "/5",
        "235": "/6",
        "2356": "/7",
        "236": "/8",
        "35": "/9",
        "356": "/0",  # postprocess num 0 /0 to %
    }

    # Symbols for Math Braille (in Russian braille, I suppose)
    # NOTE(review): this literal used to list "246" twice ("<" then "{") and
    # "135" twice (">" then "}").  Python keeps the last value of a repeated
    # key, so only the brace values were ever effective; the shadowed "<" and
    # ">" entries are dropped here to make that visible.  Confirm which
    # symbols were actually intended.
    math_RU = {
        "2": ",",  # decimal separator
        "3": "..",  # postprocess to "." (thousand separator) if between digits else to * (multiplication).
        "235": "+",
        "36": "-",
        "236": "*",
        "256": "::",  # postprocess to ":" (division).
        "246": "{",
        "135": "}",
        "2356": "=",
        "126": "(",
        "345": ")",
        "12356": "[",
        "23456": "]",
        "456": "|",
        "6": "en",
        "46": "EN",
    }

    # Codes for dicts
    letter_dicts = {
        "SYM": sym_map,
        "RU": alpha_map_RU,
        "EN": alpha_map_EN,
        "EN2": alpha_map_EN,
        "GR": alpha_map_GR,
        "LV": alpha_map_LV,
        "PL": alpha_map_PL,
        "DE": alpha_map_DE,
        "UZ": alpha_map_UZ,
        "UZL": alpha_map_UZL,
        "NUM": num_map,
        "NUM_DENOMINATOR": num_denominator_map,
        "MATH_RU": math_RU,
    }

    return letter_dicts
|
347 |
+
|
348 |
+
|
349 |
+
def reversed_mapping(angelina_map=None):
    """Flatten the per-language tables into one symbol -> dot-code lookup.

    Each inner ``{dots: symbol}`` dict is inverted and merged into a single
    dict.  Tables are processed in iteration order, so when the same symbol
    occurs more than once (inside one table or across tables) the dot code
    seen last wins.

    :param angelina_map: dict of ``{table_code: {dots: symbol}}``.  When
        omitted (``None``) the tables from ``angelina_label_map()`` are used.
        The default is resolved at call time rather than at definition time,
        so importing the module does not build the tables eagerly.
    :return: dict mapping symbol -> dot-number string.
    """
    if angelina_map is None:
        angelina_map = angelina_label_map()

    concat_dict = {}
    for single_dict in angelina_map.values():
        # Assigning directly gives the same "last one wins" result as
        # inverting each table first and then merging it.
        for dots, symbol in single_dict.items():
            concat_dict[symbol] = dots

    return concat_dict
|
360 |
+
|
361 |
+
|
362 |
+
def transform_angelina_label(russian_label, mapping=None):
    """Convert an Angelina text label into its Braille dot-number string.

    Lookup order:

    1. the stripped label as-is in ``mapping``;
    2. with every ``"~"`` removed, a purely numeric label is returned
       unchanged;
    3. the lower-cased label in ``mapping`` (capital letters);
    4. a few special markers: caps sign -> ``"46"``, mark-out sign ->
       ``"123456"``, ``">>"`` -> ``"356"``, ``"<<"`` -> ``"236"``;
    5. anything else is printed and ``"46"`` is returned.

    :param russian_label: label text; surrounding whitespace is ignored.
    :param mapping: dict symbol -> dot-number string.  When omitted
        (``None``) the result of ``reversed_mapping()`` is built on first
        use and cached on the function object.
    :return: dot-number string such as ``"1245"``.
    """
    if mapping is None:
        # Build the default lookup lazily and only once.
        mapping = getattr(transform_angelina_label, "_default_mapping", None)
        if mapping is None:
            mapping = reversed_mapping()
            transform_angelina_label._default_mapping = mapping

    russian_label = russian_label.strip()
    if russian_label in mapping:
        return mapping[russian_label]

    # remove "~" from russian_label; a numeric remainder is returned as-is
    russian_label = russian_label.replace("~", "")
    if russian_label.isnumeric():
        return russian_label

    # other cases where letters are capitalized
    lowered = russian_label.lower()
    if lowered in mapping:
        return mapping[lowered]

    # Special markers.  Latin and Cyrillic look-alike spellings are both
    # accepted; the Cyrillic ones are written as escapes so they cannot be
    # confused with the Latin letters when reading the code.
    if russian_label in ("CC", "\u0421\u0421"):  # capitalize
        return "46"
    if russian_label in ("XX", "\u0425\u0425"):  # markout
        return "123456"
    if russian_label == ">>":
        return "356"
    if russian_label == "<<":
        return "236"

    # Unknown label: report it and fall back to the caps-sign code.
    print(russian_label)
    return "46"
|
utils/braille_map.json
ADDED
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"000001": "⠠",
|
3 |
+
"000010": "⠐",
|
4 |
+
"000011": "⠰",
|
5 |
+
"000100": "⠈",
|
6 |
+
"000101": "⠨",
|
7 |
+
"000110": "⠘",
|
8 |
+
"000111": "⠸",
|
9 |
+
"001000": "⠄",
|
10 |
+
"001001": "⠤",
|
11 |
+
"001010": "⠔",
|
12 |
+
"001011": "⠴",
|
13 |
+
"001100": "⠌",
|
14 |
+
"001101": "⠬",
|
15 |
+
"001110": "⠜",
|
16 |
+
"001111": "⠼",
|
17 |
+
"010000": "⠂",
|
18 |
+
"010001": "⠢",
|
19 |
+
"010010": "⠒",
|
20 |
+
"010011": "⠲",
|
21 |
+
"010100": "⠊",
|
22 |
+
"010101": "⠪",
|
23 |
+
"010110": "⠚",
|
24 |
+
"010111": "⠺",
|
25 |
+
"011000": "⠆",
|
26 |
+
"011001": "⠦",
|
27 |
+
"011010": "⠖",
|
28 |
+
"011011": "⠶",
|
29 |
+
"011100": "⠎",
|
30 |
+
"011101": "⠮",
|
31 |
+
"011110": "⠞",
|
32 |
+
"011111": "⠾",
|
33 |
+
"100000": "⠁",
|
34 |
+
"100001": "⠡",
|
35 |
+
"100010": "⠑",
|
36 |
+
"100011": "⠱",
|
37 |
+
"100100": "⠉",
|
38 |
+
"100101": "⠩",
|
39 |
+
"100110": "⠙",
|
40 |
+
"100111": "⠹",
|
41 |
+
"101000": "⠅",
|
42 |
+
"101001": "⠥",
|
43 |
+
"101010": "⠕",
|
44 |
+
"101011": "⠵",
|
45 |
+
"101100": "⠍",
|
46 |
+
"101101": "⠭",
|
47 |
+
"101110": "⠝",
|
48 |
+
"101111": "⠽",
|
49 |
+
"110000": "⠃",
|
50 |
+
"110001": "⠣",
|
51 |
+
"110010": "⠓",
|
52 |
+
"110011": "⠳",
|
53 |
+
"110100": "⠋",
|
54 |
+
"110101": "⠫",
|
55 |
+
"110110": "⠛",
|
56 |
+
"110111": "⠻",
|
57 |
+
"111000": "⠇",
|
58 |
+
"111001": "⠧",
|
59 |
+
"111010": "⠗",
|
60 |
+
"111011": "⠷",
|
61 |
+
"111100": "⠏",
|
62 |
+
"111101": "⠯",
|
63 |
+
"111110": "⠟",
|
64 |
+
"111111": "⠿"
|
65 |
+
}
|
utils/braille_natural_utils.py
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from PIL import Image
|
2 |
+
|
3 |
+
|
4 |
+
def get_label(label_path):
    """Read a YOLO-format label file and return the box values without class.

    Each non-blank line is split on whitespace, its first field (the class)
    is dropped, and the remaining fields are converted to ``float``.  Fields
    may be separated by any run of spaces or tabs; blank lines are skipped
    instead of producing empty rows.

    :param label_path: path of the label text file.
    :return: list with one list of floats per labelled line.
    :raises ValueError: if a remaining field is not a valid float.
    """
    boxes = []
    with open(label_path, "r") as f:
        for line in f:
            fields = line.split()
            if not fields:
                continue
            # label is in yolo format, remove class in the first item of the row
            boxes.append([float(item) for item in fields[1:]])

    return boxes
|
15 |
+
|
16 |
+
|
17 |
+
def get_image(img_path):
    """Open the image file at ``img_path`` with ``PIL.Image.open``.

    :param img_path: path of the image file.
    :return: the object returned by ``Image.open``.
    """
    img = Image.open(img_path)
    return img
|
utils/dsbi_utils.py
ADDED
@@ -0,0 +1,140 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python
|
2 |
+
# coding: utf-8
|
3 |
+
"""
|
4 |
+
Utils for DSBI dataset (https://github.com/yeluo1994/DSBI)
|
5 |
+
Written by IlyaOvodov (https://github.com/IlyaOvodov/AngelinaDataset)
|
6 |
+
"""
|
7 |
+
import collections
|
8 |
+
|
9 |
+
# One Braille cell as read from a DSBI annotation file.
CellInfo = collections.namedtuple(
    "CellInfo",
    [
        "row",
        "col",  # row and column in a symbol grid
        "left",
        "top",
        "right",
        "bottom",  # symbol corner coordinates in pixels
        "label",  # symbol label either like '246' or '010101' format
    ],
)
|
21 |
+
|
22 |
+
|
23 |
+
def read_txt(file_txt, binary_label=True):
    """
    Loads Braille annotation from DSBI annotation txt file

    Numbers are parsed with ``int``/``float`` instead of ``eval``, so the
    content of the annotation file is never executed.  Fields may be
    separated by any amount of whitespace, blank lines in the cell section
    are ignored, and the last line does not need a trailing newline.

    :param file_txt: filename of txt file
    :param binary_label: return symbol label in binary format, like '010101' (if True),
        or human readable like '246' (if False)
    :return: tuple (
        angle: value from 1st line of annotation file,
        h_lines: list of horizontal lines Y-coordinates,
        v_lines: list of vertical lines X-coordinates,
        cells: symbols as list of CellInfo
        )
        None, None, None, None for empty annotation
    :raises ValueError: if a numeric field cannot be parsed
    :raises AssertionError: if the line counts or a cell line are malformed
    """

    def parse_number(token):
        # Integers stay integers (they may be used as pixel coordinates);
        # anything else must be a valid float.
        try:
            return int(token)
        except ValueError:
            return float(token)

    with open(file_txt, "r") as f:
        lines = f.readlines()
    if len(lines) < 3:
        return None, None, None, None

    angle = parse_number(lines[0].strip())
    v_lines = [parse_number(tok) for tok in lines[1].split()]
    # two X-coordinates (left, right) per grid column
    assert len(v_lines) % 2 == 0, (file_txt, len(v_lines))
    h_lines = [parse_number(tok) for tok in lines[2].split()]
    # three Y-coordinates per grid row
    assert len(h_lines) % 3 == 0, (file_txt, len(h_lines))

    cells = []
    for cell_ln in lines[3:]:
        # split() drops the line ending without assuming one is present
        cell_nums = cell_ln.split()
        if not cell_nums:
            continue
        assert len(cell_nums) == 8, (file_txt, cell_ln)
        row = int(cell_nums[0])
        col = int(cell_nums[1])
        if binary_label:
            label = "".join(cell_nums[2:])
        else:
            label = ""
            for i, c in enumerate(cell_nums[2:]):
                if c == "1":
                    label += str(i + 1)
                else:
                    assert c == "0", (file_txt, cell_ln, i, c)
        # row and col are 1-based indices into the grid lines
        left = v_lines[(col - 1) * 2]
        right = v_lines[(col - 1) * 2 + 1]
        top = h_lines[(row - 1) * 3]
        bottom = h_lines[(row - 1) * 3 + 2]
        cells.append(
            CellInfo(
                row=row,
                col=col,
                left=left,
                top=top,
                right=right,
                bottom=bottom,
                label=label,
            )
        )
    return angle, h_lines, v_lines, cells
|
77 |
+
|
78 |
+
|
79 |
+
def read_DSBI_annotation(label_filename, width, height, rect_margin, get_points):
    """
    Loads Braille annotation from DSBI annotation txt file as a list of rects

    :param label_filename: filename of txt file
    :param width: image width (only used when get_points is True)
    :param height: image height (only used when get_points is True)
    :param rect_margin: margin as a fraction of the symbol width
    :param get_points: Points or Symbols mode
    :return:
        Empty list if the annotation is empty.
        List of symbol rects if get_points==False. Each rect is a tuple
        (left, top, right, bottom, label) where left..bottom are in the same
        units as the annotation file (they are NOT divided by width/height)
        and label is the binary string such as '010101'. The symbol box is
        extended by rect_margin*width of symbol on every side. Cells labelled
        '000000' are skipped.
        NOTE(review): an earlier version of this docstring promised [0,1]
        coordinates and an int label in [1..63] for this mode; the code does
        not do that - confirm which behaviour callers expect.
        List of point rects if get_points==True. Each point is a list
        [left, top, right, bottom, label] where left..bottom are divided by
        width/height and label is 0. Width and height of a point rect is
        2*int(rect_margin*width of symbol).
    """
    _, _, _, cells = read_txt(label_filename, binary_label=True)
    if cells is None:
        return []

    if not get_points:
        rects = []
        for c in cells:
            if c.label == "000000":
                continue
            margin = rect_margin * (c.right - c.left)
            rects.append(
                (
                    c.left - margin,
                    c.top - margin,
                    c.right + margin,
                    c.bottom + margin,
                    c.label,
                )
            )
        return rects

    rects = []
    for cl in cells:
        w = int((cl.right - cl.left) * rect_margin)
        h = w
        for i in range(6):
            if cl.label[i] != "1":
                continue
            # dots 1-3 use the left edge of the cell, dots 4-6 the right edge
            xc = cl.left if i < 3 else cl.right
            # position inside the column: 0 = top, 1 = middle, 2 = bottom
            iy = i % 3
            if iy == 0:
                yc = cl.top
            elif iy == 1:
                yc = (cl.top + cl.bottom) // 2
            else:
                yc = cl.bottom
            left, right = xc - w, xc + w
            top, bottom = yc - h, yc + h
            rects.append(
                [
                    left / width,
                    top / height,
                    right / width,
                    bottom / height,
                    0,
                ]
            )
    return rects
|
utils/kaggle_utils.py
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
|
3 |
+
|
4 |
+
def get_alphabet_map(path="./src/utils/alphabet_map.json"):
    """Load a JSON mapping file and return its parsed content.

    The file is read as UTF-8 so the result does not depend on the
    platform's default encoding.

    NOTE(review): the default path is relative to the current working
    directory and points into ``./src/utils``; confirm it matches where
    ``alphabet_map.json`` is actually deployed.

    :param path: path of the JSON file.
    :return: the object decoded by ``json.load``.
    """
    with open(path, "r", encoding="utf-8") as f:
        alphabet_map = json.load(f)

    return alphabet_map
|
utils/number_map.json
ADDED
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"000001": "⠠",
|
3 |
+
"000010": "⠐",
|
4 |
+
"000011": "⠰",
|
5 |
+
"000100": "⠈",
|
6 |
+
"000101": "⠨",
|
7 |
+
"000110": "⠘",
|
8 |
+
"000111": "⠸",
|
9 |
+
"001000": "⠄",
|
10 |
+
"001001": "⠤",
|
11 |
+
"001010": "⠔",
|
12 |
+
"001011": "⠴",
|
13 |
+
"001100": "⠌",
|
14 |
+
"001101": "⠬",
|
15 |
+
"001110": "⠜",
|
16 |
+
"001111": "floor",
|
17 |
+
"010000": "⠂",
|
18 |
+
"010001": "⠢",
|
19 |
+
"010010": "⠒",
|
20 |
+
"010011": "⠲",
|
21 |
+
"010100": "9",
|
22 |
+
"010101": "⠪",
|
23 |
+
"010110": "0",
|
24 |
+
"010111": "⠺",
|
25 |
+
"011000": "⠆",
|
26 |
+
"011001": "⠦",
|
27 |
+
"011010": "⠖",
|
28 |
+
"011011": "⠶",
|
29 |
+
"011100": "⠎",
|
30 |
+
"011101": "⠮",
|
31 |
+
"011110": "⠞",
|
32 |
+
"011111": "⠾",
|
33 |
+
"100000": "1",
|
34 |
+
"100001": "⠡",
|
35 |
+
"100010": "5",
|
36 |
+
"100011": "⠱",
|
37 |
+
"100100": "3",
|
38 |
+
"100101": "⠩",
|
39 |
+
"100110": "4",
|
40 |
+
"100111": "⠹",
|
41 |
+
"101000": "⠅",
|
42 |
+
"101001": "⠥",
|
43 |
+
"101010": "⠕",
|
44 |
+
"101011": "⠵",
|
45 |
+
"101100": "⠍",
|
46 |
+
"101101": "⠭",
|
47 |
+
"101110": "⠝",
|
48 |
+
"101111": "⠽",
|
49 |
+
"110000": "2",
|
50 |
+
"110001": "⠣",
|
51 |
+
"110010": "8",
|
52 |
+
"110011": "⠳",
|
53 |
+
"110100": "6",
|
54 |
+
"110101": "⠫",
|
55 |
+
"110110": "7",
|
56 |
+
"110111": "⠻",
|
57 |
+
"111000": "⠇",
|
58 |
+
"111001": "⠧",
|
59 |
+
"111010": "⠗",
|
60 |
+
"111011": "⠷",
|
61 |
+
"111100": "⠏",
|
62 |
+
"111101": "⠯",
|
63 |
+
"111110": "⠟",
|
64 |
+
"111111": "⠿"
|
65 |
+
}
|
66 |
+
|