# Page-header residue from the hosting site ("Spaces: Running"); not part of the program.
# Alternative apostrophe characters that are normalised to the ASCII apostrophe.
_APOSTROPHES = {
    "`": "'",
    "ʼ": "'",
}

# Each digit is spelled out on its own, followed by a space, so "10" becomes
# "од+ин н+уль ". The "+" inside the words is kept exactly as written here;
# presumably it is a stress marker consumed downstream -- TODO confirm.
_DIGITS = {
    "1": "од+ин ",
    "2": "дв+а ",
    "3": "тр+и ",
    "4": "чот+ири ",
    "5": "п'+ять ",
    "6": "ш+ість ",
    "7": "с+ім ",
    "8": "в+ісім ",
    "9": "д+ев'ять ",
    "0": "н+уль ",
}

# Brute-force, letter-by-letter transliteration of the English alphabet.
_ENGLISH = {
    "a": "а",
    "b": "б",
    "c": "ц",
    "d": "д",
    "e": "е",
    "f": "ф",
    "g": "ґ",
    "h": "г",
    "i": "і",
    "j": "дж",
    "k": "к",
    "l": "л",
    "m": "м",
    "n": "н",
    "o": "о",
    "p": "п",
    "q": "кв",
    "r": "р",
    "s": "с",
    "t": "т",
    "u": "ю",
    "v": "в",
    "w": "в",
    "x": "кс",
    "y": "і",
    "z": "з",
}


def _build_translation_table():
    """Combine all single-character substitutions into one translate table."""
    mapping = {}
    mapping.update(_APOSTROPHES)
    mapping.update(_DIGITS)
    for latin, replacement in _ENGLISH.items():
        mapping[latin] = replacement
        # Uppercase letters get the fully upper-cased replacement,
        # so "Q" becomes "КВ" (not "Кв").
        mapping[latin.upper()] = replacement.upper()
    return str.maketrans(mapping)


# Built once at import time; every key is a single character, so a single
# str.translate pass gives the same result as replacing them one at a time
# (no replacement text contains a character that is itself a key).
_TRANSLATION_TABLE = _build_translation_table()


def preprocess_text(text: str) -> str:
    """Normalise apostrophes, spell out digits and transliterate Latin letters.

    The following substitutions are applied character by character:

    * "`" and "ʼ" become the ASCII apostrophe "'".
    * Every digit 0-9 becomes its spelled-out word followed by a space;
      digits are handled individually, not as whole numbers.
    * Every English letter becomes its mapped replacement, with uppercase
      letters producing the upper-cased replacement.

    All other characters are returned unchanged.

    Args:
        text: The string to preprocess.

    Returns:
        The string with the substitutions above applied.
    """
    # TODO: autostress support here
    return text.translate(_TRANSLATION_TABLE)
if __name__ == "__main__":
    # Manual smoke check: print the preprocessed form of a sample phrase.
    sample = "Quality of life update"
    print(preprocess_text(sample))