|
import asyncio
import os
import tempfile

import edge_tts
import gradio as gr
import pyarabic.araby as araby
from gradio_client import Client
|
|
|
# Voice catalogue used by the UI and by the synthesis call.
#
# Outer keys are the language names offered in the "Languages" dropdown.
# Each inner mapping goes from the speaker name offered in the "Speakers"
# dropdown to the voice identifier handed to ``edge_tts.Communicate``.
language_dict = {
    "English": {
        "Jenny": "en-US-JennyNeural",
        "Guy": "en-US-GuyNeural",
        "Ana": "en-US-AnaNeural",
        "Aria": "en-US-AriaNeural",
        "Christopher": "en-US-ChristopherNeural",
        "Eric": "en-US-EricNeural",
        "Michelle": "en-US-MichelleNeural",
        "Roger": "en-US-RogerNeural",
        "Natasha": "en-AU-NatashaNeural",
        "William": "en-AU-WilliamNeural",
        "Clara": "en-CA-ClaraNeural",
        "Liam": "en-CA-LiamNeural",
        "Libby": "en-GB-LibbyNeural",
        "Maisie": "en-GB-MaisieNeural",
        "Ryan": "en-GB-RyanNeural",
        "Sonia": "en-GB-SoniaNeural",
        "Thomas": "en-GB-ThomasNeural",
        "Sam": "en-HK-SamNeural",
        "Yan": "en-HK-YanNeural",
        "Connor": "en-IE-ConnorNeural",
        "Emily": "en-IE-EmilyNeural",
        "Neerja": "en-IN-NeerjaNeural",
        "Prabhat": "en-IN-PrabhatNeural",
        "Asilia": "en-KE-AsiliaNeural",
        "Chilemba": "en-KE-ChilembaNeural",
        "Abeo": "en-NG-AbeoNeural",
        "Ezinne": "en-NG-EzinneNeural",
        "Mitchell": "en-NZ-MitchellNeural",
        "James": "en-PH-JamesNeural",
        "Rosa": "en-PH-RosaNeural",
        "Luna": "en-SG-LunaNeural",
        "Wayne": "en-SG-WayneNeural",
        "Elimu": "en-TZ-ElimuNeural",
        "Imani": "en-TZ-ImaniNeural",
        "Leah": "en-ZA-LeahNeural",
        "Luke": "en-ZA-LukeNeural",
    },
    "Spanish": {
        "Elena": "es-AR-ElenaNeural",
        "Tomas": "es-AR-TomasNeural",
        "Marcelo": "es-BO-MarceloNeural",
        "Sofia": "es-BO-SofiaNeural",
        "Gonzalo": "es-CO-GonzaloNeural",
        "Salome": "es-CO-SalomeNeural",
        "Juan": "es-CR-JuanNeural",
        "Maria": "es-CR-MariaNeural",
        "Belkys": "es-CU-BelkysNeural",
        "Emilio": "es-DO-EmilioNeural",
        "Ramona": "es-DO-RamonaNeural",
        "Andrea": "es-EC-AndreaNeural",
        "Luis": "es-EC-LuisNeural",
        "Alvaro": "es-ES-AlvaroNeural",
        "Elvira": "es-ES-ElviraNeural",
        "Teresa": "es-GQ-TeresaNeural",
        "Andres": "es-GT-AndresNeural",
        "Marta": "es-GT-MartaNeural",
        "Carlos": "es-HN-CarlosNeural",
        "Karla": "es-HN-KarlaNeural",
        "Federico": "es-NI-FedericoNeural",
        "Yolanda": "es-NI-YolandaNeural",
        "Margarita": "es-PA-MargaritaNeural",
        "Roberto": "es-PA-RobertoNeural",
        "Alex": "es-PE-AlexNeural",
        "Camila": "es-PE-CamilaNeural",
        "Karina": "es-PR-KarinaNeural",
        "Victor": "es-PR-VictorNeural",
        "Mario": "es-PY-MarioNeural",
        "Tania": "es-PY-TaniaNeural",
        "Lorena": "es-SV-LorenaNeural",
        "Rodrigo": "es-SV-RodrigoNeural",
        "Alonso": "es-US-AlonsoNeural",
        "Paloma": "es-US-PalomaNeural",
        "Mateo": "es-UY-MateoNeural",
        "Valentina": "es-UY-ValentinaNeural",
        "Paola": "es-VE-PaolaNeural",
        "Sebastian": "es-VE-SebastianNeural",
    },
    "Arabic": {
        "Hamed": "ar-SA-HamedNeural",
        "Zariyah": "ar-SA-ZariyahNeural",
        "Fatima": "ar-AE-FatimaNeural",
        "Hamdan": "ar-AE-HamdanNeural",
        "Ali": "ar-BH-AliNeural",
        "Laila": "ar-BH-LailaNeural",
        "Ismael": "ar-DZ-IsmaelNeural",
        "Salma": "ar-EG-SalmaNeural",
        "Shakir": "ar-EG-ShakirNeural",
        "Bassel": "ar-IQ-BasselNeural",
        "Rana": "ar-IQ-RanaNeural",
        "Sana": "ar-JO-SanaNeural",
        "Taim": "ar-JO-TaimNeural",
        "Fahed": "ar-KW-FahedNeural",
        "Noura": "ar-KW-NouraNeural",
        "Layla": "ar-LB-LaylaNeural",
        "Rami": "ar-LB-RamiNeural",
        "Iman": "ar-LY-ImanNeural",
        "Omar": "ar-LY-OmarNeural",
        "Jamal": "ar-MA-JamalNeural",
        "Mouna": "ar-MA-MounaNeural",
        "Abdullah": "ar-OM-AbdullahNeural",
        "Aysha": "ar-OM-AyshaNeural",
        "Amal": "ar-QA-AmalNeural",
        "Moaz": "ar-QA-MoazNeural",
        "Amany": "ar-SY-AmanyNeural",
        "Laith": "ar-SY-LaithNeural",
        "Hedi": "ar-TN-HediNeural",
        "Reem": "ar-TN-ReemNeural",
        "Maryam": "ar-YE-MaryamNeural",
        "Saleh": "ar-YE-SalehNeural",
    },
    "Korean": {
        "Sun-Hi": "ko-KR-SunHiNeural",
        "InJoon": "ko-KR-InJoonNeural",
    },
    "Thai": {
        "Premwadee": "th-TH-PremwadeeNeural",
        "Niwat": "th-TH-NiwatNeural",
    },
    "Vietnamese": {
        "HoaiMy": "vi-VN-HoaiMyNeural",
        "NamMinh": "vi-VN-NamMinhNeural",
    },
    "Japanese": {
        "Nanami": "ja-JP-NanamiNeural",
        "Keita": "ja-JP-KeitaNeural",
    },
    "French": {
        "Denise": "fr-FR-DeniseNeural",
        "Eloise": "fr-FR-EloiseNeural",
        "Henri": "fr-FR-HenriNeural",
        "Sylvie": "fr-CA-SylvieNeural",
        "Antoine": "fr-CA-AntoineNeural",
        "Jean": "fr-CA-JeanNeural",
        "Ariane": "fr-CH-ArianeNeural",
        "Fabrice": "fr-CH-FabriceNeural",
        "Charline": "fr-BE-CharlineNeural",
        "Gerard": "fr-BE-GerardNeural",
    },
    "Portuguese": {
        "Francisca": "pt-BR-FranciscaNeural",
        "Antonio": "pt-BR-AntonioNeural",
        "Duarte": "pt-PT-DuarteNeural",
        "Raquel": "pt-PT-RaquelNeural",
    },
    "Indonesian": {
        "Ardi": "id-ID-ArdiNeural",
        "Gadis": "id-ID-GadisNeural",
    },
    "Hebrew": {
        "Avri": "he-IL-AvriNeural",
        "Hila": "he-IL-HilaNeural",
    },
    "Italian": {
        "Isabella": "it-IT-IsabellaNeural",
        "Diego": "it-IT-DiegoNeural",
        "Elsa": "it-IT-ElsaNeural",
    },
    "Dutch": {
        "Colette": "nl-NL-ColetteNeural",
        "Fenna": "nl-NL-FennaNeural",
        "Maarten": "nl-NL-MaartenNeural",
        "Arnaud": "nl-BE-ArnaudNeural",
        "Dena": "nl-BE-DenaNeural",
    },
    "Malay": {
        "Osman": "ms-MY-OsmanNeural",
        "Yasmin": "ms-MY-YasminNeural",
    },
    "Norwegian": {
        "Pernille": "nb-NO-PernilleNeural",
        "Finn": "nb-NO-FinnNeural",
    },
    "Swedish": {
        "Sofie": "sv-SE-SofieNeural",
        "Mattias": "sv-SE-MattiasNeural",
    },
    "Greek": {
        "Athina": "el-GR-AthinaNeural",
        "Nestoras": "el-GR-NestorasNeural",
    },
    "German": {
        "Katja": "de-DE-KatjaNeural",
        "Amala": "de-DE-AmalaNeural",
        "Conrad": "de-DE-ConradNeural",
        "Killian": "de-DE-KillianNeural",
        "Ingrid": "de-AT-IngridNeural",
        "Jonas": "de-AT-JonasNeural",
        "Jan": "de-CH-JanNeural",
        "Leni": "de-CH-LeniNeural",
    },
    "Afrikaans": {
        "Adri": "af-ZA-AdriNeural",
        "Willem": "af-ZA-WillemNeural",
    },
    "Amharic": {
        "Ameha": "am-ET-AmehaNeural",
        "Mekdes": "am-ET-MekdesNeural",
    },
    "Azerbaijani": {
        "Babek": "az-AZ-BabekNeural",
        "Banu": "az-AZ-BanuNeural",
    },
    "Bulgarian": {
        "Borislav": "bg-BG-BorislavNeural",
        "Kalina": "bg-BG-KalinaNeural",
    },
    "Bengali": {
        "Nabanita": "bn-BD-NabanitaNeural",
        "Pradeep": "bn-BD-PradeepNeural",
        "Bashkar": "bn-IN-BashkarNeural",
        "Tanishaa": "bn-IN-TanishaaNeural",
    },
    "Bosnian": {
        "Goran": "bs-BA-GoranNeural",
        "Vesna": "bs-BA-VesnaNeural",
    },
    "Catalan": {
        "Joana": "ca-ES-JoanaNeural",
        "Enric": "ca-ES-EnricNeural",
    },
    "Czech": {
        "Antonin": "cs-CZ-AntoninNeural",
        "Vlasta": "cs-CZ-VlastaNeural",
    },
    "Welsh": {
        "Aled": "cy-GB-AledNeural",
        "Nia": "cy-GB-NiaNeural",
    },
    "Danish": {
        "Christel": "da-DK-ChristelNeural",
        "Jeppe": "da-DK-JeppeNeural",
    },
    "Estonian": {
        "Anu": "et-EE-AnuNeural",
        "Kert": "et-EE-KertNeural",
    },
    "Persian": {
        "Dilara": "fa-IR-DilaraNeural",
        "Farid": "fa-IR-FaridNeural",
    },
    "Finnish": {
        "Harri": "fi-FI-HarriNeural",
        "Noora": "fi-FI-NooraNeural",
    },
    "Irish": {
        "Colm": "ga-IE-ColmNeural",
        "Orla": "ga-IE-OrlaNeural",
    },
    "Galician": {
        "Roi": "gl-ES-RoiNeural",
        "Sabela": "gl-ES-SabelaNeural",
    },
    "Gujarati": {
        "Dhwani": "gu-IN-DhwaniNeural",
        "Niranjan": "gu-IN-NiranjanNeural",
    },
    "Hindi": {
        "Madhur": "hi-IN-MadhurNeural",
        "Swara": "hi-IN-SwaraNeural",
    },
    "Croatian": {
        "Gabrijela": "hr-HR-GabrijelaNeural",
        "Srecko": "hr-HR-SreckoNeural",
    },
    "Hungarian": {
        "Noemi": "hu-HU-NoemiNeural",
        "Tamas": "hu-HU-TamasNeural",
    },
    "Icelandic": {
        "Gudrun": "is-IS-GudrunNeural",
        "Gunnar": "is-IS-GunnarNeural",
    },
    "Javanese": {
        "Dimas": "jv-ID-DimasNeural",
        "Siti": "jv-ID-SitiNeural",
    },
    "Georgian": {
        "Eka": "ka-GE-EkaNeural",
        "Giorgi": "ka-GE-GiorgiNeural",
    },
    "Kazakh": {
        "Aigul": "kk-KZ-AigulNeural",
        "Daulet": "kk-KZ-DauletNeural",
    },
    "Khmer": {
        "Piseth": "km-KH-PisethNeural",
        "Sreymom": "km-KH-SreymomNeural",
    },
    "Kannada": {
        "Gagan": "kn-IN-GaganNeural",
        "Sapna": "kn-IN-SapnaNeural",
    },
    "Lao": {
        "Chanthavong": "lo-LA-ChanthavongNeural",
        "Keomany": "lo-LA-KeomanyNeural",
    },
    "Lithuanian": {
        "Leonas": "lt-LT-LeonasNeural",
        "Ona": "lt-LT-OnaNeural",
    },
    "Latvian": {
        "Everita": "lv-LV-EveritaNeural",
        "Nils": "lv-LV-NilsNeural",
    },
    "Macedonian": {
        "Aleksandar": "mk-MK-AleksandarNeural",
        "Marija": "mk-MK-MarijaNeural",
    },
    "Malayalam": {
        "Midhun": "ml-IN-MidhunNeural",
        "Sobhana": "ml-IN-SobhanaNeural",
    },
    "Mongolian": {
        "Bataa": "mn-MN-BataaNeural",
        "Yesui": "mn-MN-YesuiNeural",
    },
    "Marathi": {
        "Aarohi": "mr-IN-AarohiNeural",
        "Manohar": "mr-IN-ManoharNeural",
    },
    "Maltese": {
        "Grace": "mt-MT-GraceNeural",
        "Joseph": "mt-MT-JosephNeural",
    },
    "Burmese": {
        "Nilar": "my-MM-NilarNeural",
        "Thiha": "my-MM-ThihaNeural",
    },
    "Nepali": {
        "Hemkala": "ne-NP-HemkalaNeural",
        "Sagar": "ne-NP-SagarNeural",
    },
    "Polish": {
        "Marek": "pl-PL-MarekNeural",
        "Zofia": "pl-PL-ZofiaNeural",
    },
    "Pashto": {
        "Gul Nawaz": "ps-AF-GulNawazNeural",
        "Latifa": "ps-AF-LatifaNeural",
    },
    "Romanian": {
        "Alina": "ro-RO-AlinaNeural",
        "Emil": "ro-RO-EmilNeural",
    },
    "Russian": {
        "Svetlana": "ru-RU-SvetlanaNeural",
        "Dmitry": "ru-RU-DmitryNeural",
    },
    "Sinhala": {
        "Sameera": "si-LK-SameeraNeural",
        "Thilini": "si-LK-ThiliniNeural",
    },
    "Slovak": {
        "Lukas": "sk-SK-LukasNeural",
        "Viktoria": "sk-SK-ViktoriaNeural",
    },
    "Slovenian": {
        "Petra": "sl-SI-PetraNeural",
        "Rok": "sl-SI-RokNeural",
    },
    "Somali": {
        "Muuse": "so-SO-MuuseNeural",
        "Ubax": "so-SO-UbaxNeural",
    },
    "Albanian": {
        "Anila": "sq-AL-AnilaNeural",
        "Ilir": "sq-AL-IlirNeural",
    },
    "Serbian": {
        "Nicholas": "sr-RS-NicholasNeural",
        "Sophie": "sr-RS-SophieNeural",
    },
    "Sundanese": {
        "Jajang": "su-ID-JajangNeural",
        "Tuti": "su-ID-TutiNeural",
    },
    "Swahili": {
        "Rafiki": "sw-KE-RafikiNeural",
        "Zuri": "sw-KE-ZuriNeural",
        "Daudi": "sw-TZ-DaudiNeural",
        "Rehema": "sw-TZ-RehemaNeural",
    },
    "Tamil": {
        "Pallavi": "ta-IN-PallaviNeural",
        "Valluvar": "ta-IN-ValluvarNeural",
        "Kumar": "ta-LK-KumarNeural",
        "Saranya": "ta-LK-SaranyaNeural",
        "Kani": "ta-MY-KaniNeural",
        "Surya": "ta-MY-SuryaNeural",
        "Anbu": "ta-SG-AnbuNeural",
    },
    "Telugu": {
        "Mohan": "te-IN-MohanNeural",
        "Shruti": "te-IN-ShrutiNeural",
    },
    "Turkish": {
        "Ahmet": "tr-TR-AhmetNeural",
        "Emel": "tr-TR-EmelNeural",
    },
    "Ukrainian": {
        "Ostap": "uk-UA-OstapNeural",
        "Polina": "uk-UA-PolinaNeural",
    },
    "Urdu": {
        "Gul": "ur-IN-GulNeural",
        "Salman": "ur-IN-SalmanNeural",
        "Asad": "ur-PK-AsadNeural",
        "Uzma": "ur-PK-UzmaNeural",
    },
    "Uzbek": {
        "Madina": "uz-UZ-MadinaNeural",
        "Sardor": "uz-UZ-SardorNeural",
    },
    # NOTE(review): this group also lists zh-HK and zh-TW locale voices
    # under the "Mandarin" label; kept as-is because the label is UI-visible.
    "Mandarin": {
        "Xiaoxiao": "zh-CN-XiaoxiaoNeural",
        "Yunyang": "zh-CN-YunyangNeural",
        "Yunxi": "zh-CN-YunxiNeural",
        "Xiaoyi": "zh-CN-XiaoyiNeural",
        "Yunjian": "zh-CN-YunjianNeural",
        "Yunxia": "zh-CN-YunxiaNeural",
        "Xiaobei": "zh-CN-liaoning-XiaobeiNeural",
        "Xiaoni": "zh-CN-shaanxi-XiaoniNeural",
        "HiuMaan": "zh-HK-HiuMaanNeural",
        "HiuGaai": "zh-HK-HiuGaaiNeural",
        "WanLung": "zh-HK-WanLungNeural",
        "HsiaoChen": "zh-TW-HsiaoChenNeural",
        "HsiaoYu": "zh-TW-HsiaoYuNeural",
        "YunJhe": "zh-TW-YunJheNeural",
    },
    "Zulu": {
        "Thando": "zu-ZA-ThandoNeural",
        "Themba": "zu-ZA-ThembaNeural",
    },
}
|
|
|
# Remote Gradio client created once at import time; text_to_speech_edge calls
# its "/infer_shakkala" endpoint to add Arabic diacritics (tashkeel).
client = Client("MohamedRashad/arabic-auto-tashkeel")
|
|
|
async def text_to_speech_edge(text, language_code, speaker, tashkeel_checkbox=False):
    """Synthesize ``text`` into an MP3 file with the selected Edge TTS voice.

    Args:
        text: The text to speak.
        language_code: A top-level key of ``language_dict`` (e.g. ``"Arabic"``).
        speaker: A speaker name listed under that language in ``language_dict``.
        tashkeel_checkbox: When true and the language is ``"Arabic"``, the
            text is stripped of diacritics and re-diacritized through the
            remote ``/infer_shakkala`` endpoint before synthesis.

    Returns:
        A ``(text, path)`` tuple: the text that was actually synthesized
        (after the optional tashkeel step) and the path of the ``.mp3`` file.

    Raises:
        gr.Error: If ``text`` is empty or the language/speaker pair is not
            present in ``language_dict`` (e.g. nothing has been selected yet).
    """
    if not text or not text.strip():
        raise gr.Error("Please enter some text to convert to speech.")

    # Both dropdowns start unselected (None); report that to the user instead
    # of failing with a bare KeyError.
    voice = language_dict.get(language_code, {}).get(speaker)
    if voice is None:
        raise gr.Error("Please select a language and a speaker first.")

    if language_code == "Arabic" and tashkeel_checkbox:
        # client.predict is a blocking network call; run it in the default
        # executor so this coroutine does not stall the event loop.
        stripped = araby.strip_diacritics(text)
        loop = asyncio.get_running_loop()
        text = await loop.run_in_executor(
            None,
            lambda: client.predict(input_text=stripped, api_name="/infer_shakkala"),
        )

    communicate = edge_tts.Communicate(text, voice)

    # Reserve a unique path and close our handle before Edge TTS writes to it.
    # delete=False is required because the file is consumed after we return.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tmp_path = tmp_file.name

    try:
        await communicate.save(tmp_path)
    except BaseException:
        # Synthesis failed or was cancelled: do not leave an orphaned file.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise

    return text, tmp_path
|
|
|
|
|
def get_speakers(language):
    """Build the component updates that follow a language selection change.

    Args:
        language: The selected key of ``language_dict``. ``None`` or an
            unknown value is tolerated.

    Returns:
        A ``(gr.Dropdown, gr.Checkbox)`` pair. For a known language the
        dropdown lists that language's speakers with the first one selected,
        and the checkbox is visible only for ``"Arabic"``. For a missing or
        unknown language the dropdown is empty and disabled and the checkbox
        is hidden.
    """
    voices = language_dict.get(language)
    if not voices:
        # Nothing (or something unknown) selected: reset dependent widgets
        # rather than raising KeyError inside the event handler.
        return (
            gr.Dropdown(choices=[], value=None, interactive=False),
            gr.Checkbox(visible=False, interactive=False),
        )

    speakers = list(voices)
    return (
        gr.Dropdown(choices=speakers, value=speakers[0], interactive=True),
        gr.Checkbox(visible=language == "Arabic", interactive=True),
    )
|
|
|
|
|
# Nothing is preselected: the speaker list is filled in only after the user
# picks a language (see the language.change wiring below).
default_language = None
default_speaker = None

with gr.Blocks(title="Multilingual TTS") as demo:
    gr.HTML("<center><h1>Multilingual TTS (Edge TTS)</h1></center>")
    # Closing tags now match their opening tags (<h2>...</h2>, <p>...</p>).
    gr.HTML(f"<h2 style='color:Tomato;'> {len(language_dict)} languages supported</h2>")
    gr.HTML(f"<p> {', '.join(language_dict)} </p>")
    gr.Markdown("**Note:** A special feature is added for Arabic language only.")

    with gr.Row():
        # Input side: text, language/speaker selection, and the run button.
        with gr.Column():
            input_text = gr.Textbox(
                lines=5,
                label="Input Text",
                placeholder="Enter text to convert to speech",
            )
            language = gr.Dropdown(
                choices=list(language_dict.keys()),
                value=default_language,
                label="Languages",
                interactive=True,
            )
            # Starts empty and disabled; get_speakers enables and fills it.
            speaker = gr.Dropdown(
                choices=[],
                value=default_speaker,
                label="Speakers",
                interactive=False,
            )
            # Hidden by default; get_speakers shows it for Arabic only.
            tashkeel_checkbox = gr.Checkbox(
                label="Tashkeel",
                value=False,
                visible=False,
                interactive=False,
            )
            run_btn = gr.Button(value="Generate Audio", variant="primary")

        # Output side: the text that was spoken and the generated audio.
        with gr.Column():
            output_text = gr.Textbox(label="Output Text")
            output_audio = gr.Audio(type="filepath", label="Audio Output")

    language.change(
        get_speakers,
        inputs=[language],
        outputs=[speaker, tashkeel_checkbox],
    )
    run_btn.click(
        text_to_speech_edge,
        inputs=[input_text, language, speaker, tashkeel_checkbox],
        outputs=[output_text, output_audio],
    )
|
|
|
if __name__ == "__main__":
    # Turn on request queuing, then launch without a public share link.
    demo.queue().launch(share=False)
|
|