"""Gradio demo that synthesizes speech with single-speaker Coqui TTS models."""

import tempfile
from typing import Optional

import gradio as gr
import numpy as np
from TTS.config import load_config
from TTS.utils.manage import ModelManager
from TTS.utils.synthesizer import Synthesizer

# NOTE(review): these two dicts are not read or written anywhere in the
# visible code; SPEAKERS is only referenced by a disabled input below.
MODELS = {}
SPEAKERS = {}

manager = ModelManager()
MODEL_NAMES = manager.list_tts_models()

# Reorder models: swap the first two entries. Guarded so a listing with fewer
# than two models does not raise IndexError.
if len(MODEL_NAMES) > 1:
    MODEL_NAMES[0], MODEL_NAMES[1] = MODEL_NAMES[1], MODEL_NAMES[0]

# Filter out multi-speaker models: the UI exposes no speaker selection.
filters = ["vctk", "your_tts"]
MODEL_NAMES = [
    model_name
    for model_name in MODEL_NAMES
    if not any(f in model_name for f in filters)
]
print(MODEL_NAMES)


def tts(text: str, model_name: str, speaker_idx: Optional[str] = None) -> str:
    """Synthesize ``text`` with the chosen model and return a WAV file path.

    Args:
        text: The sentence(s) to synthesize.
        model_name: Model identifier; it is prefixed with ``tts_models/``
            before being handed to the model manager.
        speaker_idx: Forwarded unchanged as the second positional argument of
            ``Synthesizer.tts``. Defaults to ``None``.

    Returns:
        Path of a temporary ``.wav`` file holding the synthesized audio. The
        file is created with ``delete=False``, so it is not removed here.
    """
    print(text, model_name)

    # Download the acoustic model.
    model_path, config_path, model_item = manager.download_model(
        f"tts_models/{model_name}"
    )

    # A model entry may have no default vocoder (missing key or null value);
    # both cases leave the vocoder paths as None.
    vocoder_name: Optional[str] = model_item.get("default_vocoder")
    vocoder_path = None
    vocoder_config_path = None
    if vocoder_name is not None:
        vocoder_path, vocoder_config_path, _ = manager.download_model(vocoder_name)

    # Build the synthesizer; the two None arguments are passed through exactly
    # as in the original positional call.
    synthesizer = Synthesizer(
        model_path,
        config_path,
        None,
        None,
        vocoder_path,
        vocoder_config_path,
    )

    wavs = synthesizer.tts(text, speaker_idx)

    # delete=False keeps the file on disk after the handle is closed so the
    # returned path stays usable by the caller.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        synthesizer.save_wav(wavs, fp)
    return fp.name


# Markdown shown below the demo.
article = """
### Visit us on Coqui.ai and drop a 🌟 to CoquiTTS.
### You can run CoquiTTS on your machine. Check out our documentation.
```bash
$ pip install TTS
...
$ tts --list_models
...
$ tts --text "Text for TTS" --model_name "///" --out_path folder/to/save/output.wav
```
### 👑 Model contributors
- @nmstoker
- @kaiidams
- @WeberJulian,
- @Edresson
- @thorstenMueller
- @r-dh
- @kirianguiller
- @robinhad

Drop a ✨PR✨ on 🐸TTS to share a new model and have it included here.
"""

iface = gr.Interface(
    fn=tts,
    inputs=[
        gr.inputs.Textbox(
            label="Input",
            default="Hello, how are you?",
        ),
        gr.inputs.Radio(
            label="Pick a TTS Model",
            choices=MODEL_NAMES,
        ),
        # Disabled inputs, kept for reference:
        # gr.inputs.Dropdown(label="Select a speaker", choices=SPEAKERS, default=None)
        # gr.inputs.Audio(source="microphone", label="Record your voice.", type="numpy", label=None, optional=False)
    ],
    outputs=gr.outputs.Audio(label="Output"),
    title="🐸💬 CoquiTTS Demo",
    theme="grass",
    description="🐸💬 Coqui TTS - a deep learning toolkit for Text-to-Speech, battle-tested in research and production.",
    article=article,
    allow_flagging=False,
    flagging_options=['error', 'bad-quality', 'wrong-pronounciation'],
    live=False,
)
iface.launch(share=False)