"""Gradio playground that exposes the single-speaker 🐸 Coqui TTS models."""

import tempfile
from typing import Optional

import gradio as gr
import numpy as np
from TTS.config import load_config
from TTS.utils.manage import ModelManager
from TTS.utils.synthesizer import Synthesizer

MODELS = {}
SPEAKERS = {}
MAX_TXT_LEN = 100

manager = ModelManager()
MODEL_NAMES = manager.list_tts_models()

# filter out multi-speaker models and slow wavegrad vocoders
filters = ["vctk", "your_tts", "ek1"]
MODEL_NAMES = [model_name for model_name in MODEL_NAMES if not any(f in model_name for f in filters)]

EN = [el for el in MODEL_NAMES if "/en/" in el]
OTHER = [el for el in MODEL_NAMES if "/en/" not in el]
# Swap the first and sixth English entries. Guarded so that a catalogue with
# fewer than six English models does not crash the app at import time.
if len(EN) > 5:
    EN[0], EN[5] = EN[5], EN[0]
MODEL_NAMES = EN + OTHER

# reorder models
print(MODEL_NAMES)


def tts(text: str, model_name: str):
    """Synthesize ``text`` with ``model_name`` and return the path of a WAV file.

    Args:
        text: Sentence to speak. Anything beyond ``MAX_TXT_LEN`` characters is
            dropped (a notice is printed to the server log).
        model_name: Name of the model, as listed in ``MODEL_NAMES``.

    Returns:
        Path of a temporary ``.wav`` file holding the synthesized audio. The
        file is created with ``delete=False`` so it is still on disk when the
        path is handed back to the caller.
    """
    if len(text) > MAX_TXT_LEN:
        text = text[:MAX_TXT_LEN]
        print(f"Input text was cutoff since it went over the {MAX_TXT_LEN} character limit.")
    print(text, model_name)

    # download model
    model_path, config_path, model_item = manager.download_model(model_name)
    # A missing key is treated the same as an explicit ``None``: no vocoder.
    vocoder_name: Optional[str] = model_item.get("default_vocoder")

    # download vocoder
    vocoder_path = None
    vocoder_config_path = None
    if vocoder_name is not None:
        vocoder_path, vocoder_config_path, _ = manager.download_model(vocoder_name)

    # init synthesizer (a fresh instance is built on every call)
    synthesizer = Synthesizer(
        model_path,
        config_path,
        None,
        None,
        vocoder_path,
        vocoder_config_path,
    )

    # synthesize
    wavs = synthesizer.tts(text, None)

    # return output
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        synthesizer.save_wav(wavs, fp)
        return fp.name


title = """

🐸💬 CoquiTTS Playground

"""

with gr.Blocks(analytics_enabled=False) as demo:
    # Header row: news on the left, project links on the right.
    with gr.Row():
        with gr.Column():
            gr.Markdown(
                """ ##

"""
            )
            gr.Markdown(
                """
## 🐸Coqui.ai News
- 📣 ⓍTTS, our production TTS model that can speak 13 languages, is released [Blog Post](https://coqui.ai/blog/tts/open_xtts), [Demo](https://huggingface.co/spaces/coqui/xtts), [Docs](https://tts.readthedocs.io/en/dev/models/xtts.html)
- 📣 [🐶Bark](https://github.com/suno-ai/bark) is now available for inference with unconstrained voice cloning. [Docs](https://tts.readthedocs.io/en/dev/models/bark.html)
- 📣 You can use [~1100 Fairseq models](https://github.com/facebookresearch/fairseq/tree/main/examples/mms) with 🐸TTS.
- 📣 🐸TTS now supports 🐢Tortoise with faster inference. [Docs](https://tts.readthedocs.io/en/dev/models/tortoise.html)
- 📣 **Coqui Studio API** is landed on 🐸TTS. - [Example](https://github.com/coqui-ai/TTS/blob/dev/README.md#-python-api)
- 📣 [**Coqui Studio API**](https://docs.coqui.ai/docs) is live.
- 📣 Voice generation with prompts - **Prompt to Voice** - is live on [**Coqui Studio**](https://app.coqui.ai/auth/signin)!! - [Blog Post](https://coqui.ai/blog/tts/prompt-to-voice)
- 📣 Voice generation with fusion - **Voice fusion** - is live on [**Coqui Studio**](https://app.coqui.ai/auth/signin).
- 📣 Voice cloning is live on [**Coqui Studio**](https://app.coqui.ai/auth/signin).
"""
            )
        with gr.Column():
            gr.Markdown(
                """
💻 This space showcases some of the **[CoquiTTS](https://github.com/coqui-ai/TTS)** models.
There are > 30 languages with single and multi speaker models, all thanks to our 👑 Contributors.
Visit the links below for more.

| | |
| ------------------------------- | --------------------------------------- |
| 🐸💬 **CoquiTTS** | [Github](https://github.com/coqui-ai/TTS) |
| 💼 **Documentation** | [ReadTheDocs](https://tts.readthedocs.io/en/latest/)
| 👩‍💻 **Questions** | [GitHub Discussions] |
| 🗯 **Community** | [![Dicord](https://img.shields.io/discord/1037326658807533628?color=%239B59B6&label=chat%20on%20discord)](https://discord.gg/5eXr5seRrv) |

[github issue tracker]: https://github.com/coqui-ai/tts/issues
[github discussions]: https://github.com/coqui-ai/TTS/discussions
[discord]: https://discord.gg/5eXr5seRrv
"""
            )

    # Credits row.
    with gr.Row():
        gr.Markdown(
            """

👑 Model contributors

- @nmstoker
- @kaiidams
- @WeberJulian,
- @Edresson
- @thorstenMueller
- @r-dh
- @kirianguiller
- @robinhad
- @fkarabiber
- @nicolalandro
- @a-froghyar
- @manmay-nakhashi
- @noml4u

"""
        )

    # Interaction row: text and model picker on the left, audio on the right.
    with gr.Row():
        with gr.Column():
            input_text = gr.inputs.Textbox(
                label="Input Text",
                default="This sentence has been generated by a speech synthesis system.",
            )
            model_select = gr.inputs.Dropdown(
                label="Pick Model: tts_models///",
                choices=MODEL_NAMES,
                default="tts_models/en/jenny/jenny"
            )
            tts_button = gr.Button("Send", elem_id="send-btn", visible=True)

        with gr.Column():
            output_audio = gr.outputs.Audio(label="Output", type="filepath")

    tts_button.click(
        tts,
        inputs=[
            input_text,
            model_select,
        ],
        outputs=[output_audio],
    )

demo.queue(concurrency_count=16).launch(debug=True)