import os
import logging
from threading import Lock

import gradio as gr
import spaces
from balacoon_tts import TTS
from huggingface_hub import hf_hub_download, list_repo_files

# Repository hosting the Balacoon TTS models, plus defaults for the UI.
repo = "balacoon/tts"
default_text_model = "en_us_hifi_jets_cpu.addon"
default_text_speaker = "8051"

# Guards the shared TTS instance, which gets swapped when the user picks another model.
locker = Lock()
cur_model_path = None
model_to_speakers = dict()
model_repo_dir = "./data"

# Mirror the model repository locally, then load the default model from that copy.
for name in list_repo_files(repo_id=repo):
    if not os.path.isfile(os.path.join(model_repo_dir, name)):
        hf_hub_download(repo_id=repo, filename=name, local_dir=model_repo_dir)

repo_files = os.listdir(model_repo_dir)
model_files = [x for x in repo_files if x.endswith("_cpu.addon")]

cur_model_path = os.path.join(model_repo_dir, default_text_model)
tts = TTS(cur_model_path)
model_to_speakers[default_text_model] = tts.get_speakers()


def set_model(model_name_str: str):
    """Load the selected model (if not cached) and return its speaker dropdown."""
    global tts, cur_model_path, model_to_speakers
    if model_name_str in model_to_speakers:
        speakers = model_to_speakers[model_name_str]
    else:
        with locker:
            model_path = os.path.join(model_repo_dir, model_name_str)
            if tts is not None:
                del tts
            tts = TTS(model_path)
            cur_model_path = model_path
            speakers = tts.get_speakers()
            model_to_speakers[model_name_str] = speakers
    value = speakers[-1]
    return gr.Dropdown(choices=speakers, value=value, visible=True)


def predict(text_str, model_name_str, speaker_str):
    """Synthesize speech for the text, reloading the model if the selection changed."""
    if not text_str or not model_name_str or not speaker_str:
        logging.error("Text, model, or speaker was not provided")
        return
    expected_model_path = os.path.join(model_repo_dir, model_name_str)
    global tts, cur_model_path
    with locker:
        if expected_model_path != cur_model_path:
            if tts is not None:
                del tts
            tts = TTS(expected_model_path)
            cur_model_path = expected_model_path
        samples = tts.synthesize(text_str, speaker_str)
    return gr.Audio(value=(tts.get_sampling_rate(), samples))


def cloud():
    """Triggered by the ☁️ button; logs a keep-alive message."""
    print("[CLOUD] | Space maintained.")


@spaces.GPU(duration=15)
def gpu():
    """Placeholder ZeroGPU task; not wired to the UI."""
    return


with gr.Blocks() as demo:
    with gr.Row():
        gr.Markdown("# ⚡ BOOSTED! This now provides instant inference.")
        gr.Markdown("⭐ A TTS space owned by Statical.")
    with gr.Row():
        with gr.Column():
            text = gr.Textbox(label="Input", placeholder="Message...", lines=1)
            get_text_model = gr.Dropdown(label="Models", choices=model_files, value=default_text_model)
            get_text_speaker = gr.Dropdown(
                label="Speakers",
                choices=model_to_speakers[default_text_model],
                value=default_text_speaker,
            )
        with gr.Column():
            audio = gr.Audio()
            run = gr.Button("Generate")
            maintain = gr.Button("☁️")

    run.click(predict, inputs=[text, get_text_model, get_text_speaker], outputs=[audio], queue=False)
    maintain.click(cloud, inputs=[], outputs=[], queue=False)
    get_text_model.change(set_model, inputs=get_text_model, outputs=get_text_speaker)

demo.launch()