Spaces:
Running
Running
New TTS: IMS-Toucan
Browse files- app.py +30 -7
- test_tts_ims-toucan.py +16 -0
app.py
CHANGED
@@ -74,29 +74,32 @@ AVAILABLE_MODELS = {
|
|
74 |
|
75 |
# HF Gradio Spaces: # <works with gradio version #>
|
76 |
# gradio version that works with most spaces: 4.29
|
77 |
-
'coqui/xtts': 'coqui/xtts', # 4.29 4.32
|
78 |
-
'collabora/WhisperSpeech': 'collabora/WhisperSpeech', # 4.32 4.36.1
|
79 |
# 'myshell-ai/OpenVoice': 'myshell-ai/OpenVoice', # same devs as MeloTTS, which scores higher # 4.29
|
80 |
# 'myshell-ai/OpenVoiceV2': 'myshell-ai/OpenVoiceV2', # same devs as MeloTTS, which scores higher # 4.29
|
81 |
-
'mrfakename/MetaVoice-1B-v0.1': 'mrfakename/MetaVoice-1B-v0.1', # 4.29 4.32
|
82 |
'Pendrokar/xVASynth-TTS': 'Pendrokar/xVASynth-TTS', # 4.29 4.32 4.42.0
|
83 |
# 'coqui/CoquiTTS': 'coqui/CoquiTTS',
|
84 |
-
'mrfakename/MeloTTS': 'mrfakename/MeloTTS', # 4.29 4.32
|
85 |
-
'fishaudio/fish-speech-1': 'fishaudio/fish-speech-1', # 4.29 4.32 4.36.1
|
86 |
|
87 |
# E2 & F5 TTS
|
88 |
# F5 model
|
89 |
-
'mrfakename/E2-F5-TTS': 'mrfakename/E2-F5-TTS', # 5.0
|
90 |
|
91 |
# # Parler
|
92 |
# Parler Large model
|
93 |
-
'parler-tts/parler_tts': 'parler-tts/parler_tts', # 4.29 4.32 4.36.1 4.42.0
|
94 |
# Parler Mini model
|
95 |
# 'parler-tts/parler_tts': 'parler-tts/parler_tts', # 4.29 4.32 4.36.1 4.42.0
|
96 |
# 'parler-tts/parler_tts_mini': 'parler-tts/parler_tts_mini', # Mini is the default model of parler_tts
|
97 |
# 'parler-tts/parler-tts-expresso': 'parler-tts/parler-tts-expresso', # 4.29 4.32 4.36.1 4.42.0
|
98 |
|
99 |
# # Microsoft Edge TTS
|
|
|
|
|
|
|
100 |
'innoai/Edge-TTS-Text-to-Speech': 'innoai/Edge-TTS-Text-to-Speech', # 4.29
|
101 |
|
102 |
# HF TTS w issues
|
@@ -241,6 +244,15 @@ HF_SPACES = {
|
|
241 |
'series': 'E2/F5 TTS',
|
242 |
},
|
243 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
244 |
# TTS w issues
|
245 |
# 'PolyAI/pheme': '/predict#0', #sleepy HF Space
|
246 |
# 'amphion/Text-to-Speech': '/predict#0', #takes a whole minute to synthesize
|
@@ -352,6 +364,17 @@ OVERRIDE_INPUTS = {
|
|
352 |
3: "F5-TTS", # model
|
353 |
4: False, # cleanup silence
|
354 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
355 |
}
|
356 |
|
357 |
hf_clients: Tuple[Client] = {}
|
|
|
74 |
|
75 |
# HF Gradio Spaces: # <works with gradio version #>
|
76 |
# gradio version that works with most spaces: 4.29
|
77 |
+
# 'coqui/xtts': 'coqui/xtts', # 4.29 4.32
|
78 |
+
# 'collabora/WhisperSpeech': 'collabora/WhisperSpeech', # 4.32 4.36.1
|
79 |
# 'myshell-ai/OpenVoice': 'myshell-ai/OpenVoice', # same devs as MeloTTS, which scores higher # 4.29
|
80 |
# 'myshell-ai/OpenVoiceV2': 'myshell-ai/OpenVoiceV2', # same devs as MeloTTS, which scores higher # 4.29
|
81 |
+
# 'mrfakename/MetaVoice-1B-v0.1': 'mrfakename/MetaVoice-1B-v0.1', # 4.29 4.32
|
82 |
'Pendrokar/xVASynth-TTS': 'Pendrokar/xVASynth-TTS', # 4.29 4.32 4.42.0
|
83 |
# 'coqui/CoquiTTS': 'coqui/CoquiTTS',
|
84 |
+
# 'mrfakename/MeloTTS': 'mrfakename/MeloTTS', # 4.29 4.32
|
85 |
+
# 'fishaudio/fish-speech-1': 'fishaudio/fish-speech-1', # 4.29 4.32 4.36.1
|
86 |
|
87 |
# E2 & F5 TTS
|
88 |
# F5 model
|
89 |
+
# 'mrfakename/E2-F5-TTS': 'mrfakename/E2-F5-TTS', # 5.0
|
90 |
|
91 |
# # Parler
|
92 |
# Parler Large model
|
93 |
+
# 'parler-tts/parler_tts': 'parler-tts/parler_tts', # 4.29 4.32 4.36.1 4.42.0
|
94 |
# Parler Mini model
|
95 |
# 'parler-tts/parler_tts': 'parler-tts/parler_tts', # 4.29 4.32 4.36.1 4.42.0
|
96 |
# 'parler-tts/parler_tts_mini': 'parler-tts/parler_tts_mini', # Mini is the default model of parler_tts
|
97 |
# 'parler-tts/parler-tts-expresso': 'parler-tts/parler-tts-expresso', # 4.29 4.32 4.36.1 4.42.0
|
98 |
|
99 |
# # Microsoft Edge TTS
|
100 |
+
# 'innoai/Edge-TTS-Text-to-Speech': 'innoai/Edge-TTS-Text-to-Speech', # 4.29
|
101 |
+
|
102 |
+
# IMS-Toucan
|
103 |
'innoai/Edge-TTS-Text-to-Speech': 'innoai/Edge-TTS-Text-to-Speech', # 4.29
|
104 |
|
105 |
# HF TTS w issues
|
|
|
244 |
'series': 'E2/F5 TTS',
|
245 |
},
|
246 |
|
247 |
+
# IMS-Toucan
|
248 |
+
'Flux9665/MassivelyMultilingualTTS': {
|
249 |
+
'name': 'IMS-Toucan',
|
250 |
+
'function': "/predict",
|
251 |
+
'text_param_index': 0,
|
252 |
+
'return_audio_index': 0,
|
253 |
+
'series': 'IMS-Toucan',
|
254 |
+
}
|
255 |
+
|
256 |
# TTS w issues
|
257 |
# 'PolyAI/pheme': '/predict#0', #sleepy HF Space
|
258 |
# 'amphion/Text-to-Speech': '/predict#0', #takes a whole minute to synthesize
|
|
|
364 |
3: "F5-TTS", # model
|
365 |
4: False, # cleanup silence
|
366 |
},
|
367 |
+
|
368 |
+
# IMS-Toucan
|
369 |
+
'Flux9665/MassivelyMultilingualTTS': {
|
370 |
+
1: "English (eng)", #language
|
371 |
+
2: 0.6, #prosody_creativity
|
372 |
+
3: 1, #duration_scaling_factor
|
373 |
+
4: 41, #voice_seed
|
374 |
+
5: -7.5, #emb1
|
375 |
+
6: None, #reference_audio
|
376 |
+
}
|
377 |
+
|
378 |
}
|
379 |
|
380 |
hf_clients: Tuple[Client] = {}
|
test_tts_ims-toucan.py
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from gradio_client import Client
|
3 |
+
|
4 |
+
# Smoke-test script for the IMS-Toucan Space: connect, list its endpoints,
# then request one synthesis through the "/predict" endpoint.

# The access token is taken from the HF_TOKEN environment variable
# (os.getenv yields None when it is not set).
hf_token = os.getenv('HF_TOKEN')
client = Client("Flux9665/MassivelyMultilingualTTS", hf_token=hf_token)

# Endpoint descriptions requested in dict form; uncomment the print to inspect.
endpoints = client.view_api(all_endpoints=True, print_info=False, return_format='dict')
# print(endpoints)

# Inputs passed by keyword to the "/predict" endpoint.
predict_kwargs = {
    "prompt": "What I cannot create, I do not understand.",
    "language": "English (eng)",
    "prosody_creativity": 0.5,
    "duration_scaling_factor": 1,
    "voice_seed": 27,
    "emb1": -7.5,
    "reference_audio": None,
}
result = client.predict(**predict_kwargs, api_name="/predict")
|