Pendrokar committed on
Commit
6fd6025
1 Parent(s): 57ec188

New TTS: IMS-Toucan

Browse files
Files changed (2) hide show
  1. app.py +30 -7
  2. test_tts_ims-toucan.py +16 -0
app.py CHANGED
@@ -74,29 +74,32 @@ AVAILABLE_MODELS = {
74
 
75
  # HF Gradio Spaces: # <works with gradio version #>
76
  # gradio version that works with most spaces: 4.29
77
- 'coqui/xtts': 'coqui/xtts', # 4.29 4.32
78
- 'collabora/WhisperSpeech': 'collabora/WhisperSpeech', # 4.32 4.36.1
79
  # 'myshell-ai/OpenVoice': 'myshell-ai/OpenVoice', # same devs as MeloTTS, which scores higher # 4.29
80
  # 'myshell-ai/OpenVoiceV2': 'myshell-ai/OpenVoiceV2', # same devs as MeloTTS, which scores higher # 4.29
81
- 'mrfakename/MetaVoice-1B-v0.1': 'mrfakename/MetaVoice-1B-v0.1', # 4.29 4.32
82
  'Pendrokar/xVASynth-TTS': 'Pendrokar/xVASynth-TTS', # 4.29 4.32 4.42.0
83
  # 'coqui/CoquiTTS': 'coqui/CoquiTTS',
84
- 'mrfakename/MeloTTS': 'mrfakename/MeloTTS', # 4.29 4.32
85
- 'fishaudio/fish-speech-1': 'fishaudio/fish-speech-1', # 4.29 4.32 4.36.1
86
 
87
  # E2 & F5 TTS
88
  # F5 model
89
- 'mrfakename/E2-F5-TTS': 'mrfakename/E2-F5-TTS', # 5.0
90
 
91
  # # Parler
92
  # Parler Large model
93
- 'parler-tts/parler_tts': 'parler-tts/parler_tts', # 4.29 4.32 4.36.1 4.42.0
94
  # Parler Mini model
95
  # 'parler-tts/parler_tts': 'parler-tts/parler_tts', # 4.29 4.32 4.36.1 4.42.0
96
  # 'parler-tts/parler_tts_mini': 'parler-tts/parler_tts_mini', # Mini is the default model of parler_tts
97
  # 'parler-tts/parler-tts-expresso': 'parler-tts/parler-tts-expresso', # 4.29 4.32 4.36.1 4.42.0
98
 
99
  # # Microsoft Edge TTS
 
 
 
100
  'innoai/Edge-TTS-Text-to-Speech': 'innoai/Edge-TTS-Text-to-Speech', # 4.29
101
 
102
  # HF TTS w issues
@@ -241,6 +244,15 @@ HF_SPACES = {
241
  'series': 'E2/F5 TTS',
242
  },
243
 
 
 
 
 
 
 
 
 
 
244
  # TTS w issues
245
  # 'PolyAI/pheme': '/predict#0', #sleepy HF Space
246
  # 'amphion/Text-to-Speech': '/predict#0', #takes a whole minute to synthesize
@@ -352,6 +364,17 @@ OVERRIDE_INPUTS = {
352
  3: "F5-TTS", # model
353
  4: False, # cleanup silence
354
  },
 
 
 
 
 
 
 
 
 
 
 
355
  }
356
 
357
  hf_clients: Tuple[Client] = {}
 
74
 
75
  # HF Gradio Spaces: # <works with gradio version #>
76
  # gradio version that works with most spaces: 4.29
77
+ # 'coqui/xtts': 'coqui/xtts', # 4.29 4.32
78
+ # 'collabora/WhisperSpeech': 'collabora/WhisperSpeech', # 4.32 4.36.1
79
  # 'myshell-ai/OpenVoice': 'myshell-ai/OpenVoice', # same devs as MeloTTS, which scores higher # 4.29
80
  # 'myshell-ai/OpenVoiceV2': 'myshell-ai/OpenVoiceV2', # same devs as MeloTTS, which scores higher # 4.29
81
+ # 'mrfakename/MetaVoice-1B-v0.1': 'mrfakename/MetaVoice-1B-v0.1', # 4.29 4.32
82
  'Pendrokar/xVASynth-TTS': 'Pendrokar/xVASynth-TTS', # 4.29 4.32 4.42.0
83
  # 'coqui/CoquiTTS': 'coqui/CoquiTTS',
84
+ # 'mrfakename/MeloTTS': 'mrfakename/MeloTTS', # 4.29 4.32
85
+ # 'fishaudio/fish-speech-1': 'fishaudio/fish-speech-1', # 4.29 4.32 4.36.1
86
 
87
  # E2 & F5 TTS
88
  # F5 model
89
+ # 'mrfakename/E2-F5-TTS': 'mrfakename/E2-F5-TTS', # 5.0
90
 
91
  # # Parler
92
  # Parler Large model
93
+ # 'parler-tts/parler_tts': 'parler-tts/parler_tts', # 4.29 4.32 4.36.1 4.42.0
94
  # Parler Mini model
95
  # 'parler-tts/parler_tts': 'parler-tts/parler_tts', # 4.29 4.32 4.36.1 4.42.0
96
  # 'parler-tts/parler_tts_mini': 'parler-tts/parler_tts_mini', # Mini is the default model of parler_tts
97
  # 'parler-tts/parler-tts-expresso': 'parler-tts/parler-tts-expresso', # 4.29 4.32 4.36.1 4.42.0
98
 
99
  # # Microsoft Edge TTS
100
+ # 'innoai/Edge-TTS-Text-to-Speech': 'innoai/Edge-TTS-Text-to-Speech', # 4.29
101
+
102
+ # IMS-Toucan
103
  'innoai/Edge-TTS-Text-to-Speech': 'innoai/Edge-TTS-Text-to-Speech', # 4.29
104
 
105
  # HF TTS w issues
 
244
  'series': 'E2/F5 TTS',
245
  },
246
 
247
+ # IMS-Toucan
248
+ 'Flux9665/MassivelyMultilingualTTS': {
249
+ 'name': 'IMS-Toucan',
250
+ 'function': "/predict",
251
+ 'text_param_index': 0,
252
+ 'return_audio_index': 0,
253
+ 'series': 'IMS-Toucan',
254
+ }
255
+
256
  # TTS w issues
257
  # 'PolyAI/pheme': '/predict#0', #sleepy HF Space
258
  # 'amphion/Text-to-Speech': '/predict#0', #takes a whole minute to synthesize
 
364
  3: "F5-TTS", # model
365
  4: False, # cleanup silence
366
  },
367
+
368
+ # IMS-Toucan
369
+ 'Flux9665/MassivelyMultilingualTTS': {
370
+ 1: "English (eng)", #language
371
+ 2: 0.6, #prosody_creativity
372
+ 3: 1, #duration_scaling_factor
373
+ 4: 41, #voice_seed
374
+ 5: -7.5, #emb1
375
+ 6: None, #reference_audio
376
+ }
377
+
378
  }
379
 
380
  hf_clients: Tuple[Client] = {}
test_tts_ims-toucan.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from gradio_client import Client
3
+
4
+ client = Client("Flux9665/MassivelyMultilingualTTS", hf_token=os.getenv('HF_TOKEN'))
5
+ endpoints = client.view_api(all_endpoints=True, print_info=False, return_format='dict')
6
+ # print(endpoints)
7
+ result = client.predict(
8
+ prompt="What I cannot create, I do not understand.",
9
+ language="English (eng)",
10
+ prosody_creativity=0.5,
11
+ duration_scaling_factor=1,
12
+ voice_seed=27,
13
+ emb1=-7.5,
14
+ reference_audio=None,
15
+ api_name="/predict"
16
+ )