Spaces:
Runtime error
Runtime error
modified files
Browse files- .gitignore +172 -0
- app.py +99 -4
- packages.txt +2 -0
- requirements.txt +6 -0
.gitignore
CHANGED
@@ -0,0 +1,172 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Initially taken from Github's Python gitignore file
|
2 |
+
|
3 |
+
# Byte-compiled / optimized / DLL files
|
4 |
+
__pycache__/
|
5 |
+
*.py[cod]
|
6 |
+
*$py.class
|
7 |
+
|
8 |
+
# C extensions
|
9 |
+
*.so
|
10 |
+
|
11 |
+
# Distribution / packaging
|
12 |
+
.Python
|
13 |
+
build/
|
14 |
+
develop-eggs/
|
15 |
+
dist/
|
16 |
+
downloads/
|
17 |
+
eggs/
|
18 |
+
.eggs/
|
19 |
+
lib/
|
20 |
+
lib64/
|
21 |
+
parts/
|
22 |
+
sdist/
|
23 |
+
var/
|
24 |
+
wheels/
|
25 |
+
*.egg-info/
|
26 |
+
.installed.cfg
|
27 |
+
*.egg
|
28 |
+
MANIFEST
|
29 |
+
|
30 |
+
# PyInstaller
|
31 |
+
# Usually these files are written by a python script from a template
|
32 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
33 |
+
*.manifest
|
34 |
+
*.spec
|
35 |
+
|
36 |
+
# Installer logs
|
37 |
+
pip-log.txt
|
38 |
+
pip-delete-this-directory.txt
|
39 |
+
|
40 |
+
# Unit test / coverage reports
|
41 |
+
htmlcov/
|
42 |
+
.tox/
|
43 |
+
.nox/
|
44 |
+
.coverage
|
45 |
+
.coverage.*
|
46 |
+
.cache
|
47 |
+
nosetests.xml
|
48 |
+
coverage.xml
|
49 |
+
*.cover
|
50 |
+
.hypothesis/
|
51 |
+
.pytest_cache/
|
52 |
+
|
53 |
+
# Translations
|
54 |
+
*.mo
|
55 |
+
*.pot
|
56 |
+
|
57 |
+
# Django stuff:
|
58 |
+
*.log
|
59 |
+
local_settings.py
|
60 |
+
db.sqlite3
|
61 |
+
|
62 |
+
# Flask stuff:
|
63 |
+
instance/
|
64 |
+
.webassets-cache
|
65 |
+
|
66 |
+
# Scrapy stuff:
|
67 |
+
.scrapy
|
68 |
+
|
69 |
+
# Sphinx documentation
|
70 |
+
docs/_build/
|
71 |
+
|
72 |
+
# PyBuilder
|
73 |
+
target/
|
74 |
+
|
75 |
+
# Jupyter Notebook
|
76 |
+
.ipynb_checkpoints
|
77 |
+
|
78 |
+
# IPython
|
79 |
+
profile_default/
|
80 |
+
ipython_config.py
|
81 |
+
|
82 |
+
# pyenv
|
83 |
+
.python-version
|
84 |
+
|
85 |
+
# celery beat schedule file
|
86 |
+
celerybeat-schedule
|
87 |
+
|
88 |
+
# SageMath parsed files
|
89 |
+
*.sage.py
|
90 |
+
|
91 |
+
# Environments
|
92 |
+
.env
|
93 |
+
.venv
|
94 |
+
env/
|
95 |
+
venv/
|
96 |
+
ENV/
|
97 |
+
env.bak/
|
98 |
+
venv.bak/
|
99 |
+
|
100 |
+
# Spyder project settings
|
101 |
+
.spyderproject
|
102 |
+
.spyproject
|
103 |
+
|
104 |
+
# Rope project settings
|
105 |
+
.ropeproject
|
106 |
+
|
107 |
+
# mkdocs documentation
|
108 |
+
/site
|
109 |
+
|
110 |
+
# mypy
|
111 |
+
.mypy_cache/
|
112 |
+
.dmypy.json
|
113 |
+
dmypy.json
|
114 |
+
|
115 |
+
# Pyre type checker
|
116 |
+
.pyre/
|
117 |
+
|
118 |
+
# vscode
|
119 |
+
.vscode
|
120 |
+
|
121 |
+
# Pycharm
|
122 |
+
.idea
|
123 |
+
|
124 |
+
# TF code
|
125 |
+
tensorflow_code
|
126 |
+
|
127 |
+
# Models
|
128 |
+
models
|
129 |
+
|
130 |
+
|
131 |
+
.idea/
|
132 |
+
data/*
|
133 |
+
output/*
|
134 |
+
proc_data
|
135 |
+
|
136 |
+
# examples
|
137 |
+
runs
|
138 |
+
examples/runs
|
139 |
+
|
140 |
+
# data
|
141 |
+
/data
|
142 |
+
serialization_dir
|
143 |
+
out_dir
|
144 |
+
log
|
145 |
+
|
146 |
+
|
147 |
+
# emacs
|
148 |
+
*.*~
|
149 |
+
|
150 |
+
*.tgz
|
151 |
+
*.bz2
|
152 |
+
|
153 |
+
raw_wikipedia/
|
154 |
+
*.npy
|
155 |
+
*.p
|
156 |
+
|
157 |
+
results/*
|
158 |
+
caches/*
|
159 |
+
wandb/*
|
160 |
+
|
161 |
+
*.html
|
162 |
+
|
163 |
+
env/*
|
164 |
+
|
165 |
+
vocoder/*
|
166 |
+
|
167 |
+
|
168 |
+
/outputs/
|
169 |
+
/exp/
|
170 |
+
models
|
171 |
+
*.wav
|
172 |
+
/wav_files/
|
app.py
CHANGED
@@ -1,7 +1,102 @@
|
|
1 |
import gradio as gr
|
|
|
|
|
|
|
|
|
|
|
2 |
|
3 |
-
|
4 |
-
|
5 |
|
6 |
-
|
7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import gradio as gr
|
2 |
+
import time
|
3 |
+
import torch
|
4 |
+
import scipy.io.wavfile
|
5 |
+
from espnet2.bin.tts_inference import Text2Speech
|
6 |
+
from espnet2.utils.types import str_or_none
|
7 |
|
8 |
+
def _load_text2speech(model_tag, vocoder_tag):
    """Build a CPU ``Text2Speech`` pipeline from a pretrained model tag.

    All three language models are created with the same decoding options,
    so the shared configuration lives here instead of being repeated per
    language.

    Args:
        model_tag: Tag of the pretrained TTS model.
        vocoder_tag: Tag of the vocoder; passed through ``str_or_none``
            (presumably mapping the string "none" to ``None`` — confirm
            against espnet2).

    Returns:
        The object returned by ``Text2Speech.from_pretrained``.
    """
    return Text2Speech.from_pretrained(
        model_tag=str_or_none(model_tag),
        vocoder_tag=str_or_none(vocoder_tag),
        device="cpu",
        # Only for Tacotron 2 & Transformer
        threshold=0.5,
        # Only for Tacotron 2
        minlenratio=0.0,
        maxlenratio=10.0,
        use_att_constraint=False,
        backward_window=1,
        forward_window=3,
        # Only for FastSpeech & FastSpeech2 & VITS
        speed_control_alpha=1.0,
        # Only for VITS
        noise_scale=0.333,
        noise_scale_dur=0.333,
    )


# English model.
tagen = 'kan-bayashi/ljspeech_vits'
vocoder_tagen = "none"
text2speechen = _load_text2speech(tagen, vocoder_tagen)

# Japanese model.
tagjp = 'kan-bayashi/jsut_full_band_vits_prosody'
vocoder_tagjp = 'none'
text2speechjp = _load_text2speech(tagjp, vocoder_tagjp)

# Chinese model.
tagch = 'kan-bayashi/csmsc_full_band_vits'
vocoder_tagch = "none"
text2speechch = _load_text2speech(tagch, vocoder_tagch)
|
74 |
+
|
75 |
+
def inference(text, lang):
    """Synthesize ``text`` with the model for ``lang`` and write it to a WAV file.

    Args:
        text: Text to synthesize.
        lang: Language selector: "english", "chinese" or "japanese".

    Returns:
        The path of the written WAV file, always "out.wav".

    Raises:
        ValueError: If ``lang`` is not one of the supported languages.
            Previously an unknown value fell through every branch and
            "out.wav" was returned without having been written by this call.
    """
    # Pick the pipeline first so the synthesis/write code exists only once.
    if lang == "english":
        text2speech = text2speechen
    elif lang == "chinese":
        text2speech = text2speechch
    elif lang == "japanese":
        text2speech = text2speechjp
    else:
        raise ValueError(f"unsupported language: {lang!r}")

    # Inference only: no gradients are needed.
    with torch.no_grad():
        wav = text2speech(text)["wav"]
        # Flatten the waveform and hand it to scipy as a NumPy array,
        # using the model's own sampling rate.
        # NOTE(review): the fixed output path is shared by every request —
        # confirm requests are serialized before relying on it.
        scipy.io.wavfile.write("out.wav", text2speech.fs, wav.view(-1).cpu().numpy())
    return "out.wav"
|
87 |
+
# Texts shown on the demo page.
title = "ESPnet2-TTS"
# The demo takes text and a language choice as input (not audio), so the
# usage hint describes that.
description = "Gradio demo for ESPnet2-TTS: Extending the Edge of TTS Research. To use it, simply enter your text and select a language, or click one of the examples to load them. Read more at the links below."
article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2110.07840' target='_blank'>ESPnet2-TTS: Extending the Edge of TTS Research</a> | <a href='https://github.com/espnet/espnet' target='_blank'>Github Repo</a></p>"

# One [text, language] example per supported language.
examples = [
    ['This paper describes ESPnet2-TTS, an end-to-end text-to-speech (E2E-TTS) toolkit. ESPnet2-TTS extends our earlier version, ESPnet-TTS, by adding many new features, including: on-the-fly flexible pre-processing, joint training with neural vocoders, and state-of-the-art TTS models with extensions like full-band E2E text-to-waveform modeling, which simplify the training pipeline and further enhance TTS performance. The unified design of our recipes enables users to quickly reproduce state-of-the-art E2E-TTS results', "english"],
    ['レシピの統一された設計により、ユーザーは最先端のE2E-TTSの結果をすばやく再現できます。また、推論用の統合Pythonインターフェースで事前にトレーニングされたモデルを多数提供し、ユーザーがベースラインサンプルを生成してデモを構築するための迅速な手段を提供します。', "japanese"],
    ['对英语和日语语料库的实验评估表明,我们提供的模型合成了与真实情况相当的话语,达到了最先进的水平', "chinese"],
]

# Build the interface around ``inference`` (text + language in, audio file
# out) and start serving it.
gr.Interface(
    inference,
    [
        gr.inputs.Textbox(label="input text", lines=10),
        gr.inputs.Radio(
            choices=["english", "chinese", "japanese"],
            type="value",
            default="english",
            label="language",
        ),
    ],
    gr.outputs.Audio(type="file", label="Output"),
    title=title,
    description=description,
    article=article,
    enable_queue=True,
    examples=examples,
).launch(debug=True)
|
packages.txt
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
cmake
|
2 |
+
libsndfile1
|
requirements.txt
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
espnet==0.10.3
|
2 |
+
pyopenjtalk
|
3 |
+
parallel_wavegan==0.5.3
|
4 |
+
espnet_model_zoo
|
5 |
+
scipy
|
6 |
+
torch
|