fix notebook
Browse files- tortoise_tts.ipynb +11 -255
tortoise_tts.ipynb
CHANGED
@@ -34,88 +34,9 @@
|
|
34 |
"cell_type": "code",
|
35 |
"execution_count": null,
|
36 |
"metadata": {
|
37 |
-
"id": "JrK20I32grP6"
|
38 |
-
"colab": {
|
39 |
-
"base_uri": "https://localhost:8080/"
|
40 |
-
},
|
41 |
-
"outputId": "44f55dca-5d0a-405e-a4cc-54bc8e16b780"
|
42 |
},
|
43 |
-
"outputs": [
|
44 |
-
{
|
45 |
-
"output_type": "stream",
|
46 |
-
"name": "stdout",
|
47 |
-
"text": [
|
48 |
-
"Cloning into 'tortoise-tts'...\n",
|
49 |
-
"remote: Enumerating objects: 736, done.\u001b[K\n",
|
50 |
-
"remote: Counting objects: 100% (23/23), done.\u001b[K\n",
|
51 |
-
"remote: Compressing objects: 100% (15/15), done.\u001b[K\n",
|
52 |
-
"remote: Total 736 (delta 10), reused 20 (delta 8), pack-reused 713\u001b[K\n",
|
53 |
-
"Receiving objects: 100% (736/736), 348.62 MiB | 24.08 MiB/s, done.\n",
|
54 |
-
"Resolving deltas: 100% (161/161), done.\n",
|
55 |
-
"/content/tortoise-tts\n",
|
56 |
-
"Requirement already satisfied: torch in /usr/local/lib/python3.7/dist-packages (from -r requirements.txt (line 1)) (1.10.0+cu111)\n",
|
57 |
-
"Requirement already satisfied: torchaudio in /usr/local/lib/python3.7/dist-packages (from -r requirements.txt (line 2)) (0.10.0+cu111)\n",
|
58 |
-
"Collecting rotary_embedding_torch\n",
|
59 |
-
" Downloading rotary_embedding_torch-0.1.5-py3-none-any.whl (4.1 kB)\n",
|
60 |
-
"Collecting transformers\n",
|
61 |
-
" Downloading transformers-4.18.0-py3-none-any.whl (4.0 MB)\n",
|
62 |
-
"\u001b[K |ββββββββββββββββββββββββββββββββ| 4.0 MB 5.3 MB/s \n",
|
63 |
-
"\u001b[?25hCollecting tokenizers\n",
|
64 |
-
" Downloading tokenizers-0.12.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.6 MB)\n",
|
65 |
-
"\u001b[K |ββββββββββββββββββββββββββββββββ| 6.6 MB 31.3 MB/s \n",
|
66 |
-
"\u001b[?25hRequirement already satisfied: inflect in /usr/local/lib/python3.7/dist-packages (from -r requirements.txt (line 6)) (2.1.0)\n",
|
67 |
-
"Collecting progressbar\n",
|
68 |
-
" Downloading progressbar-2.5.tar.gz (10 kB)\n",
|
69 |
-
"Collecting einops\n",
|
70 |
-
" Downloading einops-0.4.1-py3-none-any.whl (28 kB)\n",
|
71 |
-
"Collecting unidecode\n",
|
72 |
-
" Downloading Unidecode-1.3.4-py3-none-any.whl (235 kB)\n",
|
73 |
-
"\u001b[K |ββββββββββββββββββββββββββββββββ| 235 kB 44.3 MB/s \n",
|
74 |
-
"\u001b[?25hCollecting entmax\n",
|
75 |
-
" Downloading entmax-1.0.tar.gz (7.2 kB)\n",
|
76 |
-
"Requirement already satisfied: typing-extensions in /usr/local/lib/python3.7/dist-packages (from torch->-r requirements.txt (line 1)) (4.1.1)\n",
|
77 |
-
"Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.7/dist-packages (from transformers->-r requirements.txt (line 4)) (4.64.0)\n",
|
78 |
-
"Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.7/dist-packages (from transformers->-r requirements.txt (line 4)) (21.3)\n",
|
79 |
-
"Collecting sacremoses\n",
|
80 |
-
" Downloading sacremoses-0.0.49-py3-none-any.whl (895 kB)\n",
|
81 |
-
"\u001b[K |ββββββββββββββββββββββββββββββββ| 895 kB 36.6 MB/s \n",
|
82 |
-
"\u001b[?25hCollecting huggingface-hub<1.0,>=0.1.0\n",
|
83 |
-
" Downloading huggingface_hub-0.5.1-py3-none-any.whl (77 kB)\n",
|
84 |
-
"\u001b[K |ββββββββββββββββββββββββββββββββ| 77 kB 6.3 MB/s \n",
|
85 |
-
"\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from transformers->-r requirements.txt (line 4)) (3.6.0)\n",
|
86 |
-
"Collecting pyyaml>=5.1\n",
|
87 |
-
" Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)\n",
|
88 |
-
"\u001b[K |ββββββββββββββββββββββββββββββββ| 596 kB 38.9 MB/s \n",
|
89 |
-
"\u001b[?25hRequirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.7/dist-packages (from transformers->-r requirements.txt (line 4)) (1.21.6)\n",
|
90 |
-
"Requirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from transformers->-r requirements.txt (line 4)) (2.23.0)\n",
|
91 |
-
"Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.7/dist-packages (from transformers->-r requirements.txt (line 4)) (2019.12.20)\n",
|
92 |
-
"Requirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from transformers->-r requirements.txt (line 4)) (4.11.3)\n",
|
93 |
-
"Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging>=20.0->transformers->-r requirements.txt (line 4)) (3.0.8)\n",
|
94 |
-
"Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata->transformers->-r requirements.txt (line 4)) (3.8.0)\n",
|
95 |
-
"Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests->transformers->-r requirements.txt (line 4)) (1.24.3)\n",
|
96 |
-
"Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests->transformers->-r requirements.txt (line 4)) (3.0.4)\n",
|
97 |
-
"Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests->transformers->-r requirements.txt (line 4)) (2.10)\n",
|
98 |
-
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests->transformers->-r requirements.txt (line 4)) (2021.10.8)\n",
|
99 |
-
"Requirement already satisfied: six in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers->-r requirements.txt (line 4)) (1.15.0)\n",
|
100 |
-
"Requirement already satisfied: joblib in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers->-r requirements.txt (line 4)) (1.1.0)\n",
|
101 |
-
"Requirement already satisfied: click in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers->-r requirements.txt (line 4)) (7.1.2)\n",
|
102 |
-
"Building wheels for collected packages: progressbar, entmax\n",
|
103 |
-
" Building wheel for progressbar (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
|
104 |
-
" Created wheel for progressbar: filename=progressbar-2.5-py3-none-any.whl size=12082 sha256=bb7d90605d0bf4d89aedc46bd8ed39538f55e00ee70fa382c1af81f142f08fa8\n",
|
105 |
-
" Stored in directory: /root/.cache/pip/wheels/f0/fd/1f/3e35ed57e94cd8ced38dd46771f1f0f94f65fec548659ed855\n",
|
106 |
-
" Building wheel for entmax (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
|
107 |
-
" Created wheel for entmax: filename=entmax-1.0-py3-none-any.whl size=11015 sha256=5e2cf723e790ec941984d2030eb3231e1ae3ce75231709391a13edcd2bfb4770\n",
|
108 |
-
" Stored in directory: /root/.cache/pip/wheels/f7/e8/0d/acc29c2f66e69a1f42483347fa8545c293dec12325ee161716\n",
|
109 |
-
"Successfully built progressbar entmax\n",
|
110 |
-
"Installing collected packages: pyyaml, tokenizers, sacremoses, huggingface-hub, einops, unidecode, transformers, rotary-embedding-torch, progressbar, entmax\n",
|
111 |
-
" Attempting uninstall: pyyaml\n",
|
112 |
-
" Found existing installation: PyYAML 3.13\n",
|
113 |
-
" Uninstalling PyYAML-3.13:\n",
|
114 |
-
" Successfully uninstalled PyYAML-3.13\n",
|
115 |
-
"Successfully installed einops-0.4.1 entmax-1.0 huggingface-hub-0.5.1 progressbar-2.5 pyyaml-6.0 rotary-embedding-torch-0.1.5 sacremoses-0.0.49 tokenizers-0.12.1 transformers-4.18.0 unidecode-1.3.4\n"
|
116 |
-
]
|
117 |
-
}
|
118 |
-
],
|
119 |
"source": [
|
120 |
"!git clone https://github.com/neonbjb/tortoise-tts.git\n",
|
121 |
"%cd tortoise-tts\n",
|
@@ -138,97 +59,10 @@
|
|
138 |
"tts = TextToSpeech()"
|
139 |
],
|
140 |
"metadata": {
|
141 |
-
"id": "Gen09NM4hONQ"
|
142 |
-
"colab": {
|
143 |
-
"base_uri": "https://localhost:8080/"
|
144 |
-
},
|
145 |
-
"outputId": "35c1fb4b-5998-4e75-9ec9-29521b301db6"
|
146 |
},
|
147 |
"execution_count": null,
|
148 |
-
"outputs": [
|
149 |
-
{
|
150 |
-
"output_type": "stream",
|
151 |
-
"name": "stdout",
|
152 |
-
"text": [
|
153 |
-
"Downloading autoregressive.pth from https://huggingface.co/jbetker/tortoise-tts-v2/resolve/hf/.models/autoregressive.pth...\n"
|
154 |
-
]
|
155 |
-
},
|
156 |
-
{
|
157 |
-
"output_type": "stream",
|
158 |
-
"name": "stderr",
|
159 |
-
"text": [
|
160 |
-
"\n"
|
161 |
-
]
|
162 |
-
},
|
163 |
-
{
|
164 |
-
"output_type": "stream",
|
165 |
-
"name": "stdout",
|
166 |
-
"text": [
|
167 |
-
"Done.\n",
|
168 |
-
"Downloading clvp.pth from https://huggingface.co/jbetker/tortoise-tts-v2/resolve/hf/.models/clvp.pth...\n"
|
169 |
-
]
|
170 |
-
},
|
171 |
-
{
|
172 |
-
"output_type": "stream",
|
173 |
-
"name": "stderr",
|
174 |
-
"text": [
|
175 |
-
"\n"
|
176 |
-
]
|
177 |
-
},
|
178 |
-
{
|
179 |
-
"output_type": "stream",
|
180 |
-
"name": "stdout",
|
181 |
-
"text": [
|
182 |
-
"Done.\n",
|
183 |
-
"Downloading cvvp.pth from https://huggingface.co/jbetker/tortoise-tts-v2/resolve/hf/.models/cvvp.pth...\n"
|
184 |
-
]
|
185 |
-
},
|
186 |
-
{
|
187 |
-
"output_type": "stream",
|
188 |
-
"name": "stderr",
|
189 |
-
"text": [
|
190 |
-
"\n"
|
191 |
-
]
|
192 |
-
},
|
193 |
-
{
|
194 |
-
"output_type": "stream",
|
195 |
-
"name": "stdout",
|
196 |
-
"text": [
|
197 |
-
"Done.\n",
|
198 |
-
"Downloading diffusion_decoder.pth from https://huggingface.co/jbetker/tortoise-tts-v2/resolve/hf/.models/diffusion_decoder.pth...\n"
|
199 |
-
]
|
200 |
-
},
|
201 |
-
{
|
202 |
-
"output_type": "stream",
|
203 |
-
"name": "stderr",
|
204 |
-
"text": [
|
205 |
-
"\n"
|
206 |
-
]
|
207 |
-
},
|
208 |
-
{
|
209 |
-
"output_type": "stream",
|
210 |
-
"name": "stdout",
|
211 |
-
"text": [
|
212 |
-
"Done.\n",
|
213 |
-
"Downloading vocoder.pth from https://huggingface.co/jbetker/tortoise-tts-v2/resolve/hf/.models/vocoder.pth...\n"
|
214 |
-
]
|
215 |
-
},
|
216 |
-
{
|
217 |
-
"output_type": "stream",
|
218 |
-
"name": "stderr",
|
219 |
-
"text": [
|
220 |
-
"\n"
|
221 |
-
]
|
222 |
-
},
|
223 |
-
{
|
224 |
-
"output_type": "stream",
|
225 |
-
"name": "stdout",
|
226 |
-
"text": [
|
227 |
-
"Done.\n",
|
228 |
-
"Removing weight norm...\n"
|
229 |
-
]
|
230 |
-
}
|
231 |
-
]
|
232 |
},
|
233 |
{
|
234 |
"cell_type": "code",
|
@@ -239,28 +73,10 @@
|
|
239 |
"%ls voices"
|
240 |
],
|
241 |
"metadata": {
|
242 |
-
"id": "SSleVnRAiEE2"
|
243 |
-
"colab": {
|
244 |
-
"base_uri": "https://localhost:8080/"
|
245 |
-
},
|
246 |
-
"outputId": "e1eb09e2-1b68-4f81-b679-edb97538da39"
|
247 |
},
|
248 |
"execution_count": null,
|
249 |
-
"outputs": [
|
250 |
-
{
|
251 |
-
"output_type": "stream",
|
252 |
-
"name": "stdout",
|
253 |
-
"text": [
|
254 |
-
"\u001b[0m\u001b[01;34mangelina_jolie\u001b[0m/ \u001b[01;34mhalle_barry\u001b[0m/ \u001b[01;34mlj\u001b[0m/ \u001b[01;34msamuel_jackson\u001b[0m/\n",
|
255 |
-
"\u001b[01;34matkins\u001b[0m/ \u001b[01;34mharris\u001b[0m/ \u001b[01;34mmol\u001b[0m/ \u001b[01;34msigourney_weaver\u001b[0m/\n",
|
256 |
-
"\u001b[01;34mcarlin\u001b[0m/ \u001b[01;34mhenry_cavill\u001b[0m/ \u001b[01;34mmorgan_freeman\u001b[0m/ \u001b[01;34mtom_hanks\u001b[0m/\n",
|
257 |
-
"\u001b[01;34mdaniel_craig\u001b[0m/ \u001b[01;34mjennifer_lawrence\u001b[0m/ \u001b[01;34mmyself\u001b[0m/ \u001b[01;34mwilliam_shatner\u001b[0m/\n",
|
258 |
-
"\u001b[01;34mdotrice\u001b[0m/ \u001b[01;34mjohn_krasinski\u001b[0m/ \u001b[01;34motto\u001b[0m/\n",
|
259 |
-
"\u001b[01;34memma_stone\u001b[0m/ \u001b[01;34mkennard\u001b[0m/ \u001b[01;34mpatrick_stewart\u001b[0m/\n",
|
260 |
-
"\u001b[01;34mgrace\u001b[0m/ \u001b[01;34mlescault\u001b[0m/ \u001b[01;34mrobert_deniro\u001b[0m/\n"
|
261 |
-
]
|
262 |
-
}
|
263 |
-
]
|
264 |
},
|
265 |
{
|
266 |
"cell_type": "code",
|
@@ -302,40 +118,10 @@
|
|
302 |
"torchaudio.save('generated.wav', gen.squeeze(0).cpu(), 24000)"
|
303 |
],
|
304 |
"metadata": {
|
305 |
-
"id": "KEXOKjIvn6NW"
|
306 |
-
"colab": {
|
307 |
-
"base_uri": "https://localhost:8080/"
|
308 |
-
},
|
309 |
-
"outputId": "7977bfd7-9fbc-41f7-d3ac-25fd4e350049"
|
310 |
},
|
311 |
"execution_count": null,
|
312 |
-
"outputs": [
|
313 |
-
{
|
314 |
-
"output_type": "stream",
|
315 |
-
"name": "stderr",
|
316 |
-
"text": [
|
317 |
-
"100%|ββββββββββ| 6/6 [01:18<00:00, 13.11s/it]\n",
|
318 |
-
"/usr/local/lib/python3.7/dist-packages/torch/utils/checkpoint.py:25: UserWarning: None of the inputs have requires_grad=True. Gradients will be None\n",
|
319 |
-
" warnings.warn(\"None of the inputs have requires_grad=True. Gradients will be None\")\n",
|
320 |
-
"/content/tortoise-tts/models/autoregressive.py:359: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').\n",
|
321 |
-
" mel_lengths = wav_lengths // self.mel_length_compression\n"
|
322 |
-
]
|
323 |
-
},
|
324 |
-
{
|
325 |
-
"output_type": "stream",
|
326 |
-
"name": "stdout",
|
327 |
-
"text": [
|
328 |
-
"Performing vocoding..\n"
|
329 |
-
]
|
330 |
-
},
|
331 |
-
{
|
332 |
-
"output_type": "stream",
|
333 |
-
"name": "stderr",
|
334 |
-
"text": [
|
335 |
-
"100%|ββββββββββ| 32/32 [00:16<00:00, 1.94it/s]\n"
|
336 |
-
]
|
337 |
-
}
|
338 |
-
]
|
339 |
},
|
340 |
{
|
341 |
"cell_type": "code",
|
@@ -346,7 +132,7 @@
|
|
346 |
"#\n",
|
347 |
"# Lets see what it would sound like if Picard and Kirk had a kid with a penchant for philosophy:\n",
|
348 |
"conds = []\n",
|
349 |
-
"for v in ['
|
350 |
" cond_paths = voices[v]\n",
|
351 |
" for cond_path in cond_paths:\n",
|
352 |
" c = load_audio(cond_path, 22050)\n",
|
@@ -356,40 +142,10 @@
|
|
356 |
"torchaudio.save('captain_kirkard.wav', gen.squeeze(0).cpu(), 24000)"
|
357 |
],
|
358 |
"metadata": {
|
359 |
-
"
|
360 |
-
"base_uri": "https://localhost:8080/"
|
361 |
-
},
|
362 |
-
"id": "fYTk8KUezUr5",
|
363 |
-
"outputId": "8a07f251-c90f-4e6a-c204-132b737dfff8"
|
364 |
},
|
365 |
"execution_count": null,
|
366 |
-
"outputs": [
|
367 |
-
{
|
368 |
-
"output_type": "stream",
|
369 |
-
"name": "stderr",
|
370 |
-
"text": [
|
371 |
-
"100%|ββββββββββ| 6/6 [01:45<00:00, 17.62s/it]\n",
|
372 |
-
"/usr/local/lib/python3.7/dist-packages/torch/utils/checkpoint.py:25: UserWarning: None of the inputs have requires_grad=True. Gradients will be None\n",
|
373 |
-
" warnings.warn(\"None of the inputs have requires_grad=True. Gradients will be None\")\n",
|
374 |
-
"/content/tortoise-tts/models/autoregressive.py:359: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').\n",
|
375 |
-
" mel_lengths = wav_lengths // self.mel_length_compression\n"
|
376 |
-
]
|
377 |
-
},
|
378 |
-
{
|
379 |
-
"output_type": "stream",
|
380 |
-
"name": "stdout",
|
381 |
-
"text": [
|
382 |
-
"Performing vocoding..\n"
|
383 |
-
]
|
384 |
-
},
|
385 |
-
{
|
386 |
-
"output_type": "stream",
|
387 |
-
"name": "stderr",
|
388 |
-
"text": [
|
389 |
-
"100%|ββββββββββ| 32/32 [00:16<00:00, 2.00it/s]\n"
|
390 |
-
]
|
391 |
-
}
|
392 |
-
]
|
393 |
}
|
394 |
]
|
395 |
}
|
|
|
34 |
"cell_type": "code",
|
35 |
"execution_count": null,
|
36 |
"metadata": {
|
37 |
+
"id": "JrK20I32grP6"
|
|
|
|
|
|
|
|
|
38 |
},
|
39 |
+
"outputs": [],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
"source": [
|
41 |
"!git clone https://github.com/neonbjb/tortoise-tts.git\n",
|
42 |
"%cd tortoise-tts\n",
|
|
|
59 |
"tts = TextToSpeech()"
|
60 |
],
|
61 |
"metadata": {
|
62 |
+
"id": "Gen09NM4hONQ"
|
|
|
|
|
|
|
|
|
63 |
},
|
64 |
"execution_count": null,
|
65 |
+
"outputs": []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
66 |
},
|
67 |
{
|
68 |
"cell_type": "code",
|
|
|
73 |
"%ls voices"
|
74 |
],
|
75 |
"metadata": {
|
76 |
+
"id": "SSleVnRAiEE2"
|
|
|
|
|
|
|
|
|
77 |
},
|
78 |
"execution_count": null,
|
79 |
+
"outputs": []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
80 |
},
|
81 |
{
|
82 |
"cell_type": "code",
|
|
|
118 |
"torchaudio.save('generated.wav', gen.squeeze(0).cpu(), 24000)"
|
119 |
],
|
120 |
"metadata": {
|
121 |
+
"id": "KEXOKjIvn6NW"
|
|
|
|
|
|
|
|
|
122 |
},
|
123 |
"execution_count": null,
|
124 |
+
"outputs": []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
125 |
},
|
126 |
{
|
127 |
"cell_type": "code",
|
|
|
132 |
"#\n",
|
133 |
"# Lets see what it would sound like if Picard and Kirk had a kid with a penchant for philosophy:\n",
|
134 |
"conds = []\n",
|
135 |
+
"for v in ['pat', 'william']:\n",
|
136 |
" cond_paths = voices[v]\n",
|
137 |
" for cond_path in cond_paths:\n",
|
138 |
" c = load_audio(cond_path, 22050)\n",
|
|
|
142 |
"torchaudio.save('captain_kirkard.wav', gen.squeeze(0).cpu(), 24000)"
|
143 |
],
|
144 |
"metadata": {
|
145 |
+
"id": "fYTk8KUezUr5"
|
|
|
|
|
|
|
|
|
146 |
},
|
147 |
"execution_count": null,
|
148 |
+
"outputs": []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
149 |
}
|
150 |
]
|
151 |
}
|