Update README.md
Browse files
README.md
CHANGED
@@ -4,14 +4,38 @@ pipeline_tag: text-to-speech
|
|
4 |
tags:
|
5 |
- transformers.js
|
6 |
- mms
|
7 |
-
|
|
|
|
|
|
|
|
|
|
|
8 |
---
|
9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
## Usage
|
11 |
|
12 |
### Transformers
|
13 |
|
14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
|
16 |
### Transformers.js
|
17 |
|
@@ -25,7 +49,7 @@ npm i @xenova/transformers
|
|
25 |
import { pipeline } from '@xenova/transformers';
|
26 |
|
27 |
// Create a text-to-speech pipeline
|
28 |
-
const synthesizer = await pipeline('text-to-speech', '
|
29 |
quantized: false, // Remove this line to use the quantized version (default)
|
30 |
});
|
31 |
|
@@ -49,5 +73,4 @@ fs.writeFileSync('out.wav', wav.toBuffer());
|
|
49 |
```
|
50 |
|
51 |
|
52 |
-
<audio controls src="https://cdn-uploads.huggingface.co/production/uploads/61b253b7ac5ecaae3d1efe0c/6FvN6zFSHGeenWS2-H8xv.wav"></audio>
|
53 |
-
|
|
|
4 |
tags:
|
5 |
- transformers.js
|
6 |
- mms
|
7 |
+
- vits
|
8 |
+
license: cc-by-nc-4.0
|
9 |
+
datasets:
|
10 |
+
- ylacombe/google-chilean-spanish
|
11 |
+
language:
|
12 |
+
- es
|
13 |
---
|
14 |
|
15 |
+
## Model
|
16 |
+
|
17 |
+
This is a finetuned version of the Spanish Massively Multilingual Speech (MMS) model. MMS models are light-weight, low-latency TTS models based on the [VITS architecture](https://huggingface.co/docs/transformers/model_doc/vits).
|
18 |
+
|
19 |
+
It was trained in around **20 minutes** with as few as **80 to 150 samples**, on this [Chilean Spanish dataset](https://huggingface.co/datasets/ylacombe/google-chilean-spanish).
|
20 |
+
|
21 |
+
The training recipe is available in this [GitHub repository: **ylacombe/finetune-hf-vits**](https://github.com/ylacombe/finetune-hf-vits).
|
22 |
+
|
23 |
+
|
24 |
## Usage
|
25 |
|
26 |
### Transformers
|
27 |
|
28 |
+
```python
|
29 |
+
from transformers import pipeline
|
30 |
+
import scipy
|
31 |
+
|
32 |
+
model_id = "ylacombe/mms-spa-finetuned-chilean-monospeaker"
|
33 |
+
synthesiser = pipeline("text-to-speech", model_id) # add device=0 if you want to use a GPU
|
34 |
+
|
35 |
+
speech = synthesiser("Hola, ¿cómo estás hoy?")
|
36 |
+
|
37 |
+
scipy.io.wavfile.write("finetuned_output.wav", rate=speech["sampling_rate"], data=speech["audio"])
|
38 |
+
```
|
39 |
|
40 |
### Transformers.js
|
41 |
|
|
|
49 |
import { pipeline } from '@xenova/transformers';
|
50 |
|
51 |
// Create a text-to-speech pipeline
|
52 |
+
const synthesizer = await pipeline('text-to-speech', 'ylacombe/mms-spa-finetuned-chilean-monospeaker', {
|
53 |
quantized: false, // Remove this line to use the quantized version (default)
|
54 |
});
|
55 |
|
|
|
73 |
```
|
74 |
|
75 |
|
76 |
+
<audio controls src="https://cdn-uploads.huggingface.co/production/uploads/61b253b7ac5ecaae3d1efe0c/6FvN6zFSHGeenWS2-H8xv.wav"></audio>
|
|