Spaces:
Running
Running
File size: 4,279 Bytes
8c4d22a 64fcafd 6449e88 8c4d22a 6edda28 8c4d22a 6449e88 8c4d22a 2ffc7e7 8c4d22a 64fcafd 2ffc7e7 6449e88 8c4d22a c49c056 64fcafd 2ffc7e7 8c4d22a 2ffc7e7 6449e88 64fcafd 6449e88 8c4d22a 6449e88 64fcafd c49c056 64fcafd 8c4d22a c49c056 2ffc7e7 c49c056 8c4d22a 2ffc7e7 8c4d22a c49c056 6edda28 8c4d22a 6449e88 8c4d22a 6449e88 9d153e7 6449e88 8c4d22a a575152 8c4d22a 6449e88 64fcafd 8c4d22a 6449e88 8c4d22a 64fcafd 6449e88 8c4d22a a9c23eb 8c4d22a 64fcafd 8c4d22a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 |
from io import BytesIO
import requests
from os.path import exists, join
from espnet2.bin.tts_inference import Text2Speech
from enum import Enum
from .formatter import preprocess_text
from .stress import sentence_to_stress, stress_dict, stress_with_model
from torch import no_grad
import numpy as np
import time
import soundfile as sf
class Voices(Enum):
    """Speaker voices that the model can synthesize.

    Each member's value is the integer that `TTS.tts` accepts as its `voice`
    argument: the value is validated against this enum and then forwarded to
    the synthesizer as `sids`.
    """

    # Declaration order is kept as-is: it is the iteration order of the enum.
    Olena = 4
    Mykyta = 3
    Lada = 2
    Dmytro = 1
    Olga = 5
class Stress(Enum):
    """Strategies for placing stress marks in a sentence.

    - `dictionary` - performs a dictionary lookup, taking into account the
      grammatical case of a word and its neighbors
    - `model` - places stress using a transformer model
    """

    # The string values are what `TTS.tts` accepts as its `stress` argument.
    Dictionary = "dictionary"
    Model = "model"
class TTS:
    """Text-to-speech engine built on an ESPnet `Text2Speech` synthesizer.

    Constructing an instance makes sure the model weights and config are
    present in the cache folder (downloading them if needed) and loads the
    synthesizer on the requested device. Call `tts` to synthesize audio.
    """

    def __init__(self, cache_folder=None, device="cpu") -> None:
        """Set up the engine, from download to model creation.

        - `cache_folder` - directory where model files are stored and looked
          up. Defaults to the current directory.
        - `device` - device string handed to `Text2Speech` (e.g. `"cpu"`).
        """
        self.device = device
        self.__setup_cache(cache_folder)

    def tts(self, text: str, voice: int, stress: str, output_fp=None, speed=1.0):
        """Run the text-to-speech engine and write WAV audio to `output_fp`.

        - `text` - your model input text.
        - `voice` - one of the predefined voices: a `Voices` member or its
          integer value.
        - `stress` - stress method: a `Stress` member or its string value.
        - `output_fp` - file-like object to write to. A fresh `BytesIO` is
          created for every call when omitted, so results are stored in RAM
          and never shared between calls.
        - `speed` - positive number; the synthesizer receives
          `alpha = 1 / speed` in its decode config.

        Returns a tuple `(output_fp, text)` where `output_fp` is rewound to
        position 0 and `text` is the preprocessed, stressed text that was fed
        to the synthesizer.

        Raises `ValueError` for a non-positive `speed` or an unknown
        `stress` / `voice` value.
        """
        # Checked up front: a zero speed would otherwise surface as a bare
        # ZeroDivisionError when computing alpha below.
        if speed <= 0:
            raise ValueError(
                f"Invalid value for speed selected! Speed must be a positive number, got {speed!r}."
            )

        # Accept enum members as well as their raw values.
        if isinstance(stress, Stress):
            stress = stress.value
        if isinstance(voice, Voices):
            voice = voice.value

        stress_options = [option.value for option in Stress]
        if stress not in stress_options:
            raise ValueError(
                f"Invalid value for stress option selected! Please use one of the following values: {', '.join(stress_options)}."
            )

        voice_options = [option.value for option in Voices]
        if voice not in voice_options:
            # Voice values are ints, so they must be converted before joining;
            # otherwise building this message raises TypeError.
            raise ValueError(
                f"Invalid value for voice selected! Please use one of the following values: {', '.join(str(value) for value in voice_options)}."
            )

        # A default argument is evaluated once at definition time; creating
        # the buffer here gives every call its own output object.
        if output_fp is None:
            output_fp = BytesIO()

        use_model = stress == Stress.Model.value
        text = preprocess_text(text, use_model)
        text = sentence_to_stress(text, stress_with_model if use_model else stress_dict)

        # synthesis
        with no_grad():
            start = time.time()
            wav = self.synthesizer(
                text, sids=np.array(voice), decode_conf={"alpha": 1 / speed}
            )["wav"]

        # Real-time factor: wall-clock synthesis time / duration of the audio.
        rtf = (time.time() - start) / (len(wav) / self.synthesizer.fs)
        print(f"RTF = {rtf:5f}")

        # Flatten, move to CPU and store as 16-bit PCM WAV at the model's rate.
        sf.write(
            output_fp,
            wav.view(-1).cpu().numpy(),
            self.synthesizer.fs,
            "PCM_16",
            format="wav",
        )

        # Rewind so the caller can read the audio from the start.
        output_fp.seek(0)

        return output_fp, text

    def __setup_cache(self, cache_folder=None):
        """Download the model files into `cache_folder` and load the synthesizer.

        By default stores in the current directory. The folder is created if
        it does not exist yet. Sets `self.synthesizer`.
        """
        import os  # local import: only `os.path` helpers are imported at file level

        print("downloading uk/mykyta/vits-tts")
        release_number = "v4.0.0"
        model_link = f"https://github.com/robinhad/ukrainian-tts/releases/download/{release_number}/model.pth"
        config_link = f"https://github.com/robinhad/ukrainian-tts/releases/download/{release_number}/config.yaml"

        if cache_folder is None:
            cache_folder = "."
        os.makedirs(cache_folder, exist_ok=True)

        model_path = join(cache_folder, "model.pth")
        config_path = join(cache_folder, "config.yaml")

        self.__download(model_link, model_path)
        self.__download(config_link, config_path)

        # Load from the paths the files were downloaded to, so a custom
        # `cache_folder` is honored instead of the working directory.
        self.synthesizer = Text2Speech(
            train_config=config_path,
            model_file=model_path,
            device=self.device,
            # Only for VITS
            noise_scale=0.333,
            noise_scale_dur=0.333,
        )

    def __download(self, url, file_name):
        """Download the file at `url` into local `file_name`, unless it already exists.

        The response is streamed into a temporary `<file_name>.part` file and
        moved into place only after the download succeeds, so a failed or
        interrupted download never leaves a file that a later run would
        treat as already downloaded. HTTP error statuses raise
        `requests.HTTPError`.
        """
        import os  # local import: only `os.path` helpers are imported at file level

        if exists(file_name):
            print(f"Found {file_name}. Skipping download...")
            return

        print(f"Downloading {file_name}")
        partial_name = f"{file_name}.part"
        try:
            with requests.get(url, allow_redirects=True, stream=True) as r:
                # Fail loudly instead of saving an error page as the model.
                r.raise_for_status()
                with open(partial_name, "wb") as file:
                    for chunk in r.iter_content(chunk_size=1024 * 1024):
                        file.write(chunk)
            os.replace(partial_name, file_name)
        finally:
            # Only present here if the download did not complete.
            if exists(partial_name):
                os.remove(partial_name)
|