PTTS / piper /download.py
nikajoon's picture
Upload 12 files
1b8f0eb verified
"""Utility for downloading Piper voices."""
import json
import logging
import shutil
from pathlib import Path
from typing import Any, Dict, Iterable, Set, Tuple, Union
from urllib.request import urlopen
from .file_hash import get_file_hash
# Template for remote file URLs; "{file}" is filled in with a path such as
# "voices.json" or a voice file path taken from the voices catalogue.
URL_FORMAT = "https://huggingface.co/rhasspy/piper-voices/resolve/v1.0.0/{file}"
# Directory containing this module; the embedded voices.json is looked up here.
_DIR = Path(__file__).parent
# Module-level logger.
_LOGGER = logging.getLogger(__name__)
# Base names of catalogue files that are neither checked nor downloaded.
_SKIP_FILES = {"MODEL_CARD"}
class VoiceNotFoundError(Exception):
    """Raised when a requested voice name is absent from the voices catalogue."""
def get_voices(
    download_dir: Union[str, Path], update_voices: bool = False
) -> Dict[str, Any]:
    """Load the voices catalogue from a downloaded or embedded ``voices.json``.

    Args:
        download_dir: Directory that holds (or will receive) the downloaded
            ``voices.json``.
        update_voices: When true, fetch the latest ``voices.json`` into
            ``download_dir`` before loading.

    Returns:
        The parsed JSON content of the selected ``voices.json``. The
        downloaded file is preferred; the copy next to this module is used
        when no downloaded file exists.

    Raises:
        OSError: If the download fails or the selected file cannot be read.
    """
    download_dir = Path(download_dir)
    voices_download = download_dir / "voices.json"

    if update_voices:
        # Download latest voices.json
        voices_url = URL_FORMAT.format(file="voices.json")
        _LOGGER.debug("Downloading %s to %s", voices_url, voices_download)

        # The download directory may not exist yet on a first run.
        download_dir.mkdir(parents=True, exist_ok=True)

        # Stream into a sibling temp file and rename only on success, so an
        # interrupted transfer never leaves a truncated voices.json that
        # would then shadow the embedded copy on every later call.
        partial_path = voices_download.with_name(voices_download.name + ".tmp")
        try:
            with urlopen(voices_url) as response, open(
                partial_path, "wb"
            ) as download_file:
                shutil.copyfileobj(response, download_file)
            partial_path.replace(voices_download)
        finally:
            # After a successful rename the temp file is gone; this only
            # cleans up after a failed transfer.
            if partial_path.exists():
                partial_path.unlink()

    # Prefer downloaded file to embedded
    voices_embedded = _DIR / "voices.json"
    voices_path = voices_download if voices_download.exists() else voices_embedded

    _LOGGER.debug("Loading %s", voices_path)
    with open(voices_path, "r", encoding="utf-8") as voices_file:
        return json.load(voices_file)
def _needs_download(data_file_path: Path, file_info: Dict[str, Any]) -> bool:
    """Return True if the local file is missing or fails its size/hash check."""
    _LOGGER.debug("Checking %s", data_file_path)

    if not data_file_path.exists():
        _LOGGER.debug("Missing %s", data_file_path)
        return True

    expected_size = file_info["size_bytes"]
    actual_size = data_file_path.stat().st_size
    if expected_size != actual_size:
        _LOGGER.warning(
            "Wrong size (expected=%s, actual=%s) for %s",
            expected_size,
            actual_size,
            data_file_path,
        )
        return True

    # The file is only hashed once the cheap size comparison has passed.
    expected_hash = file_info["md5_digest"]
    actual_hash = get_file_hash(data_file_path)
    if expected_hash != actual_hash:
        _LOGGER.warning(
            "Wrong hash (expected=%s, actual=%s) for %s",
            expected_hash,
            actual_hash,
            data_file_path,
        )
        return True

    return False


def ensure_voice_exists(
    name: str,
    data_dirs: Iterable[Union[str, Path]],
    download_dir: Union[str, Path],
    voices_info: Dict[str, Any],
) -> None:
    """Check a voice's files in the data dirs and download the bad ones.

    Every file listed for the voice (except names in ``_SKIP_FILES``) is
    checked in each data dir for existence, size and hash. A file is queued
    for download as soon as it is missing or invalid in any one of the data
    dirs, even if another dir holds a valid copy. Queued files are fetched
    into ``download_dir`` under their base name.

    Args:
        name: Voice name, used as a key into ``voices_info``.
        data_dirs: Directories to check for existing voice files.
        download_dir: Directory that receives downloaded files; created if
            it does not exist.
        voices_info: Voices catalogue; ``voices_info[name]["files"]`` maps
            remote file paths to dicts with ``size_bytes`` and ``md5_digest``.

    Raises:
        ValueError: If ``data_dirs`` is empty, or the voice lists no files.
        VoiceNotFoundError: If ``name`` is not in ``voices_info``.
    """
    # Materialise once: a generator is always truthy, so its emptiness can
    # only be tested on a concrete list. An explicit raise is used because
    # ``assert`` is stripped when Python runs with -O.
    data_dirs = list(data_dirs)
    if not data_dirs:
        raise ValueError("No data dirs")

    if name not in voices_info:
        raise VoiceNotFoundError(name)

    voice_files = voices_info[name]["files"]
    if not voice_files:
        # Nothing is listed for this voice, so nothing can be checked or fetched.
        raise ValueError(f"Unable to find or download voice: {name}")

    files_to_download: Set[str] = set()
    for data_dir in data_dirs:
        data_dir = Path(data_dir)

        # Check sizes/hashes
        for file_path, file_info in voice_files.items():
            if file_path in files_to_download:
                # Already planning to download
                continue

            file_name = Path(file_path).name
            if file_name in _SKIP_FILES:
                continue

            if _needs_download(data_dir / file_name, file_info):
                files_to_download.add(file_path)

    if not files_to_download:
        return

    # Download missing files
    download_dir = Path(download_dir)
    download_dir.mkdir(parents=True, exist_ok=True)

    # Sorted so the download order does not depend on set ordering.
    for file_path in sorted(files_to_download):
        file_url = URL_FORMAT.format(file=file_path)
        download_file_path = download_dir / Path(file_path).name
        _LOGGER.debug("Downloading %s to %s", file_url, download_file_path)

        # Stream into a sibling temp file and rename only on success, so an
        # interrupted transfer never leaves a truncated file at the final path.
        partial_path = download_file_path.with_name(download_file_path.name + ".tmp")
        try:
            with urlopen(file_url) as response, open(
                partial_path, "wb"
            ) as download_file:
                shutil.copyfileobj(response, download_file)
            partial_path.replace(download_file_path)
        finally:
            # Only present here if the transfer or rename failed.
            if partial_path.exists():
                partial_path.unlink()

        _LOGGER.info("Downloaded %s (%s)", download_file_path, file_url)
def find_voice(name: str, data_dirs: Iterable[Union[str, Path]]) -> Tuple[Path, Path]:
    """Locate the model and config files of a voice.

    Args:
        name: Voice name; the files looked for are ``<name>.onnx`` and
            ``<name>.onnx.json``.
        data_dirs: Directories to search, in order.

    Returns:
        ``(model_path, config_path)`` from the first directory that contains
        both files.

    Raises:
        ValueError: If no directory contains both files.
    """
    model_name = f"{name}.onnx"
    config_name = f"{name}.onnx.json"

    for candidate_dir in map(Path, data_dirs):
        model_file = candidate_dir / model_name
        config_file = candidate_dir / config_name

        # Both files must live in the same directory to count as a match.
        if not model_file.exists():
            continue
        if config_file.exists():
            return model_file, config_file

    raise ValueError(f"Missing files for voice {name}")