PTTS / piper /__main__.py
nikajoon's picture
Upload 12 files
1b8f0eb verified
raw
history blame
5.11 kB
import argparse
import logging
import sys
import time
import wave
from pathlib import Path
from typing import Any, Dict
from . import PiperVoice
from .download import ensure_voice_exists, find_voice, get_voices
_FILE = Path(__file__)
_DIR = _FILE.parent
_LOGGER = logging.getLogger(_FILE.stem)
def main() -> None:
parser = argparse.ArgumentParser()
parser.add_argument("-m", "--model", required=True, help="Path to Onnx model file")
parser.add_argument("-c", "--config", help="Path to model config file")
parser.add_argument(
"-f",
"--output-file",
"--output_file",
help="Path to output WAV file (default: stdout)",
)
parser.add_argument(
"-d",
"--output-dir",
"--output_dir",
help="Path to output directory (default: cwd)",
)
parser.add_argument(
"--output-raw",
"--output_raw",
action="store_true",
help="Stream raw audio to stdout",
)
#
parser.add_argument("-s", "--speaker", type=int, help="Id of speaker (default: 0)")
parser.add_argument(
"--length-scale", "--length_scale", type=float, help="Phoneme length"
)
parser.add_argument(
"--noise-scale", "--noise_scale", type=float, help="Generator noise"
)
parser.add_argument(
"--noise-w", "--noise_w", type=float, help="Phoneme width noise"
)
#
parser.add_argument("--cuda", action="store_true", help="Use GPU")
#
parser.add_argument(
"--sentence-silence",
"--sentence_silence",
type=float,
default=0.0,
help="Seconds of silence after each sentence",
)
#
parser.add_argument(
"--data-dir",
"--data_dir",
action="append",
default=[str(Path.cwd())],
help="Data directory to check for downloaded models (default: current directory)",
)
parser.add_argument(
"--download-dir",
"--download_dir",
help="Directory to download voices into (default: first data dir)",
)
#
parser.add_argument(
"--update-voices",
action="store_true",
help="Download latest voices.json during startup",
)
#
parser.add_argument(
"--debug", action="store_true", help="Print DEBUG messages to console"
)
args = parser.parse_args()
logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)
_LOGGER.debug(args)
if not args.download_dir:
# Download to first data directory by default
args.download_dir = args.data_dir[0]
# Download voice if file doesn't exist
model_path = Path(args.model)
if not model_path.exists():
# Load voice info
voices_info = get_voices(args.download_dir, update_voices=args.update_voices)
# Resolve aliases for backwards compatibility with old voice names
aliases_info: Dict[str, Any] = {}
for voice_info in voices_info.values():
for voice_alias in voice_info.get("aliases", []):
aliases_info[voice_alias] = {"_is_alias": True, **voice_info}
voices_info.update(aliases_info)
ensure_voice_exists(args.model, args.data_dir, args.download_dir, voices_info)
args.model, args.config = find_voice(args.model, args.data_dir)
# Load voice
voice = PiperVoice.load(args.model, config_path=args.config, use_cuda=args.cuda)
synthesize_args = {
"speaker_id": args.speaker,
"length_scale": args.length_scale,
"noise_scale": args.noise_scale,
"noise_w": args.noise_w,
"sentence_silence": args.sentence_silence,
}
if args.output_raw:
# Read line-by-line
for line in sys.stdin:
line = line.strip()
if not line:
continue
# Write raw audio to stdout as its produced
audio_stream = voice.synthesize_stream_raw(line, **synthesize_args)
for audio_bytes in audio_stream:
sys.stdout.buffer.write(audio_bytes)
sys.stdout.buffer.flush()
elif args.output_dir:
output_dir = Path(args.output_dir)
output_dir.mkdir(parents=True, exist_ok=True)
# Read line-by-line
for line in sys.stdin:
line = line.strip()
if not line:
continue
wav_path = output_dir / f"{time.monotonic_ns()}.wav"
with wave.open(str(wav_path), "wb") as wav_file:
voice.synthesize(line, wav_file, **synthesize_args)
_LOGGER.info("Wrote %s", wav_path)
else:
# Read entire input
text = sys.stdin.read()
if (not args.output_file) or (args.output_file == "-"):
# Write to stdout
with wave.open(sys.stdout.buffer, "wb") as wav_file:
voice.synthesize(text, wav_file, **synthesize_args)
else:
# Write to file
with wave.open(args.output_file, "wb") as wav_file:
voice.synthesize(text, wav_file, **synthesize_args)
if __name__ == "__main__":
main()