|
import argparse |
|
import os |
|
|
|
from torchaudio.datasets import CMUARCTIC |
|
from tqdm import tqdm |
|
|
|
|
|
# Utterance index ranges for each manifest split: 932 train, 100 valid and
# 100 test indices, contiguous and non-overlapping (1132 in total).
SPLITS = dict(
    train=list(range(0, 932)),
    valid=list(range(932, 1032)),
    test=list(range(1032, 1132)),
)
|
|
|
|
|
def get_parser():
    """Build the command-line parser for this script.

    Returns:
        An ``argparse.ArgumentParser`` with one positional argument
        (``root``) and the options ``--dest``, ``--source``, ``--target``,
        ``--splits``, ``--wav-root`` and the required ``--spkemb-npy-dir``.
    """
    # (flag, add_argument keyword options), registered in this order so the
    # generated help output lists them the same way.
    option_specs = (
        ("root", {
            "metavar": "DIR",
            "help": "root directory containing wav files to index",
        }),
        ("--dest", {
            "default": ".",
            "type": str,
            "metavar": "DIR",
            "help": "output directory",
        }),
        ("--source", {
            "default": "bdl,clb,slt,rms",
            "type": str,
            "help": "Source voice from slt, clb, bdl, rms.",
        }),
        ("--target", {
            "default": "bdl,clb,slt,rms",
            "type": str,
            "help": "Target voice from slt, clb, bdl, rms.",
        }),
        ("--splits", {
            "default": "932,100,100",
            "type": str,
            "help": "Split of train,valid,test seperate by comma.",
        }),
        ("--wav-root", {
            "default": None,
            "type": str,
            "metavar": "DIR",
            "help": "saved waveform root directory for tsv",
        }),
        ("--spkemb-npy-dir", {
            "required": True,
            "type": str,
            "help": "speaker embedding directory",
        }),
    )

    cli = argparse.ArgumentParser()
    for flag, options in option_specs:
        cli.add_argument(flag, **options)
    return cli
|
|
|
def main(args):
    """Write per-split TSV manifests pairing source and target speaker utterances.

    One ``<split>.tsv`` file is created in ``args.dest`` for every key of
    ``SPLITS``. Its first line is ``args.wav_root``. After that, for every
    utterance index of the split and every (source, target) speaker pair
    with distinct speakers, one tab-separated row is written holding: the
    source wav path, the source frame count, the target wav path, the
    target frame count and the target speaker-embedding ``.npy`` path.

    Args:
        args: Parsed namespace providing ``root``, ``dest``, ``source``,
            ``target``, ``splits``, ``wav_root`` and ``spkemb_npy_dir``.

    Raises:
        ValueError: If ``args.splits`` does not hold exactly one
            non-negative integer size per split.
        AssertionError: If the split sizes do not sum to 1132, or if a
            source/target item pair disagrees on item fields 1 or 3.
    """
    dest_dir = args.dest
    # NOTE: when --wav-root is omitted this is None and the header line of
    # every TSV is the literal text "None".
    wav_root = args.wav_root
    os.makedirs(dest_dir, exist_ok=True)

    source = args.source.split(",")
    target = args.target.split(",")
    spks = sorted(set(source + target))

    # "slt" is always opened, even when it is not requested: its utterance
    # list is copied onto every other speaker so that index i addresses the
    # same utterance for all of them.
    slt = CMUARCTIC(args.root, url="slt", folder_in_archive="ARCTIC", download=False)
    datasets = {}
    for spk in spks:
        if spk == "slt":
            datasets[spk] = slt
            continue
        datasets[spk] = CMUARCTIC(args.root, url=spk, folder_in_archive="ARCTIC", download=False)
        datasets[spk]._walker = list(slt._walker)

    num_splits = [int(n_split) for n_split in args.splits.split(",")]
    if len(num_splits) != len(SPLITS) or min(num_splits) < 0:
        raise ValueError(
            f"--splits needs {len(SPLITS)} non-negative sizes "
            f"({','.join(SPLITS)}), got {args.splits!r}"
        )
    assert sum(num_splits) == 1132, f"Missing utterances: {sum(num_splits)} != 1132"

    # Turn the requested sizes into contiguous index ranges, in SPLITS key
    # order. With the default "932,100,100" these equal the SPLITS ranges.
    split_indices = {}
    start = 0
    for split, size in zip(SPLITS, num_splits):
        split_indices[split] = range(start, start + size)
        start += size

    # Per-speaker path components are loop-invariant; compute them once.
    wav_dirs = {
        spk: (os.path.basename(dataset._path), dataset._folder_audio)
        for spk, dataset in datasets.items()
    }
    progress_desc = f"[{'-'.join(spks)}]tsv/wav/spk"

    tsv = {}
    try:
        for split in split_indices:
            tsv[split] = open(os.path.join(dest_dir, f"{split}.tsv"), "w")
            print(wav_root, file=tsv[split])

        for split, indices in split_indices.items():
            for i in tqdm(indices, desc=progress_desc):
                # Load each speaker's item at most once per index rather
                # than once per (source, target) pair.
                loaded = {}
                for src_spk in source:
                    for tgt_spk in target:
                        if src_spk == tgt_spk:
                            continue

                        for spk in (src_spk, tgt_spk):
                            if spk not in loaded:
                                loaded[spk] = datasets[spk][i]
                        src_i = loaded[src_spk]
                        tgt_i = loaded[tgt_spk]

                        # Per torchaudio's documented CMUARCTIC item layout,
                        # field 1 is the sample rate and field 3 the
                        # utterance id; both must match across the pair.
                        assert src_i[1] == tgt_i[1], f"{src_i[1]}-{tgt_i[1]}"
                        assert src_i[3] == tgt_i[3], f"{src_i[3]}-{tgt_i[3]}"

                        src_base, src_folder = wav_dirs[src_spk]
                        tgt_base, tgt_folder = wav_dirs[tgt_spk]
                        src_wav = os.path.join(src_base, src_folder, f"arctic_{src_i[3]}.wav")
                        # Frame count is the size of the last dimension of item field 0.
                        src_nframes = src_i[0].shape[-1]
                        tgt_wav = os.path.join(tgt_base, tgt_folder, f"arctic_{tgt_i[3]}.wav")
                        tgt_nframes = tgt_i[0].shape[-1]
                        tgt_spkemb = os.path.join(
                            args.spkemb_npy_dir,
                            f"{tgt_base}-{tgt_folder}-arctic_{tgt_i[3]}.npy",
                        )
                        print(
                            f"{src_wav}\t{src_nframes}\t{tgt_wav}\t{tgt_nframes}\t{tgt_spkemb}",
                            file=tsv[split],
                        )
    finally:
        # Close whatever was opened, even if an assertion or read failed.
        for handle in tsv.values():
            handle.close()
|
|
|
|
|
# Script entry point: parse the command line and build the manifests.
if __name__ == "__main__":
    main(get_parser().parse_args())
|
|