|
import json
import os

import librosa
import soundfile
from tqdm import tqdm, tqdm_notebook
|
|
|
# Source dataset root and the folder the converted clips are written to.
base_dir = "./data_sakuramiko_senbetsu"
output_dir = "./plachta/VITS-fast-fine-tuning/custom_character_voice/sakuramiko"

# Names of the entries directly under base_dir that are not regular files.
all_dir = [
    entry
    for entry in os.listdir(base_dir)
    if not os.path.isfile(os.path.join(base_dir, entry))
]

# Filled by recursive_til_audio_file_found with the name of every source
# file that was converted.
file_list = []

# Directories whose path contains one of these fragments are not descended
# into during the scan.
skip_dir = [
    "301_dousa",
    "801_eng_suuji",
    "801_eng_jikan",
    "803_eng_others",
    "912_alphabet",
    "912_alphabet2",
    "913_web",
    "sample",
]

# Running count of written files; also used to number the output files.
total_file_write = 0
|
|
|
def recursive_til_audio_file_found(path):
    """Walk ``path`` recursively and convert every audio file below it.

    Each ``.wav`` / ``.mp3`` file is loaded with ``librosa.load(..., sr=22050)``
    and written to ``output_dir`` as ``sakuramiko_<n>.wav`` (16-bit PCM), where
    ``<n>`` is the current value of the module-level counter
    ``total_file_write``.  For every converted file the source file name is
    appended to ``file_list``, the counter is incremented and the module-level
    progress bar ``pbar`` is advanced by one.

    Sub-directories whose name contains any fragment listed in ``skip_dir``
    are not descended into.

    Args:
        path: Directory to scan.

    Returns:
        None.  Results are reported through ``file_list``,
        ``total_file_write`` and the files written to ``output_dir``.
    """
    global total_file_write

    audio_extensions = (".wav", ".mp3")

    # Sorted so the numbering of the output files is reproducible;
    # os.listdir() returns entries in arbitrary order.
    for entry in sorted(os.listdir(path)):
        entry_path = os.path.join(path, entry)

        # Classify every entry on its own instead of guessing the directory
        # layout from its first entry, so folders that mix audio files and
        # sub-directories are handled.
        if os.path.isdir(entry_path):
            # Match against the entry name only: matching the full path would
            # skip the whole tree if the base path contained a fragment.
            if not any(fragment in entry for fragment in skip_dir):
                recursive_til_audio_file_found(entry_path)
            continue

        # Compare the real (case-insensitive) extension; a substring search
        # would also accept names such as "take.wav.txt".
        extension = os.path.splitext(entry)[1].lower()
        if extension not in audio_extensions:
            continue

        audio, rate = librosa.load(entry_path, sr=22050)

        # Create the destination lazily so a missing folder does not abort
        # the run at the first write.
        os.makedirs(output_dir, exist_ok=True)
        output_path = os.path.join(
            output_dir, "sakuramiko_" + str(total_file_write) + ".wav"
        )
        soundfile.write(output_path, audio, rate, format="wav", subtype="PCM_16")
        file_list.append(entry)

        total_file_write += 1
        pbar.update(1)
|
|
|
|
|
# Convert the whole dataset.  recursive_til_audio_file_found reports progress
# through the module-level name `pbar`, so the bar must be bound to that name.
# NOTE(review): 24778 is presumably the expected number of audio files under
# base_dir -- confirm; the bar will be off if the dataset changes.
with tqdm(total=24778) as pbar:
    recursive_til_audio_file_found(base_dir)

# Printed after the bar is closed so the message does not interleave with it.
print(f"Total audio file written : {total_file_write}")

# Manifest with one entry per converted source file name; "kana" is left
# empty here.
# NOTE(review): the manifest is keyed by file name, so names that occur more
# than once in file_list collapse into a single entry.
out_json = {name: {"path": name, "kana": ""} for name in file_list}

with open("./amitaro.json", "w", encoding="utf-8") as outfile:
    json.dump(out_json, outfile)