"""Flatten a nested voice dataset into sequentially numbered WAV files.

The script walks ``base_dir`` recursively, resamples every ``.wav``/``.mp3``
file it finds to ``TARGET_SAMPLE_RATE`` and writes it as 16-bit PCM WAV to
``output_dir`` under the name ``sakuramiko_<n>.wav``.  Afterwards it dumps a
JSON index of the original file names to ``./amitaro.json``.
"""
import json
import os

import librosa
import soundfile
from tqdm import tqdm, tqdm_notebook

base_dir = "./data_sakuramiko_senbetsu"
output_dir = "./plachta/VITS-fast-fine-tuning/custom_character_voice/sakuramiko"
all_dir = [f for f in os.listdir(base_dir) if not os.path.isfile(os.path.join(base_dir, f))]
file_list = []
skip_dir = ["301_dousa", "801_eng_suuji", "801_eng_jikan", "803_eng_others",
            "912_alphabet", "912_alphabet2", "913_web", "sample"]
total_file_write = 0

# File extensions (lower-case, with the dot) treated as audio input.
AUDIO_EXTENSIONS = (".wav", ".mp3")
# Sample rate every input file is resampled to on load.
TARGET_SAMPLE_RATE = 22050
# Hard-coded progress-bar total; presumably the audio file count of the
# dataset this script was written for -- adjust for other datasets.
PROGRESS_TOTAL = 24778


def recursive_til_audio_file_found(path):
    """Convert every audio file below *path* and record it in ``file_list``.

    Each entry of *path* is classified on its own: subdirectories are
    recursed into unless their path contains one of the ``skip_dir`` names,
    and regular files with an audio extension are resampled and written to
    ``output_dir``.  Classifying per entry (rather than guessing from the
    first entry only) keeps directories that mix files and subdirectories
    from crashing or being partially ignored.

    Side effects: writes WAV files, appends the original file name to
    ``file_list``, increments the global ``total_file_write`` and advances
    the module-level progress bar ``pbar``.

    Args:
        path: Directory to scan.

    Raises:
        OSError: If *path* cannot be listed (e.g. it does not exist).
    """
    global total_file_write

    for entry in os.listdir(path):
        entry_path = os.path.join(path, entry)

        if os.path.isdir(entry_path):
            # Substring match against the whole path, so a skip name also
            # applies when it appears in a parent directory's name.
            if any(skip in entry_path for skip in skip_dir):
                continue
            recursive_til_audio_file_found(entry_path)
            continue

        # Ignore anything that is neither a directory nor a regular file
        # (broken symlinks, sockets, ...).
        if not os.path.isfile(entry_path):
            continue

        # Decide by the real file extension; a substring search over the
        # full path would also match "x.wav.txt" or files living under a
        # directory whose name happens to contain ".wav".
        extension = os.path.splitext(entry)[1].lower()
        if extension not in AUDIO_EXTENSIONS:
            continue

        audio, rate = librosa.load(entry_path, sr=TARGET_SAMPLE_RATE)
        output_path = os.path.join(
            output_dir, "sakuramiko_" + str(total_file_write) + ".wav"
        )
        soundfile.write(output_path, audio, rate, format='wav', subtype="PCM_16")

        file_list.append(entry)
        total_file_write += 1
        pbar.update(1)


# The writer does not create missing directories, so make sure the target
# exists before the first file is written.
os.makedirs(output_dir, exist_ok=True)

with tqdm(total=PROGRESS_TOTAL) as pbar:
    recursive_til_audio_file_found(base_dir)

print(f"Total audio file written : {total_file_write}")

# Index keyed by the ORIGINAL file name (not the numbered output name);
# files that share a name in different directories collapse into one key.
out_json = {}
for val in file_list:
    out_json[val] = {"path": val, "kana": ""}

with open("./amitaro.json", "w") as outfile:
    outfile.write(json.dumps(out_json))