""" Script for extracting DeepSpeech features from audio file. """ import os import argparse import numpy as np import pandas as pd from deepspeech_store import get_deepspeech_model_file from deepspeech_features import conv_audios_to_deepspeech def parse_args(): """ Create python script parameters. Returns ------- ArgumentParser Resulted args. """ parser = argparse.ArgumentParser( description="Extract DeepSpeech features from audio file", formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument( "--input", type=str, required=True, help="path to input audio file or directory") parser.add_argument( "--output", type=str, help="path to output file with DeepSpeech features") parser.add_argument( "--deepspeech", type=str, help="path to DeepSpeech 0.1.0 frozen model") parser.add_argument( "--metainfo", type=str, help="path to file with meta-information") args = parser.parse_args() return args def extract_features(in_audios, out_files, deepspeech_pb_path, metainfo_file_path=None): """ Real extract audio from video file. Parameters ---------- in_audios : list of str Paths to input audio files. out_files : list of str Paths to output files with DeepSpeech features. deepspeech_pb_path : str Path to DeepSpeech 0.1.0 frozen model. metainfo_file_path : str, default None Path to file with meta-information. """ #deepspeech_pb_path="/disk4/keyu/DeepSpeech/deepspeech-0.9.2-models.pbmm" if metainfo_file_path is None: num_frames_info = [None] * len(in_audios) else: train_df = pd.read_csv( metainfo_file_path, sep="\t", index_col=False, dtype={"Id": np.int, "File": np.unicode, "Count": np.int}) num_frames_info = train_df["Count"].values assert (len(num_frames_info) == len(in_audios)) for i, in_audio in enumerate(in_audios): if not out_files[i]: file_stem, _ = os.path.splitext(in_audio) out_files[i] = file_stem + ".npy" #print(out_files[i]) conv_audios_to_deepspeech( audios=in_audios, out_files=out_files, num_frames_info=num_frames_info, deepspeech_pb_path=deepspeech_pb_path) def main(): """ Main body of script. """ args = parse_args() in_audio = os.path.expanduser(args.input) if not os.path.exists(in_audio): raise Exception("Input file/directory doesn't exist: {}".format(in_audio)) deepspeech_pb_path = args.deepspeech #add deepspeech_pb_path = True args.deepspeech = '~/.tensorflow/models/deepspeech-0_1_0-b90017e8.pb' #deepspeech_pb_path="/disk4/keyu/DeepSpeech/deepspeech-0.9.2-models.pbmm" if deepspeech_pb_path is None: deepspeech_pb_path = "" if deepspeech_pb_path: deepspeech_pb_path = os.path.expanduser(args.deepspeech) if not os.path.exists(deepspeech_pb_path): deepspeech_pb_path = get_deepspeech_model_file() if os.path.isfile(in_audio): extract_features( in_audios=[in_audio], out_files=[args.output], deepspeech_pb_path=deepspeech_pb_path, metainfo_file_path=args.metainfo) else: audio_file_paths = [] for file_name in os.listdir(in_audio): if not os.path.isfile(os.path.join(in_audio, file_name)): continue _, file_ext = os.path.splitext(file_name) if file_ext.lower() == ".wav": audio_file_path = os.path.join(in_audio, file_name) audio_file_paths.append(audio_file_path) audio_file_paths = sorted(audio_file_paths) out_file_paths = [""] * len(audio_file_paths) extract_features( in_audios=audio_file_paths, out_files=out_file_paths, deepspeech_pb_path=deepspeech_pb_path, metainfo_file_path=args.metainfo) if __name__ == "__main__": main()