Spaces:

thepianist9
/

Loonly

Runtime error

App Files Files Community

Loonly / NeRF /data_utils /deepspeech_features /extract_ds_features.py

thepianist9

Upload folder using huggingface_hub

8d0209c verified 3 months ago

raw

history blame contribute delete

4.36 kB

	"""
	Script for extracting DeepSpeech features from audio file.
	"""

	import os
	import argparse
	import numpy as np
	import pandas as pd
	from deepspeech_store import get_deepspeech_model_file
	from deepspeech_features import conv_audios_to_deepspeech


	def parse_args():
	    """
	    Build the command-line interface of the script and parse ``sys.argv``.

	    Returns
	    -------
	    argparse.Namespace
	        Parsed options: ``input`` (mandatory) plus ``output``, ``deepspeech``
	        and ``metainfo``, each of which is ``None`` when not supplied.
	    """
	    # (flag, is mandatory, help text) -- registered in this order so that the
	    # generated ``--help`` output lists the options in the same sequence.
	    option_table = (
	        ("--input", True, "path to input audio file or directory"),
	        ("--output", False, "path to output file with DeepSpeech features"),
	        ("--deepspeech", False, "path to DeepSpeech 0.1.0 frozen model"),
	        ("--metainfo", False, "path to file with meta-information"),
	    )

	    cli = argparse.ArgumentParser(
	        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
	        description="Extract DeepSpeech features from audio file")
	    for flag, is_mandatory, help_text in option_table:
	        cli.add_argument(
	            flag,
	            type=str,
	            required=is_mandatory,
	            help=help_text)

	    return cli.parse_args()


	def extract_features(in_audios,
	                     out_files,
	                     deepspeech_pb_path,
	                     metainfo_file_path=None):
	    """
	    Extract DeepSpeech features from audio files.

	    Parameters
	    ----------
	    in_audios : list of str
	        Paths to input audio files.
	    out_files : list of str
	        Paths to output files with DeepSpeech features. The list is updated
	        in place: every empty/``None`` entry is replaced by the matching
	        input path with its extension changed to ``.npy``.
	    deepspeech_pb_path : str
	        Path to DeepSpeech 0.1.0 frozen model.
	    metainfo_file_path : str, default None
	        Path to a tab-separated file with meta-information. Its ``Count``
	        column is forwarded to ``conv_audios_to_deepspeech`` as
	        ``num_frames_info``. When omitted, ``None`` is forwarded for every
	        audio file.

	    Raises
	    ------
	    ValueError
	        If the meta-information file does not hold exactly one row per
	        input audio file.
	    """
	    if metainfo_file_path is None:
	        num_frames_info = [None] * len(in_audios)
	    else:
	        # The ``np.int`` / ``np.unicode`` aliases were removed in NumPy 1.24;
	        # the builtins they pointed to are used instead.
	        train_df = pd.read_csv(
	            metainfo_file_path,
	            sep="\t",
	            index_col=False,
	            dtype={"Id": int, "File": str, "Count": int})
	        num_frames_info = train_df["Count"].values
	        # Explicit check rather than ``assert`` so that it is not stripped
	        # when Python runs with -O.
	        if len(num_frames_info) != len(in_audios):
	            raise ValueError(
	                "Meta-information has {} rows but {} audio files were "
	                "given".format(len(num_frames_info), len(in_audios)))

	    # Derive a default output path next to each input that has none.
	    for i, in_audio in enumerate(in_audios):
	        if not out_files[i]:
	            file_stem, _ = os.path.splitext(in_audio)
	            out_files[i] = file_stem + ".npy"

	    conv_audios_to_deepspeech(
	        audios=in_audios,
	        out_files=out_files,
	        num_frames_info=num_frames_info,
	        deepspeech_pb_path=deepspeech_pb_path)


	def main():
	    """
	    Main body of script.

	    Parses the command line, resolves the DeepSpeech model file and runs
	    feature extraction either for a single audio file or for every ``.wav``
	    file (case-insensitive extension, non-recursive) of a directory. For a
	    directory input ``--output`` is not used: each result is written next
	    to its audio file with a ``.npy`` extension.

	    Raises
	    ------
	    FileNotFoundError
	        If the ``--input`` path does not exist.
	    """
	    args = parse_args()
	    in_audio = os.path.expanduser(args.input)
	    if not os.path.exists(in_audio):
	        raise FileNotFoundError(
	            "Input file/directory doesn't exist: {}".format(in_audio))

	    # Honour --deepspeech when it is given (it used to be overwritten
	    # unconditionally); otherwise look at the default model location.
	    default_model_path = "~/.tensorflow/models/deepspeech-0_1_0-b90017e8.pb"
	    deepspeech_pb_path = os.path.expanduser(
	        args.deepspeech or default_model_path)
	    if not os.path.exists(deepspeech_pb_path):
	        # Fall back to whatever model file deepspeech_store provides
	        # (presumably a downloaded copy -- see get_deepspeech_model_file).
	        deepspeech_pb_path = get_deepspeech_model_file()

	    if os.path.isfile(in_audio):
	        audio_file_paths = [in_audio]
	        out_file_paths = [args.output]
	    else:
	        # Sorted so that the processing order is deterministic.
	        audio_file_paths = sorted(
	            os.path.join(in_audio, file_name)
	            for file_name in os.listdir(in_audio)
	            if os.path.isfile(os.path.join(in_audio, file_name))
	            and os.path.splitext(file_name)[1].lower() == ".wav")
	        # Empty entries make extract_features derive the ".npy" paths.
	        out_file_paths = [""] * len(audio_file_paths)

	    extract_features(
	        in_audios=audio_file_paths,
	        out_files=out_file_paths,
	        deepspeech_pb_path=deepspeech_pb_path,
	        metainfo_file_path=args.metainfo)


	# Script entry point: run the command-line tool only when this file is
	# executed directly, never as a side effect of importing it.
	if __name__ == "__main__":
	    main()