andito
/

s2s

Inference Endpoints

Model card Files Files and versions Community

s2s / arguments_classes /vad_arguments.py

andito's picture

andito HF staff

Upload folder using huggingface_hub

c72e80d verified about 2 months ago

history blame contribute delete

1.73 kB

	from dataclasses import dataclass, field


	@dataclass
	class VADHandlerArguments:
	thresh: float = field(
	default=0.3,
	metadata={
	"help": "The threshold value for voice activity detection (VAD). Values typically range from 0 to 1, with higher values requiring higher confidence in speech detection."
	},
	)
	sample_rate: int = field(
	default=16000,
	metadata={
	"help": "The sample rate of the audio in Hertz. Default is 16000 Hz, which is a common setting for voice audio."
	},
	)
	min_silence_ms: int = field(
	default=250,
	metadata={
	"help": "Minimum length of silence intervals to be used for segmenting speech. Measured in milliseconds. Default is 250 ms."
	},
	)
	min_speech_ms: int = field(
	default=500,
	metadata={
	"help": "Minimum length of speech segments to be considered valid speech. Measured in milliseconds. Default is 500 ms."
	},
	)
	max_speech_ms: float = field(
	default=float("inf"),
	metadata={
	"help": "Maximum length of continuous speech before forcing a split. Default is infinite, allowing for uninterrupted speech segments."
	},
	)
	speech_pad_ms: int = field(
	default=500,
	metadata={
	"help": "Amount of padding added to the beginning and end of detected speech segments. Measured in milliseconds. Default is 250 ms."
	},
	)
	audio_enhancement: bool = field(
	default=False,
	metadata={
	"help": "improves sound quality by applying techniques like noise reduction, equalization, and echo cancellation. Default is False."
	},
	)