import librosa from IPython.display import Audio, display import matplotlib.pyplot as plt import torch SAMPLE_RIR_URL = "https://pytorch-tutorial-assets.s3.amazonaws.com/VOiCES_devkit/distant-16k/room-response/rm1/impulse/Lab41-SRI-VOiCES-rm1-impulse-mc01-stu-clo.wav" SAMPLE_NOISE_URL = "https://pytorch-tutorial-assets.s3.amazonaws.com/VOiCES_devkit/distant-16k/distractors/rm1/babb/Lab41-SRI-VOiCES-rm1-babb-mc01-stu-clo.wav" def plot_spectrogram(spec, title=None, ylabel="freq_bin", aspect="auto", xmax=None): spec = spec.squeeze(0) spec = spec.numpy() fig, axs = plt.subplots(1, 1) axs.set_title(title or "Spectrogram (db)") axs.set_ylabel(ylabel) axs.set_xlabel("frame") im = axs.imshow(librosa.power_to_db(spec), origin="lower", aspect=aspect) if xmax: axs.set_xlim((0, xmax)) fig.colorbar(im, ax=axs) plt.show(block=False) def play_audio(waveform, sample_rate): waveform = waveform.numpy() num_channels, num_frames = waveform.shape if num_channels == 1: display(Audio(waveform[0], rate=sample_rate)) elif num_channels == 2: display(Audio((waveform[0], waveform[1]), rate=sample_rate)) else: raise ValueError("Waveform with more than 2 channels are not supported.") def get_rir_sample(path, resample=None, processed=False): rir_raw, sample_rate = torch.load(path) if not processed: return rir_raw, sample_rate rir = rir_raw[:, int(sample_rate*1.01):int(sample_rate*1.3)] rir = rir / torch.norm(rir, p=2) rir = torch.flip(rir, [1]) return rir, sample_rate