"""
Mostly copied from https://github.com/HarryVolek/PyTorch_Speaker_Verification
"""
import glob
import numpy as np
import os
import random
from random import shuffle

import torch
from torch.utils.data import Dataset

from utils.hparam import hparam as hp
from utils.utils import mfccs_and_spec


class GujaratiSpeakerVerificationDataset(Dataset):
    """Dataset yielding a batch of M utterance spectrograms for one speaker.

    Each file in the data directory is loaded with ``np.load`` and indexed as
    ``[utterance, n_mels, frames]`` -- presumably one .npy file per speaker,
    produced by the preprocessing step (TODO confirm against preprocessing).

    Args:
        shuffle: If True, ``__getitem__`` ignores ``idx`` and returns a random
            speaker with M randomly chosen utterances; if False, speakers and
            utterances are taken deterministically in order.
        utter_start: First utterance index used when ``shuffle`` is False.
        split: ``'val'`` selects the test path/params; any other value
            (including ``'test'``) selects the training path/params.
            NOTE(review): confirm that routing non-'val' splits to the
            training path is intentional.
    """

    def __init__(self, shuffle=True, utter_start=0, split='train'):
        # Pick data path and utterances-per-speaker (M) from hparams.
        if split != 'val':
            self.path = hp.data.train_path
            self.utter_num = hp.train.M
        else:
            self.path = hp.data.test_path
            self.utter_num = hp.test.M
        # Snapshot of the speaker files; __len__ and __getitem__ both use it.
        self.file_list = os.listdir(self.path)
        self.shuffle = shuffle
        self.utter_start = utter_start
        self.split = split

    def __len__(self):
        # One dataset item per speaker file.
        return len(self.file_list)

    def __getitem__(self, idx):
        # Fix: use the file list captured in __init__ instead of re-listing
        # the directory on every access -- avoids per-item I/O and keeps
        # indexing consistent with __len__ if the directory changes.
        if self.shuffle:
            # Random speaker; idx is intentionally ignored in shuffle mode.
            selected_file = random.choice(self.file_list)
        else:
            selected_file = self.file_list[idx]

        # Load this speaker's utterance spectrograms.
        utters = np.load(os.path.join(self.path, selected_file))

        if self.shuffle:
            # Sample M utterance indices (with replacement) for this speaker.
            utter_index = np.random.randint(0, utters.shape[0], self.utter_num)
            utterance = utters[utter_index]
        else:
            # Deterministic contiguous slice of M utterances.
            utterance = utters[self.utter_start:self.utter_start + self.utter_num]

        # Truncate every utterance to 160 frames.
        utterance = utterance[:, :, :160]  # TODO implement variable length batch size

        # [M, n_mels, frames] -> [M, frames, n_mels]
        return torch.tensor(np.transpose(utterance, axes=(0, 2, 1)))

    def __repr__(self):
        return f"{self.__class__.__name__}(split={self.split!r}, num_speakers={len(self.file_list)}, num_utterances={self.utter_num})"