# Repository: BSC-LT/wavenext-mel
# File: config.yaml (2.55 kB)
# Last commit: "Upload model" by wetdog, revision f5c97ea (verified)

	# pytorch_lightning==1.8.6
	seed_everything: 4444

	data:
	class_path: vocos.dataset.VocosDataModule
	init_args:
	train_params:
	filelist_path: ???
	sampling_rate: 22050
	num_samples: 16384
	batch_size: 16
	num_workers: 8

	val_params:
	filelist_path: ???
	sampling_rate: 22050
	num_samples: 48384
	batch_size: 16
	num_workers: 8

	model:
	class_path: vocos.experiment.VocosExp
	init_args:
	sample_rate: 22050
	initial_learning_rate: 1e-3
	mel_loss_coeff: 45
	mrd_loss_coeff: 0.1 # original value 0.1
	num_warmup_steps: 500 # Optimizers warmup steps
	pretrain_mel_steps: 0 # 0 means GAN objective from the first iteration

	# automatic evaluation
	evaluate_utmos: true
	evaluate_pesq: true
	evaluate_periodicty: true

	feature_extractor:
	class_path: vocos.feature_extractors.MelSpectrogramFeatures
	init_args:
	sample_rate: 22050
	n_fft: 1024
	hop_length: 256
	n_mels: 80
	padding: same
	f_min: 0
	f_max: 8000
	norm: "slaney"
	mel_scale: "slaney"
	clip_val: 1e-5


	backbone:
	class_path: vocos.models.VocosBackbone
	init_args:
	input_channels: 80
	dim: 512
	intermediate_dim: 1536
	num_layers: 8

	head:
	class_path: vocos.heads.WaveNextHead
	init_args:
	dim: 512
	n_fft: 1024
	hop_length: 256
	padding: same

	melspec_loss:
	class_path: vocos.loss.MelSpecReconstructionLoss
	init_args:
	sample_rate: 22050
	n_fft: 1024
	hop_length: 256
	n_mels: 128
	f_min: 0
	f_max: 11000
	norm: "slaney"
	mel_scale: "slaney"
	clip_val: 1e-5


	trainer:
	logger:
	class_path: pytorch_lightning.loggers.TensorBoardLogger
	init_args:
	save_dir: ???
	callbacks:
	- class_path: pytorch_lightning.callbacks.LearningRateMonitor
	- class_path: pytorch_lightning.callbacks.ModelSummary
	init_args:
	max_depth: 2
	- class_path: pytorch_lightning.callbacks.ModelCheckpoint
	init_args:
	monitor: val_loss
	filename: vocos_checkpoint_{epoch}_{step}_{val_loss:.4f}
	save_top_k: 3
	save_last: true
	- class_path: vocos.helpers.GradNormCallback

	# Lightning calculates max_steps across all optimizer steps (rather than number of batches)
	# This equals to 1M steps per generator and 1M per discriminator
	max_steps: 2000000
	# You might want to limit val batches when evaluating all the metrics, as they are time-consuming
	limit_val_batches: 50
	accelerator: gpu
	strategy: ddp
	devices: [0]
	log_every_n_steps: 250