---
# Top-level random seed for the run.
seed_everything: 4444

# Data module: separate loader settings for the training and validation
# splits, given as class_path + init_args.
data:
  class_path: vocos.dataset.VocosDataModule
  init_args:
    train_params:
      # `???` is an unfilled placeholder; set it to the training file list.
      filelist_path: ???
      sampling_rate: 22050
      num_samples: 16384
      batch_size: 16
      num_workers: 8

    val_params:
      # `???` is an unfilled placeholder; set it to the validation file list.
      filelist_path: ???
      sampling_rate: 22050
      num_samples: 48384
      batch_size: 16
      num_workers: 8

# Model: the experiment class plus its feature extractor, backbone, head and
# mel-spectrogram loss, each given as class_path + init_args.
model:
  class_path: vocos.experiment.VocosExp
  init_args:
    sample_rate: 22050
    # Written with an explicit mantissa dot so YAML 1.1 loaders (e.g. PyYAML)
    # resolve it as a float rather than the string "1e-3".
    initial_learning_rate: 1.0e-3
    mel_loss_coeff: 45
    mrd_loss_coeff: 0.1
    num_warmup_steps: 500
    pretrain_mel_steps: 0

    # Evaluation metric switches.
    evaluate_utmos: true
    evaluate_pesq: true
    # NOTE(review): the key is spelled "periodicty". It has to match the
    # parameter name on VocosExp, so check that class before "fixing" it here.
    evaluate_periodicty: true

    feature_extractor:
      class_path: vocos.feature_extractors.MelSpectrogramFeatures
      init_args:
        sample_rate: 22050
        n_fft: 1024
        hop_length: 256
        # Same value as backbone.init_args.input_channels below.
        n_mels: 80
        padding: same
        f_min: 0
        f_max: 8000
        norm: "slaney"
        mel_scale: "slaney"
        # Explicit mantissa dot: see initial_learning_rate above.
        clip_val: 1.0e-5

    backbone:
      class_path: vocos.models.VocosBackbone
      init_args:
        # Same value as feature_extractor.init_args.n_mels above.
        input_channels: 80
        dim: 512
        intermediate_dim: 1536
        num_layers: 8

    head:
      class_path: vocos.heads.WaveNextHead
      init_args:
        dim: 512
        n_fft: 1024
        hop_length: 256
        padding: same

    melspec_loss:
      class_path: vocos.loss.MelSpecReconstructionLoss
      init_args:
        sample_rate: 22050
        n_fft: 1024
        hop_length: 256
        # NOTE(review): n_mels and f_max differ from feature_extractor
        # (80 / 8000) - confirm this is intentional.
        n_mels: 128
        f_min: 0
        f_max: 11000
        norm: "slaney"
        mel_scale: "slaney"
        # Explicit mantissa dot: see initial_learning_rate above.
        clip_val: 1.0e-5

# Trainer: logger, callbacks and run limits.
trainer:
  logger:
    class_path: pytorch_lightning.loggers.TensorBoardLogger
    init_args:
      # `???` is an unfilled placeholder; set it to the log output directory.
      save_dir: ???
  callbacks:
    - class_path: pytorch_lightning.callbacks.LearningRateMonitor
    - class_path: pytorch_lightning.callbacks.ModelSummary
      init_args:
        max_depth: 2
    # Keeps the 3 best checkpoints ranked by val_loss, plus the latest one.
    - class_path: pytorch_lightning.callbacks.ModelCheckpoint
      init_args:
        monitor: val_loss
        filename: vocos_checkpoint_{epoch}_{step}_{val_loss:.4f}
        save_top_k: 3
        save_last: true
    - class_path: vocos.helpers.GradNormCallback

  max_steps: 2000000

  limit_val_batches: 50
  accelerator: gpu
  strategy: ddp
  devices: [0]
  log_every_n_steps: 250