{
  "saving_path": "/home/ubuntu/experiments/a2s_mls",
  "resume_checkpoint": null,
  "vocoder_type": "SPEECHTOKENIZER",
  "vocoder_config_path": null,
  "vocoder_ckpt_path": null,
  "metapath": [
    "/var/data_mls/train.json"
  ],
  "val_metapath": [
    "/var/data_mls/test.json"
  ],
  "pretrained_path": null,
  "speaker_embedding_dir": null,
  "sampledir": "/home/ubuntu/experiments/a2s_mls",
  "lr": 0.0005,
  "batch_size": 100.0,
  "train_bucket_size": 8192,
  "training_step": 800000,
  "optim_flat_percent": 0.0,
  "warmup_step": 10000,
  "adam_beta1": 0.9,
  "adam_beta2": 0.98,
  "ffd_size": 1024,
  "hidden_size": 1024,
  "enc_nlayers": 8,
  "dec_nlayers": 6,
  "nheads": 8,
  "dropout": 0.1,
  "depthwise_conv_kernel_size": 5,
  "aligner_softmax_temp": 1.0,
  "layer_norm_eps": 1e-05,
  "use_sem_tokens": true,
  "use_spkr_emb": false,
  "use_text_emb": false,
  "fairseq": false,
  "only_inference": false,
  "speaker_embed_dropout": 0.05,
  "label_smoothing": 0.0,
  "val_check_interval": 1,
  "max_dataset_samples": -1,
  "check_val_every_n_epoch": 1,
  "precision": "bf16",
  "nworkers": 12,
  "distributed": true,
  "accelerator": "gpu",
  "version": null,
  "accumulate_grad_batches": 1,
  "sagemaker": false,
  "use_repetition_token": false,
  "use_repetition_gating": false,
  "repetition_penalty": 1.0,
  "sampling_temperature": 1.0,
  "top_k": -1,
  "min_top_k": 3,
  "top_p": 0.8,
  "sample_num": 4,
  "length_penalty_max_length": 150,
  "length_penalty_max_prob": 0.95,
  "max_input_length": 2048,
  "max_output_length": 2000,
  "phone_context_window": 3,
  "sample_rate": 16000,
  "n_codes": 1024,
  "n_cluster_groups": 7,
  "first_n_lvls": 7,
  "use_pretrained_ckpt_cfg": false,
  "n_semantic_codes": 1024
}