Spaces:
Configuration error
Configuration error
Kangarroar
committed on
Commit
•
632f309
1
Parent(s):
ef5bee9
Upload 11 files
Browse files- .gitattributes +1 -0
- app.py +116 -0
- checkpoints/0102_xiaoma_pe/config.yaml +172 -0
- checkpoints/0102_xiaoma_pe/model_ckpt_steps_60000.ckpt +3 -0
- checkpoints/Unnamed/config.yaml +445 -0
- checkpoints/Unnamed/config_nsf.yaml +445 -0
- checkpoints/Unnamed/lightning_logs/lastest/hparams.yaml +1 -0
- checkpoints/Unnamed/model_ckpt_steps_192000.ckpt +3 -0
- checkpoints/nsf_hifigan/NOTICE.txt +74 -0
- checkpoints/nsf_hifigan/NOTICE.zh-CN.txt +72 -0
- checkpoints/nsf_hifigan/config.json +38 -0
- checkpoints/nsf_hifigan/model +3 -0
.gitattributes
CHANGED
@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
34 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
35 |
results/test_output.wav filter=lfs diff=lfs merge=lfs -text
|
36 |
test_output.wav filter=lfs diff=lfs merge=lfs -text
|
|
|
|
34 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
35 |
results/test_output.wav filter=lfs diff=lfs merge=lfs -text
|
36 |
test_output.wav filter=lfs diff=lfs merge=lfs -text
|
37 |
+
checkpoints/nsf_hifigan/model filter=lfs diff=lfs merge=lfs -text
|
app.py
ADDED
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import tkinter as tk
|
2 |
+
import tkinter.filedialog
|
3 |
+
import tkinter.ttk as ttk
|
4 |
+
import tkinter as tk
|
5 |
+
from tkinter import ttk
|
6 |
+
import wave
|
7 |
+
from utils.hparams import hparams
|
8 |
+
from preprocessing.data_gen_utils import get_pitch_parselmouth,get_pitch_crepe
|
9 |
+
import numpy as np
|
10 |
+
import IPython.display as ipd
|
11 |
+
import utils
|
12 |
+
import librosa
|
13 |
+
import torchcrepe
|
14 |
+
from infer import *
|
15 |
+
import logging
|
16 |
+
from infer_tools.infer_tool import *
|
17 |
+
from tkinter import Label
|
18 |
+
from time import sleep
|
19 |
+
import os
|
20 |
+
# Root window of the rendering tool, with its title and initial size.
window = tk.Tk()
window.title("Diff-SVC Rendering Tool")
window.geometry("250x400")

# Column 0 and row 4 (the console row) absorb any extra space on resize.
window.grid_columnconfigure(0, weight=1)
window.grid_rowconfigure(4, weight=1)

# Text widget used as an in-window console for status messages.
textbox = tk.Text(window)
textbox.grid(row=4, column=0, padx=20, pady=20, sticky="nsew")

# Empty label placed in row 5, below the console.
loading_animation_label = Label(window)
loading_animation_label.grid(row=5, column=0)

# "Load Model" button; its command is attached once the handler is defined.
button1 = ttk.Button(window, text="Load Model")
button1.grid(row=0, column=0, padx=20, pady=20)

# Indeterminate progress bar; it is only gridded while start() is active.
pb = ttk.Progressbar(window, orient='horizontal', mode='indeterminate', length=250)
|
48 |
+
def start():
    """Show the progress bar in grid row 5, column 0 and start it with pb.start(10)."""
    placement = {"column": 0, "row": 5, "padx": 0, "pady": 0}
    pb.grid(**placement)
    pb.start(10)
|
51 |
+
|
52 |
+
def stop():
    """Stop the progress bar and take it out of the layout with grid_remove()."""
    pb.stop()
    pb.grid_remove()
|
55 |
+
def button1_clicked():
    """Prompt for a checkpoint, a YAML config and a project name, then load the model.

    On success the new ``Svc`` instance is stored in the module-level
    ``svc_model`` and the chosen name in ``project_name``, and
    'model loaded' is written to the console text box.

    If any prompt is cancelled or left empty, an error dialog is shown and
    the previously loaded model (if any) is left untouched. The progress bar
    is always stopped again, even when ``Svc(...)`` raises.
    """
    # Imported here explicitly: the file's top-level imports only name
    # tkinter.filedialog, so these submodules would otherwise be reachable
    # only if some other import happens to load them.
    import tkinter.messagebox
    import tkinter.simpledialog

    global project_name
    global svc_model

    model_path = tkinter.filedialog.askopenfilename(
        title="Select CKPT File",
        filetypes=[("Checkpoint files", "*.ckpt")],
    )
    # A cancelled dialog yields an empty value; test falsiness, not == ''.
    if not model_path:
        tkinter.messagebox.showerror("Error", "No CKPT file selected")
        return

    config_path = tkinter.filedialog.askopenfilename(
        title="Select YAML File",
        filetypes=[("Yaml files", "*.yaml")],
    )
    if not config_path:
        tkinter.messagebox.showerror("Error", "No YAML file selected")
        return

    # askstring returns None when the dialog is cancelled and '' when it is
    # confirmed empty; both are rejected.
    name = tkinter.simpledialog.askstring("Input", "Enter project name:", parent=window)
    if not name:
        tkinter.messagebox.showerror("Error", "No Project Name")
        return

    logging.getLogger('numba').setLevel(logging.WARNING)

    # Start the progress bar only once all input is valid, and always stop it.
    start()
    # Loading runs on the Tk thread, so flush pending redraws first to get
    # the bar on screen before the blocking call.
    window.update_idletasks()
    try:
        # hubert_gpu is the module-level BooleanVar bound to the "Use GPU"
        # checkbox; read its current state instead of hard-coding False.
        loaded_model = Svc(name, config_path, hubert_gpu.get(), model_path)
    finally:
        stop()

    # Publish the new state only after the model loaded successfully.
    project_name = name
    svc_model = loaded_model
    textbox.insert('end', 'model loaded\n')
|
80 |
+
|
81 |
+
|
82 |
+
# Attach the load-model handler to the first button.
button1.configure(command=button1_clicked)

# "Start Rendering" button, placed in row 1 below the "Load Model" button.
button2 = ttk.Button(window, text="Start Rendering")
button2.grid(row=1, column=0, padx=20, pady=20)
|
87 |
+
|
88 |
+
# Define a callback function for the second button
|
89 |
+
def button2_clicked():
    """Render a user-selected WAV file with the loaded model and open the result.

    Prompts, in order, for the input WAV file, the integer "key" value and
    the output track name ('.wav' is appended when missing). If no model has
    been loaded yet, or any prompt is cancelled or left empty, an error
    dialog is shown and nothing is rendered.

    Otherwise ``run_clip`` is called with the module-level ``svc_model`` and
    ``project_name``. The progress bar is always stopped afterwards, even
    when rendering raises.
    """
    # Imported here explicitly: the file's top-level imports only name
    # tkinter.filedialog, so these submodules would otherwise be reachable
    # only if some other import happens to load them.
    import tkinter.messagebox
    import tkinter.simpledialog

    # svc_model only exists after "Load Model" has succeeded at least once.
    model = globals().get("svc_model")
    if model is None:
        tkinter.messagebox.showerror("Error", "No model loaded")
        return

    # Open a file selection dialog for WAV files.
    wav_fn = tkinter.filedialog.askopenfilename(filetypes=[("WAV files", "*.wav")])
    if not wav_fn:
        tkinter.messagebox.showerror("Error", "No WAV file selected")
        return

    # askinteger returns None on cancel; 0 is a valid key, so compare to None.
    key = tkinter.simpledialog.askinteger("Input", "Enter key value:", parent=window)
    if key is None:
        tkinter.messagebox.showerror("Error", "No key value entered")
        return

    # askstring returns None on cancel and '' when confirmed empty.
    wav_gen = tkinter.simpledialog.askstring("Input", "Enter the track name:", parent=window)
    if not wav_gen:
        tkinter.messagebox.showerror("Error", "No track name entered")
        return
    if not wav_gen.endswith('.wav'):
        wav_gen += '.wav'

    pndm_speedup = 20

    textbox.insert('end', 'Rendering Started, please wait...\n')
    start()
    # Rendering runs on the Tk thread, so flush pending redraws first to get
    # the message and progress bar on screen before the blocking call.
    window.update_idletasks()
    try:
        run_clip(model, file_path=wav_fn, key=key, acc=pndm_speedup, use_crepe=True, use_pe=True, thre=0.05,
                 use_gt_mel=False, add_noise_step=500, project_name=project_name, out_path=wav_gen)
        # Short pause kept from the original; `sleep` is the name this file
        # actually imports (the `time` module itself is never imported).
        sleep(2)
    finally:
        stop()

    # Trailing newline so the next console message starts on its own line.
    textbox.insert('end', 'Rendering process done!\nPlaying Audio now...\n')
    # os.startfile only exists on Windows; elsewhere report the path instead
    # of raising AttributeError.
    if hasattr(os, "startfile"):
        os.startfile(wav_gen)
    else:
        textbox.insert('end', 'Automatic playback is not available on this platform. Output file: ' + wav_gen + '\n')
|
109 |
+
# Attach the rendering handler to the second button.
button2.configure(command=button2_clicked)

# "Use GPU" checkbox; its on/off state is held in the hubert_gpu BooleanVar.
hubert_gpu = tk.BooleanVar()
checkbox = tk.Checkbutton(window, text="Use GPU", variable=hubert_gpu)
checkbox.grid(row=3, column=0)

# Enter the Tk event loop.
window.mainloop()
|
checkpoints/0102_xiaoma_pe/config.yaml
ADDED
@@ -0,0 +1,172 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
accumulate_grad_batches: 1
|
2 |
+
audio_num_mel_bins: 80
|
3 |
+
audio_sample_rate: 24000
|
4 |
+
base_config:
|
5 |
+
- configs/tts/lj/fs2.yaml
|
6 |
+
binarization_args:
|
7 |
+
shuffle: false
|
8 |
+
with_align: true
|
9 |
+
with_f0: true
|
10 |
+
with_f0cwt: true
|
11 |
+
with_spk_embed: true
|
12 |
+
with_txt: true
|
13 |
+
with_wav: false
|
14 |
+
binarizer_cls: data_gen.tts.base_binarizer.BaseBinarizer
|
15 |
+
binary_data_dir: data/binary/xiaoma1022_24k_128hop
|
16 |
+
check_val_every_n_epoch: 10
|
17 |
+
clip_grad_norm: 1
|
18 |
+
cwt_add_f0_loss: false
|
19 |
+
cwt_hidden_size: 128
|
20 |
+
cwt_layers: 2
|
21 |
+
cwt_loss: l1
|
22 |
+
cwt_std_scale: 0.8
|
23 |
+
debug: false
|
24 |
+
dec_ffn_kernel_size: 9
|
25 |
+
dec_layers: 4
|
26 |
+
decoder_type: fft
|
27 |
+
dict_dir: ''
|
28 |
+
dropout: 0.1
|
29 |
+
ds_workers: 4
|
30 |
+
dur_enc_hidden_stride_kernel:
|
31 |
+
- 0,2,3
|
32 |
+
- 0,2,3
|
33 |
+
- 0,1,3
|
34 |
+
dur_loss: mse
|
35 |
+
dur_predictor_kernel: 3
|
36 |
+
dur_predictor_layers: 2
|
37 |
+
enc_ffn_kernel_size: 9
|
38 |
+
enc_layers: 4
|
39 |
+
encoder_K: 8
|
40 |
+
encoder_type: fft
|
41 |
+
endless_ds: true
|
42 |
+
ffn_act: gelu
|
43 |
+
ffn_padding: SAME
|
44 |
+
fft_size: 512
|
45 |
+
fmax: 12000
|
46 |
+
fmin: 30
|
47 |
+
gen_dir_name: ''
|
48 |
+
hidden_size: 256
|
49 |
+
hop_size: 128
|
50 |
+
infer: false
|
51 |
+
lambda_commit: 0.25
|
52 |
+
lambda_energy: 0.1
|
53 |
+
lambda_f0: 1.0
|
54 |
+
lambda_ph_dur: 1.0
|
55 |
+
lambda_sent_dur: 1.0
|
56 |
+
lambda_uv: 1.0
|
57 |
+
lambda_word_dur: 1.0
|
58 |
+
load_ckpt: ''
|
59 |
+
log_interval: 100
|
60 |
+
loud_norm: false
|
61 |
+
lr: 2.0
|
62 |
+
max_epochs: 1000
|
63 |
+
max_eval_sentences: 1
|
64 |
+
max_eval_tokens: 60000
|
65 |
+
max_frames: 5000
|
66 |
+
max_input_tokens: 1550
|
67 |
+
max_sentences: 100000
|
68 |
+
max_tokens: 20000
|
69 |
+
max_updates: 60000
|
70 |
+
mel_loss: l1
|
71 |
+
mel_vmax: 1.5
|
72 |
+
mel_vmin: -6
|
73 |
+
min_level_db: -120
|
74 |
+
norm_type: gn
|
75 |
+
num_ckpt_keep: 3
|
76 |
+
num_heads: 2
|
77 |
+
num_sanity_val_steps: 5
|
78 |
+
num_spk: 1
|
79 |
+
num_test_samples: 20
|
80 |
+
num_valid_plots: 10
|
81 |
+
optimizer_adam_beta1: 0.9
|
82 |
+
optimizer_adam_beta2: 0.98
|
83 |
+
out_wav_norm: false
|
84 |
+
pitch_ar: false
|
85 |
+
pitch_enc_hidden_stride_kernel:
|
86 |
+
- 0,2,5
|
87 |
+
- 0,2,5
|
88 |
+
- 0,2,5
|
89 |
+
pitch_extractor_conv_layers: 2
|
90 |
+
pitch_loss: l1
|
91 |
+
pitch_norm: log
|
92 |
+
pitch_type: frame
|
93 |
+
pre_align_args:
|
94 |
+
allow_no_txt: false
|
95 |
+
denoise: false
|
96 |
+
forced_align: mfa
|
97 |
+
txt_processor: en
|
98 |
+
use_sox: false
|
99 |
+
use_tone: true
|
100 |
+
pre_align_cls: data_gen.tts.lj.pre_align.LJPreAlign
|
101 |
+
predictor_dropout: 0.5
|
102 |
+
predictor_grad: 0.1
|
103 |
+
predictor_hidden: -1
|
104 |
+
predictor_kernel: 5
|
105 |
+
predictor_layers: 2
|
106 |
+
prenet_dropout: 0.5
|
107 |
+
prenet_hidden_size: 256
|
108 |
+
pretrain_fs_ckpt: ''
|
109 |
+
processed_data_dir: data/processed/ljspeech
|
110 |
+
profile_infer: false
|
111 |
+
raw_data_dir: data/raw/LJSpeech-1.1
|
112 |
+
ref_norm_layer: bn
|
113 |
+
reset_phone_dict: true
|
114 |
+
save_best: false
|
115 |
+
save_ckpt: true
|
116 |
+
save_codes:
|
117 |
+
- configs
|
118 |
+
- modules
|
119 |
+
- tasks
|
120 |
+
- utils
|
121 |
+
- usr
|
122 |
+
save_f0: false
|
123 |
+
save_gt: false
|
124 |
+
seed: 1234
|
125 |
+
sort_by_len: true
|
126 |
+
stop_token_weight: 5.0
|
127 |
+
task_cls: tasks.tts.pe.PitchExtractionTask
|
128 |
+
test_ids:
|
129 |
+
- 68
|
130 |
+
- 70
|
131 |
+
- 74
|
132 |
+
- 87
|
133 |
+
- 110
|
134 |
+
- 172
|
135 |
+
- 190
|
136 |
+
- 215
|
137 |
+
- 231
|
138 |
+
- 294
|
139 |
+
- 316
|
140 |
+
- 324
|
141 |
+
- 402
|
142 |
+
- 422
|
143 |
+
- 485
|
144 |
+
- 500
|
145 |
+
- 505
|
146 |
+
- 508
|
147 |
+
- 509
|
148 |
+
- 519
|
149 |
+
test_input_dir: ''
|
150 |
+
test_num: 523
|
151 |
+
test_set_name: test
|
152 |
+
train_set_name: train
|
153 |
+
use_denoise: false
|
154 |
+
use_energy_embed: false
|
155 |
+
use_gt_dur: false
|
156 |
+
use_gt_f0: false
|
157 |
+
use_pitch_embed: true
|
158 |
+
use_pos_embed: true
|
159 |
+
use_spk_embed: false
|
160 |
+
use_spk_id: false
|
161 |
+
use_split_spk_id: false
|
162 |
+
use_uv: true
|
163 |
+
use_var_enc: false
|
164 |
+
val_check_interval: 2000
|
165 |
+
valid_num: 348
|
166 |
+
valid_set_name: valid
|
167 |
+
vocoder: pwg
|
168 |
+
vocoder_ckpt: ''
|
169 |
+
warmup_updates: 2000
|
170 |
+
weight_decay: 0
|
171 |
+
win_size: 512
|
172 |
+
work_dir: checkpoints/0102_xiaoma_pe
|
checkpoints/0102_xiaoma_pe/model_ckpt_steps_60000.ckpt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1863f12324e43783089ab933edeeb969106b851e30d71019ebbaa9b82099d82a
|
3 |
+
size 39141959
|
checkpoints/Unnamed/config.yaml
ADDED
@@ -0,0 +1,445 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
K_step: 1000
|
2 |
+
accumulate_grad_batches: 1
|
3 |
+
audio_num_mel_bins: 128
|
4 |
+
audio_sample_rate: 44100
|
5 |
+
binarization_args:
|
6 |
+
shuffle: false
|
7 |
+
with_align: true
|
8 |
+
with_f0: true
|
9 |
+
with_hubert: false
|
10 |
+
with_spk_embed: false
|
11 |
+
with_wav: false
|
12 |
+
binarizer_cls: preprocessing.SVCpre.SVCBinarizer
|
13 |
+
binary_data_dir: data/binary/Unnamed
|
14 |
+
check_val_every_n_epoch: 10
|
15 |
+
choose_test_manually: false
|
16 |
+
clip_grad_norm: 1
|
17 |
+
config_path: training/config_nsf.yaml
|
18 |
+
content_cond_steps: []
|
19 |
+
cwt_add_f0_loss: false
|
20 |
+
cwt_hidden_size: 128
|
21 |
+
cwt_layers: 2
|
22 |
+
cwt_loss: l1
|
23 |
+
cwt_std_scale: 0.8
|
24 |
+
datasets:
|
25 |
+
- opencpop
|
26 |
+
debug: false
|
27 |
+
dec_ffn_kernel_size: 9
|
28 |
+
dec_layers: 4
|
29 |
+
decay_steps: 40000
|
30 |
+
decoder_type: fft
|
31 |
+
dict_dir: ''
|
32 |
+
diff_decoder_type: wavenet
|
33 |
+
diff_loss_type: l2
|
34 |
+
dilation_cycle_length: 4
|
35 |
+
dropout: 0.1
|
36 |
+
ds_workers: 4
|
37 |
+
dur_enc_hidden_stride_kernel:
|
38 |
+
- 0,2,3
|
39 |
+
- 0,2,3
|
40 |
+
- 0,1,3
|
41 |
+
dur_loss: mse
|
42 |
+
dur_predictor_kernel: 3
|
43 |
+
dur_predictor_layers: 5
|
44 |
+
enc_ffn_kernel_size: 9
|
45 |
+
enc_layers: 4
|
46 |
+
encoder_K: 8
|
47 |
+
encoder_type: fft
|
48 |
+
endless_ds: false
|
49 |
+
f0_bin: 256
|
50 |
+
f0_max: 1100.0
|
51 |
+
f0_min: 40.0
|
52 |
+
ffn_act: gelu
|
53 |
+
ffn_padding: SAME
|
54 |
+
fft_size: 2048
|
55 |
+
fmax: 16000
|
56 |
+
fmin: 40
|
57 |
+
fs2_ckpt: ''
|
58 |
+
gaussian_start: true
|
59 |
+
gen_dir_name: ''
|
60 |
+
gen_tgt_spk_id: -1
|
61 |
+
hidden_size: 256
|
62 |
+
hop_size: 512
|
63 |
+
hubert_gpu: true
|
64 |
+
hubert_path: checkpoints/hubert/hubert_soft.pt
|
65 |
+
infer: false
|
66 |
+
keep_bins: 128
|
67 |
+
lambda_commit: 0.25
|
68 |
+
lambda_energy: 0.0
|
69 |
+
lambda_f0: 1.0
|
70 |
+
lambda_ph_dur: 0.3
|
71 |
+
lambda_sent_dur: 1.0
|
72 |
+
lambda_uv: 1.0
|
73 |
+
lambda_word_dur: 1.0
|
74 |
+
load_ckpt: ''
|
75 |
+
log_interval: 100
|
76 |
+
loud_norm: false
|
77 |
+
lr: 0.0008
|
78 |
+
max_beta: 0.02
|
79 |
+
max_epochs: 3000
|
80 |
+
max_eval_sentences: 1
|
81 |
+
max_eval_tokens: 60000
|
82 |
+
max_frames: 42000
|
83 |
+
max_input_tokens: 60000
|
84 |
+
max_sentences: 12
|
85 |
+
max_tokens: 128000
|
86 |
+
max_updates: 1000000
|
87 |
+
mel_loss: ssim:0.5|l1:0.5
|
88 |
+
mel_vmax: 1.5
|
89 |
+
mel_vmin: -6.0
|
90 |
+
min_level_db: -120
|
91 |
+
no_fs2: true
|
92 |
+
norm_type: gn
|
93 |
+
num_ckpt_keep: 10
|
94 |
+
num_heads: 2
|
95 |
+
num_sanity_val_steps: 1
|
96 |
+
num_spk: 1
|
97 |
+
num_test_samples: 0
|
98 |
+
num_valid_plots: 10
|
99 |
+
optimizer_adam_beta1: 0.9
|
100 |
+
optimizer_adam_beta2: 0.98
|
101 |
+
out_wav_norm: false
|
102 |
+
pe_ckpt: checkpoints/0102_xiaoma_pe/model_ckpt_steps_60000.ckpt
|
103 |
+
pe_enable: false
|
104 |
+
perform_enhance: true
|
105 |
+
pitch_ar: false
|
106 |
+
pitch_enc_hidden_stride_kernel:
|
107 |
+
- 0,2,5
|
108 |
+
- 0,2,5
|
109 |
+
- 0,2,5
|
110 |
+
pitch_extractor: parselmouth
|
111 |
+
pitch_loss: l2
|
112 |
+
pitch_norm: log
|
113 |
+
pitch_type: frame
|
114 |
+
pndm_speedup: 10
|
115 |
+
pre_align_args:
|
116 |
+
allow_no_txt: false
|
117 |
+
denoise: false
|
118 |
+
forced_align: mfa
|
119 |
+
txt_processor: zh_g2pM
|
120 |
+
use_sox: true
|
121 |
+
use_tone: false
|
122 |
+
pre_align_cls: data_gen.singing.pre_align.SingingPreAlign
|
123 |
+
predictor_dropout: 0.5
|
124 |
+
predictor_grad: 0.1
|
125 |
+
predictor_hidden: -1
|
126 |
+
predictor_kernel: 5
|
127 |
+
predictor_layers: 5
|
128 |
+
prenet_dropout: 0.5
|
129 |
+
prenet_hidden_size: 256
|
130 |
+
pretrain_fs_ckpt: ''
|
131 |
+
processed_data_dir: xxx
|
132 |
+
profile_infer: false
|
133 |
+
raw_data_dir: data/raw/Unnamed
|
134 |
+
ref_norm_layer: bn
|
135 |
+
rel_pos: true
|
136 |
+
reset_phone_dict: true
|
137 |
+
residual_channels: 384
|
138 |
+
residual_layers: 20
|
139 |
+
save_best: false
|
140 |
+
save_ckpt: true
|
141 |
+
save_codes:
|
142 |
+
- configs
|
143 |
+
- modules
|
144 |
+
- src
|
145 |
+
- utils
|
146 |
+
save_f0: true
|
147 |
+
save_gt: false
|
148 |
+
schedule_type: linear
|
149 |
+
seed: 1234
|
150 |
+
sort_by_len: true
|
151 |
+
speaker_id: Unnamed
|
152 |
+
spec_max:
|
153 |
+
- -0.025250941514968872
|
154 |
+
- 0.004534448496997356
|
155 |
+
- 0.5684943795204163
|
156 |
+
- 0.6527385115623474
|
157 |
+
- 0.659079372882843
|
158 |
+
- 0.7416915893554688
|
159 |
+
- 0.844637930393219
|
160 |
+
- 0.806076169013977
|
161 |
+
- 0.7238750457763672
|
162 |
+
- 0.9744535088539124
|
163 |
+
- 0.9476388692855835
|
164 |
+
- 0.9883336424827576
|
165 |
+
- 1.0821290016174316
|
166 |
+
- 1.046391248703003
|
167 |
+
- 0.9829667806625366
|
168 |
+
- 1.0163493156433105
|
169 |
+
- 0.9825412631034851
|
170 |
+
- 0.9834834337234497
|
171 |
+
- 1.052114725112915
|
172 |
+
- 1.128888726234436
|
173 |
+
- 1.186057209968567
|
174 |
+
- 1.112004280090332
|
175 |
+
- 1.1282787322998047
|
176 |
+
- 1.051572322845459
|
177 |
+
- 1.1104764938354492
|
178 |
+
- 1.176831603050232
|
179 |
+
- 1.13348388671875
|
180 |
+
- 0.9916292428970337
|
181 |
+
- 0.8383486270904541
|
182 |
+
- 0.7735869288444519
|
183 |
+
- 0.9303848743438721
|
184 |
+
- 1.1257890462875366
|
185 |
+
- 1.1610286235809326
|
186 |
+
- 1.0335885286331177
|
187 |
+
- 1.0645352602005005
|
188 |
+
- 1.0619306564331055
|
189 |
+
- 1.1310148239135742
|
190 |
+
- 1.1191954612731934
|
191 |
+
- 1.1307402849197388
|
192 |
+
- 1.2094721794128418
|
193 |
+
- 1.2683185338974
|
194 |
+
- 1.1045044660568237
|
195 |
+
- 1.0479614734649658
|
196 |
+
- 0.9491603374481201
|
197 |
+
- 0.9858523011207581
|
198 |
+
- 0.9226155281066895
|
199 |
+
- 0.9469702839851379
|
200 |
+
- 1.023751139640808
|
201 |
+
- 1.1348609924316406
|
202 |
+
- 1.087107539176941
|
203 |
+
- 0.962234377861023
|
204 |
+
- 0.8551340699195862
|
205 |
+
- 0.8397778272628784
|
206 |
+
- 0.8908605575561523
|
207 |
+
- 0.7986546158790588
|
208 |
+
- 0.7983465194702148
|
209 |
+
- 0.6965265274047852
|
210 |
+
- 0.689120352268219
|
211 |
+
- 0.6862147450447083
|
212 |
+
- 0.5631484985351562
|
213 |
+
- 0.48587048053741455
|
214 |
+
- 0.5326520800590515
|
215 |
+
- 0.4286036193370819
|
216 |
+
- 0.35252484679222107
|
217 |
+
- 0.3290073573589325
|
218 |
+
- 0.4754445552825928
|
219 |
+
- 0.3632410168647766
|
220 |
+
- 0.391481876373291
|
221 |
+
- 0.2200046181678772
|
222 |
+
- 0.1869768500328064
|
223 |
+
- 0.1539602279663086
|
224 |
+
- 0.07932852953672409
|
225 |
+
- 0.012834634631872177
|
226 |
+
- 0.16596835851669312
|
227 |
+
- 0.10024689882993698
|
228 |
+
- -0.023952053859829903
|
229 |
+
- 0.05635542422533035
|
230 |
+
- 0.10877621918916702
|
231 |
+
- 0.0382893942296505
|
232 |
+
- 0.07318088412284851
|
233 |
+
- 0.14075303077697754
|
234 |
+
- 0.057870157063007355
|
235 |
+
- -0.0520513579249382
|
236 |
+
- 0.1741427332162857
|
237 |
+
- -0.11154910922050476
|
238 |
+
- 0.03305494412779808
|
239 |
+
- -0.022758174687623978
|
240 |
+
- -0.05313302204012871
|
241 |
+
- 0.00024538111756555736
|
242 |
+
- -0.26880618929862976
|
243 |
+
- -0.0825519785284996
|
244 |
+
- -0.3040400445461273
|
245 |
+
- -0.44150036573410034
|
246 |
+
- -0.36957985162734985
|
247 |
+
- -0.438098281621933
|
248 |
+
- -0.49879470467567444
|
249 |
+
- -0.5903350710868835
|
250 |
+
- -0.6418567895889282
|
251 |
+
- -0.6425778865814209
|
252 |
+
- -0.6178902387619019
|
253 |
+
- -0.47356730699539185
|
254 |
+
- -0.6052739024162292
|
255 |
+
- -0.5359307527542114
|
256 |
+
- -0.5759448409080505
|
257 |
+
- -0.5498068332672119
|
258 |
+
- -0.4661938548088074
|
259 |
+
- -0.5811225771903992
|
260 |
+
- -0.5229856967926025
|
261 |
+
- -0.3902229070663452
|
262 |
+
- -0.7037366032600403
|
263 |
+
- -0.7260795831680298
|
264 |
+
- -0.7540019750595093
|
265 |
+
- -0.828707754611969
|
266 |
+
- -0.8374698758125305
|
267 |
+
- -0.8328713178634644
|
268 |
+
- -0.9081047177314758
|
269 |
+
- -0.9679695963859558
|
270 |
+
- -0.9587443470954895
|
271 |
+
- -1.0706337690353394
|
272 |
+
- -0.9818469285964966
|
273 |
+
- -0.8360191583633423
|
274 |
+
- -0.9938982725143433
|
275 |
+
- -1.0823708772659302
|
276 |
+
- -1.0617167949676514
|
277 |
+
- -1.1093820333480835
|
278 |
+
- -1.1300138235092163
|
279 |
+
- -1.2141350507736206
|
280 |
+
- -1.3147293329238892
|
281 |
+
spec_min:
|
282 |
+
- -4.473258972167969
|
283 |
+
- -4.296891689300537
|
284 |
+
- -4.390527725219727
|
285 |
+
- -4.350704669952393
|
286 |
+
- -4.446024417877197
|
287 |
+
- -4.3960185050964355
|
288 |
+
- -4.164802551269531
|
289 |
+
- -4.5063300132751465
|
290 |
+
- -4.608232021331787
|
291 |
+
- -4.251623630523682
|
292 |
+
- -4.4799604415893555
|
293 |
+
- -4.733210563659668
|
294 |
+
- -4.411860466003418
|
295 |
+
- -4.609100818634033
|
296 |
+
- -4.726972579956055
|
297 |
+
- -4.497627258300781
|
298 |
+
- -4.487612247467041
|
299 |
+
- -4.665065765380859
|
300 |
+
- -4.480506896972656
|
301 |
+
- -4.589383125305176
|
302 |
+
- -4.86366605758667
|
303 |
+
- -4.5183892250061035
|
304 |
+
- -4.816161632537842
|
305 |
+
- -4.906436443328857
|
306 |
+
- -4.897279262542725
|
307 |
+
- -4.431278705596924
|
308 |
+
- -4.999994277954102
|
309 |
+
- -4.871325969696045
|
310 |
+
- -4.527368068695068
|
311 |
+
- -4.872085094451904
|
312 |
+
- -4.894851207733154
|
313 |
+
- -4.511948585510254
|
314 |
+
- -4.534575939178467
|
315 |
+
- -4.57792854309082
|
316 |
+
- -4.444681644439697
|
317 |
+
- -4.996480464935303
|
318 |
+
- -4.74341344833374
|
319 |
+
- -4.85427713394165
|
320 |
+
- -4.723776817321777
|
321 |
+
- -4.7166008949279785
|
322 |
+
- -4.749168395996094
|
323 |
+
- -4.67240047454834
|
324 |
+
- -4.590690612792969
|
325 |
+
- -4.576009750366211
|
326 |
+
- -4.542308330535889
|
327 |
+
- -4.890907287597656
|
328 |
+
- -4.631724834442139
|
329 |
+
- -4.494126796722412
|
330 |
+
- -4.499763488769531
|
331 |
+
- -4.574635028839111
|
332 |
+
- -4.49362850189209
|
333 |
+
- -4.651009559631348
|
334 |
+
- -4.684722900390625
|
335 |
+
- -4.594520568847656
|
336 |
+
- -4.5510125160217285
|
337 |
+
- -4.616012096405029
|
338 |
+
- -4.561031341552734
|
339 |
+
- -4.633460521697998
|
340 |
+
- -4.541748046875
|
341 |
+
- -4.625052452087402
|
342 |
+
- -4.524572372436523
|
343 |
+
- -4.563175201416016
|
344 |
+
- -4.515830039978027
|
345 |
+
- -4.581448554992676
|
346 |
+
- -4.556764125823975
|
347 |
+
- -4.695038795471191
|
348 |
+
- -4.548621654510498
|
349 |
+
- -4.5828471183776855
|
350 |
+
- -4.750834941864014
|
351 |
+
- -4.569651126861572
|
352 |
+
- -4.577111721038818
|
353 |
+
- -4.549272537231445
|
354 |
+
- -4.5840277671813965
|
355 |
+
- -4.574136257171631
|
356 |
+
- -4.574832439422607
|
357 |
+
- -4.549546718597412
|
358 |
+
- -4.490700721740723
|
359 |
+
- -4.635391712188721
|
360 |
+
- -4.567677974700928
|
361 |
+
- -4.516189098358154
|
362 |
+
- -4.6232805252075195
|
363 |
+
- -4.592589855194092
|
364 |
+
- -4.593951225280762
|
365 |
+
- -4.557478904724121
|
366 |
+
- -4.503338813781738
|
367 |
+
- -4.512742519378662
|
368 |
+
- -4.515079498291016
|
369 |
+
- -4.531710147857666
|
370 |
+
- -4.5540852546691895
|
371 |
+
- -4.441158771514893
|
372 |
+
- -4.489132404327393
|
373 |
+
- -4.519915580749512
|
374 |
+
- -4.570128917694092
|
375 |
+
- -4.480836391448975
|
376 |
+
- -4.494598865509033
|
377 |
+
- -4.51900053024292
|
378 |
+
- -4.518474578857422
|
379 |
+
- -4.519540309906006
|
380 |
+
- -4.495180130004883
|
381 |
+
- -4.471179962158203
|
382 |
+
- -4.478188514709473
|
383 |
+
- -4.475483417510986
|
384 |
+
- -4.479583263397217
|
385 |
+
- -4.491953372955322
|
386 |
+
- -4.4339680671691895
|
387 |
+
- -4.469926834106445
|
388 |
+
- -4.46633768081665
|
389 |
+
- -4.468038082122803
|
390 |
+
- -4.489401817321777
|
391 |
+
- -4.472512722015381
|
392 |
+
- -4.43712043762207
|
393 |
+
- -4.469909191131592
|
394 |
+
- -4.475585460662842
|
395 |
+
- -4.460614204406738
|
396 |
+
- -4.4658942222595215
|
397 |
+
- -4.4960408210754395
|
398 |
+
- -4.499384880065918
|
399 |
+
- -4.4431610107421875
|
400 |
+
- -4.440634727478027
|
401 |
+
- -4.468203544616699
|
402 |
+
- -4.461722373962402
|
403 |
+
- -4.503596305847168
|
404 |
+
- -4.457762241363525
|
405 |
+
- -4.453769207000732
|
406 |
+
- -4.509873390197754
|
407 |
+
- -4.505057334899902
|
408 |
+
- -4.486324787139893
|
409 |
+
- -4.49452018737793
|
410 |
+
spk_cond_steps: []
|
411 |
+
stop_token_weight: 5.0
|
412 |
+
task_cls: training.task.SVC_task.SVCTask
|
413 |
+
test_ids: []
|
414 |
+
test_input_dir: ''
|
415 |
+
test_num: 0
|
416 |
+
test_prefixes:
|
417 |
+
- test
|
418 |
+
test_set_name: test
|
419 |
+
timesteps: 1000
|
420 |
+
train_set_name: train
|
421 |
+
use_crepe: true
|
422 |
+
use_denoise: false
|
423 |
+
use_energy_embed: false
|
424 |
+
use_gt_dur: false
|
425 |
+
use_gt_f0: false
|
426 |
+
use_midi: false
|
427 |
+
use_nsf: true
|
428 |
+
use_pitch_embed: true
|
429 |
+
use_pos_embed: true
|
430 |
+
use_spk_embed: false
|
431 |
+
use_spk_id: false
|
432 |
+
use_split_spk_id: false
|
433 |
+
use_uv: false
|
434 |
+
use_var_enc: false
|
435 |
+
use_vec: false
|
436 |
+
val_check_interval: 2000
|
437 |
+
valid_num: 0
|
438 |
+
valid_set_name: valid
|
439 |
+
vocoder: network.vocoders.nsf_hifigan.NsfHifiGAN
|
440 |
+
vocoder_ckpt: checkpoints/nsf_hifigan/model
|
441 |
+
warmup_updates: 2000
|
442 |
+
wav2spec_eps: 1e-6
|
443 |
+
weight_decay: 0
|
444 |
+
win_size: 2048
|
445 |
+
work_dir: checkpoints/Unnamed
|
checkpoints/Unnamed/config_nsf.yaml
ADDED
@@ -0,0 +1,445 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
K_step: 1000
|
2 |
+
accumulate_grad_batches: 1
|
3 |
+
audio_num_mel_bins: 128
|
4 |
+
audio_sample_rate: 44100
|
5 |
+
binarization_args:
|
6 |
+
shuffle: false
|
7 |
+
with_align: true
|
8 |
+
with_f0: true
|
9 |
+
with_hubert: true
|
10 |
+
with_spk_embed: false
|
11 |
+
with_wav: false
|
12 |
+
binarizer_cls: preprocessing.SVCpre.SVCBinarizer
|
13 |
+
binary_data_dir: data/binary/Unnamed
|
14 |
+
check_val_every_n_epoch: 10
|
15 |
+
choose_test_manually: false
|
16 |
+
clip_grad_norm: 1
|
17 |
+
config_path: training/config_nsf.yaml
|
18 |
+
content_cond_steps: []
|
19 |
+
cwt_add_f0_loss: false
|
20 |
+
cwt_hidden_size: 128
|
21 |
+
cwt_layers: 2
|
22 |
+
cwt_loss: l1
|
23 |
+
cwt_std_scale: 0.8
|
24 |
+
datasets:
|
25 |
+
- opencpop
|
26 |
+
debug: false
|
27 |
+
dec_ffn_kernel_size: 9
|
28 |
+
dec_layers: 4
|
29 |
+
decay_steps: 20000
|
30 |
+
decoder_type: fft
|
31 |
+
dict_dir: ''
|
32 |
+
diff_decoder_type: wavenet
|
33 |
+
diff_loss_type: l2
|
34 |
+
dilation_cycle_length: 4
|
35 |
+
dropout: 0.1
|
36 |
+
ds_workers: 4
|
37 |
+
dur_enc_hidden_stride_kernel:
|
38 |
+
- 0,2,3
|
39 |
+
- 0,2,3
|
40 |
+
- 0,1,3
|
41 |
+
dur_loss: mse
|
42 |
+
dur_predictor_kernel: 3
|
43 |
+
dur_predictor_layers: 5
|
44 |
+
enc_ffn_kernel_size: 9
|
45 |
+
enc_layers: 4
|
46 |
+
encoder_K: 8
|
47 |
+
encoder_type: fft
|
48 |
+
endless_ds: false
|
49 |
+
f0_bin: 256
|
50 |
+
f0_max: 1100.0
|
51 |
+
f0_min: 40.0
|
52 |
+
ffn_act: gelu
|
53 |
+
ffn_padding: SAME
|
54 |
+
fft_size: 2048
|
55 |
+
fmax: 16000
|
56 |
+
fmin: 40
|
57 |
+
fs2_ckpt: ''
|
58 |
+
gaussian_start: true
|
59 |
+
gen_dir_name: ''
|
60 |
+
gen_tgt_spk_id: -1
|
61 |
+
hidden_size: 256
|
62 |
+
hop_size: 512
|
63 |
+
hubert_gpu: true
|
64 |
+
hubert_path: checkpoints/hubert/hubert_soft.pt
|
65 |
+
infer: false
|
66 |
+
keep_bins: 128
|
67 |
+
lambda_commit: 0.25
|
68 |
+
lambda_energy: 0.0
|
69 |
+
lambda_f0: 1.0
|
70 |
+
lambda_ph_dur: 0.3
|
71 |
+
lambda_sent_dur: 1.0
|
72 |
+
lambda_uv: 1.0
|
73 |
+
lambda_word_dur: 1.0
|
74 |
+
load_ckpt: pretrain/nehito_ckpt_steps_1000000.ckpt
|
75 |
+
log_interval: 100
|
76 |
+
loud_norm: false
|
77 |
+
lr: 5.0e-05
|
78 |
+
max_beta: 0.02
|
79 |
+
max_epochs: 3000
|
80 |
+
max_eval_sentences: 1
|
81 |
+
max_eval_tokens: 60000
|
82 |
+
max_frames: 42000
|
83 |
+
max_input_tokens: 60000
|
84 |
+
max_sentences: 12
|
85 |
+
max_tokens: 128000
|
86 |
+
max_updates: 1000000
|
87 |
+
mel_loss: ssim:0.5|l1:0.5
|
88 |
+
mel_vmax: 1.5
|
89 |
+
mel_vmin: -6.0
|
90 |
+
min_level_db: -120
|
91 |
+
no_fs2: true
|
92 |
+
norm_type: gn
|
93 |
+
num_ckpt_keep: 10
|
94 |
+
num_heads: 2
|
95 |
+
num_sanity_val_steps: 1
|
96 |
+
num_spk: 1
|
97 |
+
num_test_samples: 0
|
98 |
+
num_valid_plots: 10
|
99 |
+
optimizer_adam_beta1: 0.9
|
100 |
+
optimizer_adam_beta2: 0.98
|
101 |
+
out_wav_norm: false
|
102 |
+
pe_ckpt: checkpoints/0102_xiaoma_pe/model_ckpt_steps_60000.ckpt
|
103 |
+
pe_enable: false
|
104 |
+
perform_enhance: true
|
105 |
+
pitch_ar: false
|
106 |
+
pitch_enc_hidden_stride_kernel:
|
107 |
+
- 0,2,5
|
108 |
+
- 0,2,5
|
109 |
+
- 0,2,5
|
110 |
+
pitch_extractor: parselmouth
|
111 |
+
pitch_loss: l2
|
112 |
+
pitch_norm: log
|
113 |
+
pitch_type: frame
|
114 |
+
pndm_speedup: 10
|
115 |
+
pre_align_args:
|
116 |
+
allow_no_txt: false
|
117 |
+
denoise: false
|
118 |
+
forced_align: mfa
|
119 |
+
txt_processor: zh_g2pM
|
120 |
+
use_sox: true
|
121 |
+
use_tone: false
|
122 |
+
pre_align_cls: data_gen.singing.pre_align.SingingPreAlign
|
123 |
+
predictor_dropout: 0.5
|
124 |
+
predictor_grad: 0.1
|
125 |
+
predictor_hidden: -1
|
126 |
+
predictor_kernel: 5
|
127 |
+
predictor_layers: 5
|
128 |
+
prenet_dropout: 0.5
|
129 |
+
prenet_hidden_size: 256
|
130 |
+
pretrain_fs_ckpt: ''
|
131 |
+
processed_data_dir: xxx
|
132 |
+
profile_infer: false
|
133 |
+
raw_data_dir: data/raw/Unnamed
|
134 |
+
ref_norm_layer: bn
|
135 |
+
rel_pos: true
|
136 |
+
reset_phone_dict: true
|
137 |
+
residual_channels: 384
|
138 |
+
residual_layers: 20
|
139 |
+
save_best: false
|
140 |
+
save_ckpt: true
|
141 |
+
save_codes:
|
142 |
+
- configs
|
143 |
+
- modules
|
144 |
+
- src
|
145 |
+
- utils
|
146 |
+
save_f0: true
|
147 |
+
save_gt: false
|
148 |
+
schedule_type: linear
|
149 |
+
seed: 1234
|
150 |
+
sort_by_len: true
|
151 |
+
speaker_id: Unnamed
|
152 |
+
spec_max:
|
153 |
+
- -0.4884430170059204
|
154 |
+
- 0.004534448496997356
|
155 |
+
- 0.5684943795204163
|
156 |
+
- 0.6527385115623474
|
157 |
+
- 0.659079372882843
|
158 |
+
- 0.7416915893554688
|
159 |
+
- 0.844637930393219
|
160 |
+
- 0.806076169013977
|
161 |
+
- 0.7238750457763672
|
162 |
+
- 0.9744535088539124
|
163 |
+
- 0.9476388692855835
|
164 |
+
- 0.9883336424827576
|
165 |
+
- 1.0821290016174316
|
166 |
+
- 1.046391248703003
|
167 |
+
- 0.9829667806625366
|
168 |
+
- 1.0163493156433105
|
169 |
+
- 0.9825412631034851
|
170 |
+
- 0.9834834337234497
|
171 |
+
- 0.9811502695083618
|
172 |
+
- 1.128888726234436
|
173 |
+
- 1.186057209968567
|
174 |
+
- 1.112004280090332
|
175 |
+
- 1.1282787322998047
|
176 |
+
- 1.051572322845459
|
177 |
+
- 1.0510444641113281
|
178 |
+
- 1.0110565423965454
|
179 |
+
- 0.9236567616462708
|
180 |
+
- 0.8036720156669617
|
181 |
+
- 0.8383486270904541
|
182 |
+
- 0.7735869288444519
|
183 |
+
- 0.9303848743438721
|
184 |
+
- 1.1257890462875366
|
185 |
+
- 1.1610286235809326
|
186 |
+
- 1.0335885286331177
|
187 |
+
- 1.0645352602005005
|
188 |
+
- 1.0619306564331055
|
189 |
+
- 1.1310148239135742
|
190 |
+
- 1.1191954612731934
|
191 |
+
- 1.1307402849197388
|
192 |
+
- 0.8837698698043823
|
193 |
+
- 1.1153966188430786
|
194 |
+
- 1.1045044660568237
|
195 |
+
- 1.0479614734649658
|
196 |
+
- 0.9491603374481201
|
197 |
+
- 0.9858523011207581
|
198 |
+
- 0.9226155281066895
|
199 |
+
- 0.9469702839851379
|
200 |
+
- 0.8791896104812622
|
201 |
+
- 0.997624933719635
|
202 |
+
- 0.9068642854690552
|
203 |
+
- 0.9575618505477905
|
204 |
+
- 0.8551340699195862
|
205 |
+
- 0.8397778272628784
|
206 |
+
- 0.8908605575561523
|
207 |
+
- 0.7986546158790588
|
208 |
+
- 0.7983465194702148
|
209 |
+
- 0.6965265274047852
|
210 |
+
- 0.640673041343689
|
211 |
+
- 0.6690735220909119
|
212 |
+
- 0.5631484985351562
|
213 |
+
- 0.48587048053741455
|
214 |
+
- 0.5326520800590515
|
215 |
+
- 0.4286036193370819
|
216 |
+
- 0.35252484679222107
|
217 |
+
- 0.3290073573589325
|
218 |
+
- 0.4754445552825928
|
219 |
+
- 0.3632410168647766
|
220 |
+
- 0.391481876373291
|
221 |
+
- 0.20288512110710144
|
222 |
+
- 0.18305960297584534
|
223 |
+
- 0.1539602279663086
|
224 |
+
- 0.03451670706272125
|
225 |
+
- -0.16881510615348816
|
226 |
+
- -0.02030198462307453
|
227 |
+
- 0.10024689882993698
|
228 |
+
- -0.023952053859829903
|
229 |
+
- 0.05635542422533035
|
230 |
+
- 0.10877621918916702
|
231 |
+
- 0.006155031267553568
|
232 |
+
- 0.07318088412284851
|
233 |
+
- 0.14075303077697754
|
234 |
+
- 0.057870157063007355
|
235 |
+
- -0.0520513579249382
|
236 |
+
- 0.1741427332162857
|
237 |
+
- -0.11464552581310272
|
238 |
+
- 0.03305494412779808
|
239 |
+
- -0.06897418200969696
|
240 |
+
- -0.12598733603954315
|
241 |
+
- -0.09894973039627075
|
242 |
+
- -0.2817802429199219
|
243 |
+
- -0.0825519785284996
|
244 |
+
- -0.3040400445461273
|
245 |
+
- -0.4998124837875366
|
246 |
+
- -0.36957985162734985
|
247 |
+
- -0.5409602522850037
|
248 |
+
- -0.49879470467567444
|
249 |
+
- -0.713716983795166
|
250 |
+
- -0.6545754671096802
|
251 |
+
- -0.6425778865814209
|
252 |
+
- -0.6178902387619019
|
253 |
+
- -0.47356730699539185
|
254 |
+
- -0.6165243983268738
|
255 |
+
- -0.5841533541679382
|
256 |
+
- -0.5759448409080505
|
257 |
+
- -0.5498068332672119
|
258 |
+
- -0.4661938548088074
|
259 |
+
- -0.5811225771903992
|
260 |
+
- -0.614664614200592
|
261 |
+
- -0.3902229070663452
|
262 |
+
- -0.7037366032600403
|
263 |
+
- -0.7260795831680298
|
264 |
+
- -0.7540019750595093
|
265 |
+
- -0.8360528945922852
|
266 |
+
- -0.8374698758125305
|
267 |
+
- -0.8328713178634644
|
268 |
+
- -0.9081047177314758
|
269 |
+
- -0.9679695963859558
|
270 |
+
- -0.9587443470954895
|
271 |
+
- -1.0706337690353394
|
272 |
+
- -0.9818469285964966
|
273 |
+
- -0.8360191583633423
|
274 |
+
- -0.9938981533050537
|
275 |
+
- -1.0823708772659302
|
276 |
+
- -1.0617167949676514
|
277 |
+
- -1.1093820333480835
|
278 |
+
- -1.1300138235092163
|
279 |
+
- -1.2141350507736206
|
280 |
+
- -1.3147293329238892
|
281 |
+
spec_min:
|
282 |
+
- -4.473258972167969
|
283 |
+
- -4.244492530822754
|
284 |
+
- -4.390527725219727
|
285 |
+
- -4.209497928619385
|
286 |
+
- -4.446024417877197
|
287 |
+
- -4.3960185050964355
|
288 |
+
- -4.164802551269531
|
289 |
+
- -4.5063300132751465
|
290 |
+
- -4.608232021331787
|
291 |
+
- -4.251623630523682
|
292 |
+
- -4.4799604415893555
|
293 |
+
- -4.733210563659668
|
294 |
+
- -4.411860466003418
|
295 |
+
- -4.609100818634033
|
296 |
+
- -4.726972579956055
|
297 |
+
- -4.428761959075928
|
298 |
+
- -4.487612247467041
|
299 |
+
- -4.525552749633789
|
300 |
+
- -4.480506896972656
|
301 |
+
- -4.589383125305176
|
302 |
+
- -4.608384132385254
|
303 |
+
- -4.385376453399658
|
304 |
+
- -4.816161632537842
|
305 |
+
- -4.8706955909729
|
306 |
+
- -4.848956108093262
|
307 |
+
- -4.431278705596924
|
308 |
+
- -4.999994277954102
|
309 |
+
- -4.818373203277588
|
310 |
+
- -4.527368068695068
|
311 |
+
- -4.872085094451904
|
312 |
+
- -4.894851207733154
|
313 |
+
- -4.511948585510254
|
314 |
+
- -4.534575939178467
|
315 |
+
- -4.57792854309082
|
316 |
+
- -4.444681644439697
|
317 |
+
- -4.628803253173828
|
318 |
+
- -4.74341344833374
|
319 |
+
- -4.85427713394165
|
320 |
+
- -4.723776817321777
|
321 |
+
- -4.7166008949279785
|
322 |
+
- -4.749168395996094
|
323 |
+
- -4.67240047454834
|
324 |
+
- -4.590690612792969
|
325 |
+
- -4.576009750366211
|
326 |
+
- -4.542308330535889
|
327 |
+
- -4.890907287597656
|
328 |
+
- -4.613001823425293
|
329 |
+
- -4.494126796722412
|
330 |
+
- -4.474257946014404
|
331 |
+
- -4.574635028839111
|
332 |
+
- -4.4817585945129395
|
333 |
+
- -4.651009559631348
|
334 |
+
- -4.478254795074463
|
335 |
+
- -4.523812770843506
|
336 |
+
- -4.546536922454834
|
337 |
+
- -4.535660266876221
|
338 |
+
- -4.470296859741211
|
339 |
+
- -4.577486991882324
|
340 |
+
- -4.541748046875
|
341 |
+
- -4.428532123565674
|
342 |
+
- -4.461862564086914
|
343 |
+
- -4.489077091217041
|
344 |
+
- -4.515830039978027
|
345 |
+
- -4.395663738250732
|
346 |
+
- -4.439975738525391
|
347 |
+
- -4.4290876388549805
|
348 |
+
- -4.397741794586182
|
349 |
+
- -4.478252410888672
|
350 |
+
- -4.399686336517334
|
351 |
+
- -4.45617151260376
|
352 |
+
- -4.434477806091309
|
353 |
+
- -4.442898750305176
|
354 |
+
- -4.5840277671813965
|
355 |
+
- -4.537542819976807
|
356 |
+
- -4.492046356201172
|
357 |
+
- -4.534677505493164
|
358 |
+
- -4.477104187011719
|
359 |
+
- -4.511618614196777
|
360 |
+
- -4.387601375579834
|
361 |
+
- -4.499236106872559
|
362 |
+
- -4.3717169761657715
|
363 |
+
- -4.4242024421691895
|
364 |
+
- -4.4055657386779785
|
365 |
+
- -4.429355144500732
|
366 |
+
- -4.4636993408203125
|
367 |
+
- -4.508528232574463
|
368 |
+
- -4.515079498291016
|
369 |
+
- -4.426190376281738
|
370 |
+
- -4.433525085449219
|
371 |
+
- -4.4200215339660645
|
372 |
+
- -4.421280860900879
|
373 |
+
- -4.400143623352051
|
374 |
+
- -4.419166088104248
|
375 |
+
- -4.429825305938721
|
376 |
+
- -4.436781406402588
|
377 |
+
- -4.51550817489624
|
378 |
+
- -4.518474578857422
|
379 |
+
- -4.495880603790283
|
380 |
+
- -4.483924865722656
|
381 |
+
- -4.409562587738037
|
382 |
+
- -4.3811845779418945
|
383 |
+
- -4.411908149719238
|
384 |
+
- -4.427165985107422
|
385 |
+
- -4.396549701690674
|
386 |
+
- -4.340637683868408
|
387 |
+
- -4.405435085296631
|
388 |
+
- -4.367630481719971
|
389 |
+
- -4.419083595275879
|
390 |
+
- -4.389026165008545
|
391 |
+
- -4.371067047119141
|
392 |
+
- -4.370710372924805
|
393 |
+
- -4.3755269050598145
|
394 |
+
- -4.39500093460083
|
395 |
+
- -4.451773166656494
|
396 |
+
- -4.365351676940918
|
397 |
+
- -4.348028182983398
|
398 |
+
- -4.408270359039307
|
399 |
+
- -4.390385627746582
|
400 |
+
- -4.347931861877441
|
401 |
+
- -4.378237247467041
|
402 |
+
- -4.426717758178711
|
403 |
+
- -4.364233493804932
|
404 |
+
- -4.371546745300293
|
405 |
+
- -4.402477264404297
|
406 |
+
- -4.430750846862793
|
407 |
+
- -4.404538154602051
|
408 |
+
- -4.384459018707275
|
409 |
+
- -4.401677131652832
|
410 |
+
spk_cond_steps: []
|
411 |
+
stop_token_weight: 5.0
|
412 |
+
task_cls: training.task.SVC_task.SVCTask
|
413 |
+
test_ids: []
|
414 |
+
test_input_dir: ''
|
415 |
+
test_num: 0
|
416 |
+
test_prefixes:
|
417 |
+
- test
|
418 |
+
test_set_name: test
|
419 |
+
timesteps: 1000
|
420 |
+
train_set_name: train
|
421 |
+
use_crepe: false
|
422 |
+
use_denoise: false
|
423 |
+
use_energy_embed: false
|
424 |
+
use_gt_dur: false
|
425 |
+
use_gt_f0: false
|
426 |
+
use_midi: false
|
427 |
+
use_nsf: true
|
428 |
+
use_pitch_embed: true
|
429 |
+
use_pos_embed: true
|
430 |
+
use_spk_embed: false
|
431 |
+
use_spk_id: false
|
432 |
+
use_split_spk_id: false
|
433 |
+
use_uv: false
|
434 |
+
use_var_enc: false
|
435 |
+
use_vec: false
|
436 |
+
val_check_interval: 1000
|
437 |
+
valid_num: 0
|
438 |
+
valid_set_name: valid
|
439 |
+
vocoder: network.vocoders.nsf_hifigan.NsfHifiGAN
|
440 |
+
vocoder_ckpt: checkpoints/nsf_hifigan/model
|
441 |
+
warmup_updates: 2000
|
442 |
+
wav2spec_eps: 1e-6
|
443 |
+
weight_decay: 0
|
444 |
+
win_size: 2048
|
445 |
+
work_dir: checkpoints/HokoHifi
|
checkpoints/Unnamed/lightning_logs/lastest/hparams.yaml
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{}
|
checkpoints/Unnamed/model_ckpt_steps_192000.ckpt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c441462923580893a6170dd00126084be0a20b387b1c4fb1860755acd36c881b
|
3 |
+
size 391390823
|
checkpoints/nsf_hifigan/NOTICE.txt
ADDED
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
--- DiffSinger Community Vocoder ---
|
2 |
+
|
3 |
+
ARCHITECTURE: NSF-HiFiGAN
|
4 |
+
RELEASE DATE: 2022-12-11
|
5 |
+
|
6 |
+
HYPER PARAMETERS:
|
7 |
+
- 44100 sample rate
|
8 |
+
- 128 mel bins
|
9 |
+
- 512 hop size
|
10 |
+
- 2048 window size
|
11 |
+
- fmin at 40Hz
|
12 |
+
- fmax at 16000Hz
|
13 |
+
|
14 |
+
|
15 |
+
NOTICE:
|
16 |
+
|
17 |
+
All model weights in the [DiffSinger Community Vocoder Project](https://openvpi.github.io/vocoders/), including
|
18 |
+
model weights in this directory, are provided by the [OpenVPI Team](https://github.com/openvpi/), under the
|
19 |
+
[Attribution-NonCommercial-ShareAlike 4.0 International](https://creativecommons.org/licenses/by-nc-sa/4.0/) license.
|
20 |
+
|
21 |
+
|
22 |
+
ACKNOWLEDGEMENTS:
|
23 |
+
|
24 |
+
Training data of this vocoder is provided and permitted by the following organizations, societies and individuals:
|
25 |
+
|
26 |
+
孙飒 https://www.qfssr.cn
|
27 |
+
赤松_Akamatsu https://www.zhibin.club
|
28 |
+
乐威 https://www.zhibin.club
|
29 |
+
伯添 https://space.bilibili.com/24087011
|
30 |
+
雲宇光 https://space.bilibili.com/660675050
|
31 |
+
橙子言 https://space.bilibili.com/318486464
|
32 |
+
人衣大人 https://space.bilibili.com/2270344
|
33 |
+
玖蝶 https://space.bilibili.com/676771003
|
34 |
+
Yuuko
|
35 |
+
白夜零BYL https://space.bilibili.com/1605040503
|
36 |
+
嗷天 https://space.bilibili.com/5675252
|
37 |
+
洛泠羽 https://space.bilibili.com/347373318
|
38 |
+
灰条纹的灰猫君 https://space.bilibili.com/2083633
|
39 |
+
幽寂 https://space.bilibili.com/478860
|
40 |
+
恶魔王女 https://space.bilibili.com/2475098
|
41 |
+
AlexYHX 芮晴
|
42 |
+
绮萱 https://y.qq.com/n/ryqq/singer/003HjD6H4aZn1K
|
43 |
+
诗芸 https://y.qq.com/n/ryqq/singer/0005NInj142zm0
|
44 |
+
汐蕾 https://y.qq.com/n/ryqq/singer/0023cWMH1Bq1PJ
|
45 |
+
1262917464
|
46 |
+
炜阳
|
47 |
+
叶卡yolka
|
48 |
+
幸の夏 https://space.bilibili.com/1017297686
|
49 |
+
暮色未量 https://space.bilibili.com/272904686
|
50 |
+
晓寞sama https://space.bilibili.com/3463394
|
51 |
+
没头绪的节操君
|
52 |
+
串串BunC https://space.bilibili.com/95817834
|
53 |
+
落雨 https://space.bilibili.com/1292427
|
54 |
+
长尾巴的翎艾 https://space.bilibili.com/1638666
|
55 |
+
声闻计划 https://space.bilibili.com/392812269
|
56 |
+
唐家大小姐 http://5sing.kugou.com/palmusic/default.html
|
57 |
+
不伊子
|
58 |
+
|
59 |
+
Training machines are provided by:
|
60 |
+
|
61 |
+
花儿不哭 https://space.bilibili.com/5760446
|
62 |
+
|
63 |
+
|
64 |
+
TERMS OF REDISTRIBUTIONS:
|
65 |
+
|
66 |
+
1. Do not sell this vocoder, or charge any fees from redistributing it, as prohibited by
|
67 |
+
the license.
|
68 |
+
2. Include a copy of the CC BY-NC-SA 4.0 license, or a link referring to it.
|
69 |
+
3. Include a copy of this notice, or any other notices informing that this vocoder is
|
70 |
+
provided by the OpenVPI Team, that this vocoder is licensed under CC BY-NC-SA 4.0, and
|
71 |
+
with a complete acknowledgement list as shown above.
|
72 |
+
4. If you fine-tuned or modified the weights, leave a notice about what has been changed.
|
73 |
+
5. (Optional) Leave a link to the official release page of the vocoder, and tell users
|
74 |
+
that other versions and future updates of this vocoder can be obtained from the website.
|
checkpoints/nsf_hifigan/NOTICE.zh-CN.txt
ADDED
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
--- DiffSinger 社区声码器 ---
|
2 |
+
|
3 |
+
架构:NSF-HiFiGAN
|
4 |
+
发布日期:2022-12-11
|
5 |
+
|
6 |
+
超参数:
|
7 |
+
- 44100 sample rate
|
8 |
+
- 128 mel bins
|
9 |
+
- 512 hop size
|
10 |
+
- 2048 window size
|
11 |
+
- fmin at 40Hz
|
12 |
+
- fmax at 16000Hz
|
13 |
+
|
14 |
+
|
15 |
+
注意事项:
|
16 |
+
|
17 |
+
[DiffSinger 社区声码器企划](https://openvpi.github.io/vocoders/) 中的所有模型权重,
|
18 |
+
包括此目录下的模型权重,均由 [OpenVPI Team](https://github.com/openvpi/) 提供,并基于
|
19 |
+
[Attribution-NonCommercial-ShareAlike 4.0 International](https://creativecommons.org/licenses/by-nc-sa/4.0/)
|
20 |
+
进行许可。
|
21 |
+
|
22 |
+
|
23 |
+
致谢:
|
24 |
+
|
25 |
+
此声码器的训练数据由以下组织、社团和个人提供并许可:
|
26 |
+
|
27 |
+
孙飒 https://www.qfssr.cn
|
28 |
+
赤松_Akamatsu https://www.zhibin.club
|
29 |
+
乐威 https://www.zhibin.club
|
30 |
+
伯添 https://space.bilibili.com/24087011
|
31 |
+
雲宇光 https://space.bilibili.com/660675050
|
32 |
+
橙子言 https://space.bilibili.com/318486464
|
33 |
+
人衣大人 https://space.bilibili.com/2270344
|
34 |
+
玖蝶 https://space.bilibili.com/676771003
|
35 |
+
Yuuko
|
36 |
+
白夜零BYL https://space.bilibili.com/1605040503
|
37 |
+
嗷天 https://space.bilibili.com/5675252
|
38 |
+
洛泠羽 https://space.bilibili.com/347373318
|
39 |
+
灰条纹的灰猫君 https://space.bilibili.com/2083633
|
40 |
+
幽寂 https://space.bilibili.com/478860
|
41 |
+
恶魔王女 https://space.bilibili.com/2475098
|
42 |
+
AlexYHX 芮晴
|
43 |
+
绮萱 https://y.qq.com/n/ryqq/singer/003HjD6H4aZn1K
|
44 |
+
诗芸 https://y.qq.com/n/ryqq/singer/0005NInj142zm0
|
45 |
+
汐蕾 https://y.qq.com/n/ryqq/singer/0023cWMH1Bq1PJ
|
46 |
+
1262917464
|
47 |
+
炜阳
|
48 |
+
叶卡yolka
|
49 |
+
幸の夏 https://space.bilibili.com/1017297686
|
50 |
+
暮色未量 https://space.bilibili.com/272904686
|
51 |
+
晓寞sama https://space.bilibili.com/3463394
|
52 |
+
没头绪的节操君
|
53 |
+
串串BunC https://space.bilibili.com/95817834
|
54 |
+
落雨 https://space.bilibili.com/1292427
|
55 |
+
长尾巴的翎艾 https://space.bilibili.com/1638666
|
56 |
+
声闻计划 https://space.bilibili.com/392812269
|
57 |
+
唐家大小姐 http://5sing.kugou.com/palmusic/default.html
|
58 |
+
不伊子
|
59 |
+
|
60 |
+
训练算力的提供者如下:
|
61 |
+
|
62 |
+
花儿不哭 https://space.bilibili.com/5760446
|
63 |
+
|
64 |
+
|
65 |
+
二次分发条款:
|
66 |
+
|
67 |
+
1. 请勿售卖此声码器或从其二次分发过程中收取任何费用,因为此类行为受到许可证的禁止。
|
68 |
+
2. 请在二次分发文件中包含一份 CC BY-NC-SA 4.0 许可证的副本或指向该许可证的链接。
|
69 |
+
3. 请在二次分发文件中包含这份声明,或以其他形式声明此声码器由 OpenVPI Team 提供并基于 CC BY-NC-SA 4.0 许可,
|
70 |
+
并附带上述完整的致谢名单。
|
71 |
+
4. 如果您微调或修改了权重,请留下一份关于其受到了何种修改的说明。
|
72 |
+
5.(可选)留下一份指向此声码器的官方发布页面的链接,并告知使用者可从该网站获取此声码器的其他版本和未来的更新。
|
checkpoints/nsf_hifigan/config.json
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"resblock": "1",
|
3 |
+
"num_gpus": 4,
|
4 |
+
"batch_size": 10,
|
5 |
+
"learning_rate": 0.0002,
|
6 |
+
"adam_b1": 0.8,
|
7 |
+
"adam_b2": 0.99,
|
8 |
+
"lr_decay": 0.999,
|
9 |
+
"seed": 1234,
|
10 |
+
|
11 |
+
"upsample_rates": [ 8, 8, 2, 2, 2],
|
12 |
+
"upsample_kernel_sizes": [16,16, 4, 4, 4],
|
13 |
+
"upsample_initial_channel": 512,
|
14 |
+
"resblock_kernel_sizes": [3,7,11],
|
15 |
+
"resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
|
16 |
+
"discriminator_periods": [3, 5, 7, 11, 17, 23, 37],
|
17 |
+
|
18 |
+
"segment_size": 16384,
|
19 |
+
"num_mels": 128,
|
20 |
+
"num_freq": 1025,
|
21 |
+
"n_fft" : 2048,
|
22 |
+
"hop_size": 512,
|
23 |
+
"win_size": 2048,
|
24 |
+
|
25 |
+
"sampling_rate": 44100,
|
26 |
+
|
27 |
+
"fmin": 40,
|
28 |
+
"fmax": 16000,
|
29 |
+
"fmax_for_loss": null,
|
30 |
+
|
31 |
+
"num_workers": 16,
|
32 |
+
|
33 |
+
"dist_config": {
|
34 |
+
"dist_backend": "nccl",
|
35 |
+
"dist_url": "tcp://localhost:54321",
|
36 |
+
"world_size": 1
|
37 |
+
}
|
38 |
+
}
|
checkpoints/nsf_hifigan/model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2c576b63b7ed952161b70fad34e0562ace502ce689195520d8a2a6c051de29d6
|
3 |
+
size 56825430
|