Kangarroar committed on
Commit
632f309
1 Parent(s): ef5bee9

Upload 11 files

Browse files
.gitattributes CHANGED
@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
  results/test_output.wav filter=lfs diff=lfs merge=lfs -text
36
  test_output.wav filter=lfs diff=lfs merge=lfs -text
 
 
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
  results/test_output.wav filter=lfs diff=lfs merge=lfs -text
36
  test_output.wav filter=lfs diff=lfs merge=lfs -text
37
+ checkpoints/nsf_hifigan/model filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import tkinter as tk
2
+ import tkinter.filedialog
3
+ import tkinter.ttk as ttk
4
+ import tkinter as tk
5
+ from tkinter import ttk
6
+ import wave
7
+ from utils.hparams import hparams
8
+ from preprocessing.data_gen_utils import get_pitch_parselmouth,get_pitch_crepe
9
+ import numpy as np
10
+ import IPython.display as ipd
11
+ import utils
12
+ import librosa
13
+ import torchcrepe
14
+ from infer import *
15
+ import logging
16
+ from infer_tools.infer_tool import *
17
+ from tkinter import Label
18
+ from time import sleep
19
+ import os
20
# Root Tk window that hosts every widget of the rendering tool.
window = tk.Tk()
window.title("Diff-SVC Rendering Tool")
window.geometry("250x400")

# Text widget used as a simple in-app console for status messages.
textbox = tk.Text(window)
textbox.grid(row=4, column=0, sticky="nsew", padx=20, pady=20)

# Label placed in grid row 5, the same cell start() later uses for the progress bar.
loading_animation_label = Label(window)
loading_animation_label.grid(row=5, column=0)

# Column 0 and the console row (row 4) absorb any extra window space.
window.grid_columnconfigure(0, weight=1)
window.grid_rowconfigure(4, weight=1)

button1 = ttk.Button(window, text="Load Model")
button1.grid(row=0, column=0, padx=20, pady=20)

# Indeterminate progress bar; it is created here but not gridded yet.
pb = ttk.Progressbar(window, orient='horizontal', mode='indeterminate', length=250)
48
def start():
    """Place the progress bar in grid row 5 and start its animation."""
    pb.grid(row=5, column=0, padx=0, pady=0)
    # 10 is the interval passed to Progressbar.start (milliseconds between steps).
    pb.start(10)
51
+
52
def stop():
    """Stop the progress bar animation and take it out of the grid layout."""
    pb.stop()
    # grid_remove (rather than grid_forget) keeps the grid options for the next start().
    pb.grid_remove()
55
def button1_clicked():
    """Ask for a checkpoint, its YAML config and a project name, then load the model.

    On success the loaded model and the project name are stored in the
    module-level globals ``svc_model`` and ``project_name``, which
    ``button2_clicked`` reads. If any dialog is cancelled or left empty an
    error box is shown and the globals are left untouched.
    """
    # Imported explicitly: ``import tkinter`` alone does not guarantee that the
    # ``messagebox`` and ``simpledialog`` submodules have been loaded.
    from tkinter import messagebox, simpledialog

    global project_name, svc_model

    # ``not path`` covers both '' and the empty tuple some platforms return on cancel.
    model_path = tkinter.filedialog.askopenfilename(
        title="Select CKPT File", filetypes=[("Checkpoint files", "*.ckpt")])
    if not model_path:
        messagebox.showerror("Error", "No CKPT file selected")
        return

    config_path = tkinter.filedialog.askopenfilename(
        title="Select YAML File", filetypes=[("Yaml files", "*.yaml")])
    if not config_path:
        messagebox.showerror("Error", "No YAML file selected")
        return

    # askstring returns None when the dialog is cancelled and '' when left blank.
    entered_name = simpledialog.askstring("Input", "Enter project name:", parent=window)
    if not entered_name:
        messagebox.showerror("Error", "No Project Name")
        return

    logging.getLogger('numba').setLevel(logging.WARNING)

    # The progress bar is only shown once all inputs are valid, and try/finally
    # guarantees it is hidden again even if loading the model raises.
    start()
    try:
        # hubert_gpu is the module-level BooleanVar bound to the "Use GPU" checkbox.
        loaded_model = Svc(entered_name, config_path, bool(hubert_gpu.get()), model_path)
    finally:
        stop()

    # Publish the new state only after the model loaded successfully.
    project_name = entered_name
    svc_model = loaded_model
    textbox.insert('end', 'model loaded\n')
80
+
81
+
82
# Hook the "Load Model" button up to its click handler.
button1.configure(command=button1_clicked)

# Second button: triggers the rendering workflow.
button2 = ttk.Button(window, text="Start Rendering")
button2.grid(row=1, column=0, padx=20, pady=20)
87
+
88
+ # Define a callback function for the second button
89
def button2_clicked():
    """Render a user-selected WAV file with the loaded model and open the result.

    Prompts for the input WAV file, an integer key value and an output track
    name (``.wav`` is appended when missing), then calls ``run_clip`` with the
    module-level ``svc_model`` and ``project_name`` set by ``button1_clicked``.
    Cancelling any prompt aborts without rendering.
    """
    # Local imports: the file never imports these names at module level
    # (it only has ``from time import sleep`` and ``import tkinter``).
    import time
    from tkinter import messagebox, simpledialog

    # The model globals only exist after "Load Model" has succeeded.
    if 'svc_model' not in globals() or 'project_name' not in globals():
        messagebox.showerror("Error", "No model loaded")
        return

    wav_fn = tkinter.filedialog.askopenfilename(filetypes=[("WAV files", "*.wav")])
    if not wav_fn:
        messagebox.showerror("Error", "No WAV file selected")
        return

    key = simpledialog.askinteger("Input", "Enter key value:", parent=window)
    if key is None:
        # Dialog cancelled; compare against None because 0 is a valid key.
        return

    wav_gen = simpledialog.askstring("Input", "Enter the track name:", parent=window)
    if not wav_gen:
        messagebox.showerror("Error", "No track name")
        return
    if not wav_gen.endswith('.wav'):
        wav_gen += '.wav'

    pndm_speedup = 20
    textbox.insert('end', 'Rendering Started, please wait...\n')
    start()
    try:
        # The return values (f0 curves and audio) are not used by the GUI.
        run_clip(svc_model, file_path=wav_fn, key=key, acc=pndm_speedup,
                 use_crepe=True, use_pe=True, thre=0.05, use_gt_mel=False,
                 add_noise_step=500, project_name=project_name, out_path=wav_gen)
        # NOTE(review): 2 s pause kept from the original; presumably it gives the
        # output file time to be written before it is opened — confirm.
        time.sleep(2)
        textbox.insert('end', 'Rendering process done!\nPlaying Audio now...')
        # os.startfile only exists on Windows; skip playback elsewhere
        # instead of raising AttributeError.
        if hasattr(os, 'startfile'):
            os.startfile(wav_gen)
    finally:
        # Always hide the progress bar, even when rendering fails.
        stop()
109
# Hook the "Start Rendering" button up to its click handler.
button2.configure(command=button2_clicked)

# "Use GPU" checkbox; its state lives in the hubert_gpu BooleanVar.
hubert_gpu = tk.BooleanVar()
checkbox = tk.Checkbutton(window, variable=hubert_gpu, text="Use GPU")
checkbox.grid(row=3, column=0)

# Hand control to Tk; this call blocks until the window is closed.
window.mainloop()
checkpoints/0102_xiaoma_pe/config.yaml ADDED
@@ -0,0 +1,172 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ accumulate_grad_batches: 1
2
+ audio_num_mel_bins: 80
3
+ audio_sample_rate: 24000
4
+ base_config:
5
+ - configs/tts/lj/fs2.yaml
6
+ binarization_args:
7
+ shuffle: false
8
+ with_align: true
9
+ with_f0: true
10
+ with_f0cwt: true
11
+ with_spk_embed: true
12
+ with_txt: true
13
+ with_wav: false
14
+ binarizer_cls: data_gen.tts.base_binarizer.BaseBinarizer
15
+ binary_data_dir: data/binary/xiaoma1022_24k_128hop
16
+ check_val_every_n_epoch: 10
17
+ clip_grad_norm: 1
18
+ cwt_add_f0_loss: false
19
+ cwt_hidden_size: 128
20
+ cwt_layers: 2
21
+ cwt_loss: l1
22
+ cwt_std_scale: 0.8
23
+ debug: false
24
+ dec_ffn_kernel_size: 9
25
+ dec_layers: 4
26
+ decoder_type: fft
27
+ dict_dir: ''
28
+ dropout: 0.1
29
+ ds_workers: 4
30
+ dur_enc_hidden_stride_kernel:
31
+ - 0,2,3
32
+ - 0,2,3
33
+ - 0,1,3
34
+ dur_loss: mse
35
+ dur_predictor_kernel: 3
36
+ dur_predictor_layers: 2
37
+ enc_ffn_kernel_size: 9
38
+ enc_layers: 4
39
+ encoder_K: 8
40
+ encoder_type: fft
41
+ endless_ds: true
42
+ ffn_act: gelu
43
+ ffn_padding: SAME
44
+ fft_size: 512
45
+ fmax: 12000
46
+ fmin: 30
47
+ gen_dir_name: ''
48
+ hidden_size: 256
49
+ hop_size: 128
50
+ infer: false
51
+ lambda_commit: 0.25
52
+ lambda_energy: 0.1
53
+ lambda_f0: 1.0
54
+ lambda_ph_dur: 1.0
55
+ lambda_sent_dur: 1.0
56
+ lambda_uv: 1.0
57
+ lambda_word_dur: 1.0
58
+ load_ckpt: ''
59
+ log_interval: 100
60
+ loud_norm: false
61
+ lr: 2.0
62
+ max_epochs: 1000
63
+ max_eval_sentences: 1
64
+ max_eval_tokens: 60000
65
+ max_frames: 5000
66
+ max_input_tokens: 1550
67
+ max_sentences: 100000
68
+ max_tokens: 20000
69
+ max_updates: 60000
70
+ mel_loss: l1
71
+ mel_vmax: 1.5
72
+ mel_vmin: -6
73
+ min_level_db: -120
74
+ norm_type: gn
75
+ num_ckpt_keep: 3
76
+ num_heads: 2
77
+ num_sanity_val_steps: 5
78
+ num_spk: 1
79
+ num_test_samples: 20
80
+ num_valid_plots: 10
81
+ optimizer_adam_beta1: 0.9
82
+ optimizer_adam_beta2: 0.98
83
+ out_wav_norm: false
84
+ pitch_ar: false
85
+ pitch_enc_hidden_stride_kernel:
86
+ - 0,2,5
87
+ - 0,2,5
88
+ - 0,2,5
89
+ pitch_extractor_conv_layers: 2
90
+ pitch_loss: l1
91
+ pitch_norm: log
92
+ pitch_type: frame
93
+ pre_align_args:
94
+ allow_no_txt: false
95
+ denoise: false
96
+ forced_align: mfa
97
+ txt_processor: en
98
+ use_sox: false
99
+ use_tone: true
100
+ pre_align_cls: data_gen.tts.lj.pre_align.LJPreAlign
101
+ predictor_dropout: 0.5
102
+ predictor_grad: 0.1
103
+ predictor_hidden: -1
104
+ predictor_kernel: 5
105
+ predictor_layers: 2
106
+ prenet_dropout: 0.5
107
+ prenet_hidden_size: 256
108
+ pretrain_fs_ckpt: ''
109
+ processed_data_dir: data/processed/ljspeech
110
+ profile_infer: false
111
+ raw_data_dir: data/raw/LJSpeech-1.1
112
+ ref_norm_layer: bn
113
+ reset_phone_dict: true
114
+ save_best: false
115
+ save_ckpt: true
116
+ save_codes:
117
+ - configs
118
+ - modules
119
+ - tasks
120
+ - utils
121
+ - usr
122
+ save_f0: false
123
+ save_gt: false
124
+ seed: 1234
125
+ sort_by_len: true
126
+ stop_token_weight: 5.0
127
+ task_cls: tasks.tts.pe.PitchExtractionTask
128
+ test_ids:
129
+ - 68
130
+ - 70
131
+ - 74
132
+ - 87
133
+ - 110
134
+ - 172
135
+ - 190
136
+ - 215
137
+ - 231
138
+ - 294
139
+ - 316
140
+ - 324
141
+ - 402
142
+ - 422
143
+ - 485
144
+ - 500
145
+ - 505
146
+ - 508
147
+ - 509
148
+ - 519
149
+ test_input_dir: ''
150
+ test_num: 523
151
+ test_set_name: test
152
+ train_set_name: train
153
+ use_denoise: false
154
+ use_energy_embed: false
155
+ use_gt_dur: false
156
+ use_gt_f0: false
157
+ use_pitch_embed: true
158
+ use_pos_embed: true
159
+ use_spk_embed: false
160
+ use_spk_id: false
161
+ use_split_spk_id: false
162
+ use_uv: true
163
+ use_var_enc: false
164
+ val_check_interval: 2000
165
+ valid_num: 348
166
+ valid_set_name: valid
167
+ vocoder: pwg
168
+ vocoder_ckpt: ''
169
+ warmup_updates: 2000
170
+ weight_decay: 0
171
+ win_size: 512
172
+ work_dir: checkpoints/0102_xiaoma_pe
checkpoints/0102_xiaoma_pe/model_ckpt_steps_60000.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1863f12324e43783089ab933edeeb969106b851e30d71019ebbaa9b82099d82a
3
+ size 39141959
checkpoints/Unnamed/config.yaml ADDED
@@ -0,0 +1,445 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ K_step: 1000
2
+ accumulate_grad_batches: 1
3
+ audio_num_mel_bins: 128
4
+ audio_sample_rate: 44100
5
+ binarization_args:
6
+ shuffle: false
7
+ with_align: true
8
+ with_f0: true
9
+ with_hubert: false
10
+ with_spk_embed: false
11
+ with_wav: false
12
+ binarizer_cls: preprocessing.SVCpre.SVCBinarizer
13
+ binary_data_dir: data/binary/Unnamed
14
+ check_val_every_n_epoch: 10
15
+ choose_test_manually: false
16
+ clip_grad_norm: 1
17
+ config_path: training/config_nsf.yaml
18
+ content_cond_steps: []
19
+ cwt_add_f0_loss: false
20
+ cwt_hidden_size: 128
21
+ cwt_layers: 2
22
+ cwt_loss: l1
23
+ cwt_std_scale: 0.8
24
+ datasets:
25
+ - opencpop
26
+ debug: false
27
+ dec_ffn_kernel_size: 9
28
+ dec_layers: 4
29
+ decay_steps: 40000
30
+ decoder_type: fft
31
+ dict_dir: ''
32
+ diff_decoder_type: wavenet
33
+ diff_loss_type: l2
34
+ dilation_cycle_length: 4
35
+ dropout: 0.1
36
+ ds_workers: 4
37
+ dur_enc_hidden_stride_kernel:
38
+ - 0,2,3
39
+ - 0,2,3
40
+ - 0,1,3
41
+ dur_loss: mse
42
+ dur_predictor_kernel: 3
43
+ dur_predictor_layers: 5
44
+ enc_ffn_kernel_size: 9
45
+ enc_layers: 4
46
+ encoder_K: 8
47
+ encoder_type: fft
48
+ endless_ds: false
49
+ f0_bin: 256
50
+ f0_max: 1100.0
51
+ f0_min: 40.0
52
+ ffn_act: gelu
53
+ ffn_padding: SAME
54
+ fft_size: 2048
55
+ fmax: 16000
56
+ fmin: 40
57
+ fs2_ckpt: ''
58
+ gaussian_start: true
59
+ gen_dir_name: ''
60
+ gen_tgt_spk_id: -1
61
+ hidden_size: 256
62
+ hop_size: 512
63
+ hubert_gpu: true
64
+ hubert_path: checkpoints/hubert/hubert_soft.pt
65
+ infer: false
66
+ keep_bins: 128
67
+ lambda_commit: 0.25
68
+ lambda_energy: 0.0
69
+ lambda_f0: 1.0
70
+ lambda_ph_dur: 0.3
71
+ lambda_sent_dur: 1.0
72
+ lambda_uv: 1.0
73
+ lambda_word_dur: 1.0
74
+ load_ckpt: ''
75
+ log_interval: 100
76
+ loud_norm: false
77
+ lr: 0.0008
78
+ max_beta: 0.02
79
+ max_epochs: 3000
80
+ max_eval_sentences: 1
81
+ max_eval_tokens: 60000
82
+ max_frames: 42000
83
+ max_input_tokens: 60000
84
+ max_sentences: 12
85
+ max_tokens: 128000
86
+ max_updates: 1000000
87
+ mel_loss: ssim:0.5|l1:0.5
88
+ mel_vmax: 1.5
89
+ mel_vmin: -6.0
90
+ min_level_db: -120
91
+ no_fs2: true
92
+ norm_type: gn
93
+ num_ckpt_keep: 10
94
+ num_heads: 2
95
+ num_sanity_val_steps: 1
96
+ num_spk: 1
97
+ num_test_samples: 0
98
+ num_valid_plots: 10
99
+ optimizer_adam_beta1: 0.9
100
+ optimizer_adam_beta2: 0.98
101
+ out_wav_norm: false
102
+ pe_ckpt: checkpoints/0102_xiaoma_pe/model_ckpt_steps_60000.ckpt
103
+ pe_enable: false
104
+ perform_enhance: true
105
+ pitch_ar: false
106
+ pitch_enc_hidden_stride_kernel:
107
+ - 0,2,5
108
+ - 0,2,5
109
+ - 0,2,5
110
+ pitch_extractor: parselmouth
111
+ pitch_loss: l2
112
+ pitch_norm: log
113
+ pitch_type: frame
114
+ pndm_speedup: 10
115
+ pre_align_args:
116
+ allow_no_txt: false
117
+ denoise: false
118
+ forced_align: mfa
119
+ txt_processor: zh_g2pM
120
+ use_sox: true
121
+ use_tone: false
122
+ pre_align_cls: data_gen.singing.pre_align.SingingPreAlign
123
+ predictor_dropout: 0.5
124
+ predictor_grad: 0.1
125
+ predictor_hidden: -1
126
+ predictor_kernel: 5
127
+ predictor_layers: 5
128
+ prenet_dropout: 0.5
129
+ prenet_hidden_size: 256
130
+ pretrain_fs_ckpt: ''
131
+ processed_data_dir: xxx
132
+ profile_infer: false
133
+ raw_data_dir: data/raw/Unnamed
134
+ ref_norm_layer: bn
135
+ rel_pos: true
136
+ reset_phone_dict: true
137
+ residual_channels: 384
138
+ residual_layers: 20
139
+ save_best: false
140
+ save_ckpt: true
141
+ save_codes:
142
+ - configs
143
+ - modules
144
+ - src
145
+ - utils
146
+ save_f0: true
147
+ save_gt: false
148
+ schedule_type: linear
149
+ seed: 1234
150
+ sort_by_len: true
151
+ speaker_id: Unnamed
152
+ spec_max:
153
+ - -0.025250941514968872
154
+ - 0.004534448496997356
155
+ - 0.5684943795204163
156
+ - 0.6527385115623474
157
+ - 0.659079372882843
158
+ - 0.7416915893554688
159
+ - 0.844637930393219
160
+ - 0.806076169013977
161
+ - 0.7238750457763672
162
+ - 0.9744535088539124
163
+ - 0.9476388692855835
164
+ - 0.9883336424827576
165
+ - 1.0821290016174316
166
+ - 1.046391248703003
167
+ - 0.9829667806625366
168
+ - 1.0163493156433105
169
+ - 0.9825412631034851
170
+ - 0.9834834337234497
171
+ - 1.052114725112915
172
+ - 1.128888726234436
173
+ - 1.186057209968567
174
+ - 1.112004280090332
175
+ - 1.1282787322998047
176
+ - 1.051572322845459
177
+ - 1.1104764938354492
178
+ - 1.176831603050232
179
+ - 1.13348388671875
180
+ - 0.9916292428970337
181
+ - 0.8383486270904541
182
+ - 0.7735869288444519
183
+ - 0.9303848743438721
184
+ - 1.1257890462875366
185
+ - 1.1610286235809326
186
+ - 1.0335885286331177
187
+ - 1.0645352602005005
188
+ - 1.0619306564331055
189
+ - 1.1310148239135742
190
+ - 1.1191954612731934
191
+ - 1.1307402849197388
192
+ - 1.2094721794128418
193
+ - 1.2683185338974
194
+ - 1.1045044660568237
195
+ - 1.0479614734649658
196
+ - 0.9491603374481201
197
+ - 0.9858523011207581
198
+ - 0.9226155281066895
199
+ - 0.9469702839851379
200
+ - 1.023751139640808
201
+ - 1.1348609924316406
202
+ - 1.087107539176941
203
+ - 0.962234377861023
204
+ - 0.8551340699195862
205
+ - 0.8397778272628784
206
+ - 0.8908605575561523
207
+ - 0.7986546158790588
208
+ - 0.7983465194702148
209
+ - 0.6965265274047852
210
+ - 0.689120352268219
211
+ - 0.6862147450447083
212
+ - 0.5631484985351562
213
+ - 0.48587048053741455
214
+ - 0.5326520800590515
215
+ - 0.4286036193370819
216
+ - 0.35252484679222107
217
+ - 0.3290073573589325
218
+ - 0.4754445552825928
219
+ - 0.3632410168647766
220
+ - 0.391481876373291
221
+ - 0.2200046181678772
222
+ - 0.1869768500328064
223
+ - 0.1539602279663086
224
+ - 0.07932852953672409
225
+ - 0.012834634631872177
226
+ - 0.16596835851669312
227
+ - 0.10024689882993698
228
+ - -0.023952053859829903
229
+ - 0.05635542422533035
230
+ - 0.10877621918916702
231
+ - 0.0382893942296505
232
+ - 0.07318088412284851
233
+ - 0.14075303077697754
234
+ - 0.057870157063007355
235
+ - -0.0520513579249382
236
+ - 0.1741427332162857
237
+ - -0.11154910922050476
238
+ - 0.03305494412779808
239
+ - -0.022758174687623978
240
+ - -0.05313302204012871
241
+ - 0.00024538111756555736
242
+ - -0.26880618929862976
243
+ - -0.0825519785284996
244
+ - -0.3040400445461273
245
+ - -0.44150036573410034
246
+ - -0.36957985162734985
247
+ - -0.438098281621933
248
+ - -0.49879470467567444
249
+ - -0.5903350710868835
250
+ - -0.6418567895889282
251
+ - -0.6425778865814209
252
+ - -0.6178902387619019
253
+ - -0.47356730699539185
254
+ - -0.6052739024162292
255
+ - -0.5359307527542114
256
+ - -0.5759448409080505
257
+ - -0.5498068332672119
258
+ - -0.4661938548088074
259
+ - -0.5811225771903992
260
+ - -0.5229856967926025
261
+ - -0.3902229070663452
262
+ - -0.7037366032600403
263
+ - -0.7260795831680298
264
+ - -0.7540019750595093
265
+ - -0.828707754611969
266
+ - -0.8374698758125305
267
+ - -0.8328713178634644
268
+ - -0.9081047177314758
269
+ - -0.9679695963859558
270
+ - -0.9587443470954895
271
+ - -1.0706337690353394
272
+ - -0.9818469285964966
273
+ - -0.8360191583633423
274
+ - -0.9938982725143433
275
+ - -1.0823708772659302
276
+ - -1.0617167949676514
277
+ - -1.1093820333480835
278
+ - -1.1300138235092163
279
+ - -1.2141350507736206
280
+ - -1.3147293329238892
281
+ spec_min:
282
+ - -4.473258972167969
283
+ - -4.296891689300537
284
+ - -4.390527725219727
285
+ - -4.350704669952393
286
+ - -4.446024417877197
287
+ - -4.3960185050964355
288
+ - -4.164802551269531
289
+ - -4.5063300132751465
290
+ - -4.608232021331787
291
+ - -4.251623630523682
292
+ - -4.4799604415893555
293
+ - -4.733210563659668
294
+ - -4.411860466003418
295
+ - -4.609100818634033
296
+ - -4.726972579956055
297
+ - -4.497627258300781
298
+ - -4.487612247467041
299
+ - -4.665065765380859
300
+ - -4.480506896972656
301
+ - -4.589383125305176
302
+ - -4.86366605758667
303
+ - -4.5183892250061035
304
+ - -4.816161632537842
305
+ - -4.906436443328857
306
+ - -4.897279262542725
307
+ - -4.431278705596924
308
+ - -4.999994277954102
309
+ - -4.871325969696045
310
+ - -4.527368068695068
311
+ - -4.872085094451904
312
+ - -4.894851207733154
313
+ - -4.511948585510254
314
+ - -4.534575939178467
315
+ - -4.57792854309082
316
+ - -4.444681644439697
317
+ - -4.996480464935303
318
+ - -4.74341344833374
319
+ - -4.85427713394165
320
+ - -4.723776817321777
321
+ - -4.7166008949279785
322
+ - -4.749168395996094
323
+ - -4.67240047454834
324
+ - -4.590690612792969
325
+ - -4.576009750366211
326
+ - -4.542308330535889
327
+ - -4.890907287597656
328
+ - -4.631724834442139
329
+ - -4.494126796722412
330
+ - -4.499763488769531
331
+ - -4.574635028839111
332
+ - -4.49362850189209
333
+ - -4.651009559631348
334
+ - -4.684722900390625
335
+ - -4.594520568847656
336
+ - -4.5510125160217285
337
+ - -4.616012096405029
338
+ - -4.561031341552734
339
+ - -4.633460521697998
340
+ - -4.541748046875
341
+ - -4.625052452087402
342
+ - -4.524572372436523
343
+ - -4.563175201416016
344
+ - -4.515830039978027
345
+ - -4.581448554992676
346
+ - -4.556764125823975
347
+ - -4.695038795471191
348
+ - -4.548621654510498
349
+ - -4.5828471183776855
350
+ - -4.750834941864014
351
+ - -4.569651126861572
352
+ - -4.577111721038818
353
+ - -4.549272537231445
354
+ - -4.5840277671813965
355
+ - -4.574136257171631
356
+ - -4.574832439422607
357
+ - -4.549546718597412
358
+ - -4.490700721740723
359
+ - -4.635391712188721
360
+ - -4.567677974700928
361
+ - -4.516189098358154
362
+ - -4.6232805252075195
363
+ - -4.592589855194092
364
+ - -4.593951225280762
365
+ - -4.557478904724121
366
+ - -4.503338813781738
367
+ - -4.512742519378662
368
+ - -4.515079498291016
369
+ - -4.531710147857666
370
+ - -4.5540852546691895
371
+ - -4.441158771514893
372
+ - -4.489132404327393
373
+ - -4.519915580749512
374
+ - -4.570128917694092
375
+ - -4.480836391448975
376
+ - -4.494598865509033
377
+ - -4.51900053024292
378
+ - -4.518474578857422
379
+ - -4.519540309906006
380
+ - -4.495180130004883
381
+ - -4.471179962158203
382
+ - -4.478188514709473
383
+ - -4.475483417510986
384
+ - -4.479583263397217
385
+ - -4.491953372955322
386
+ - -4.4339680671691895
387
+ - -4.469926834106445
388
+ - -4.46633768081665
389
+ - -4.468038082122803
390
+ - -4.489401817321777
391
+ - -4.472512722015381
392
+ - -4.43712043762207
393
+ - -4.469909191131592
394
+ - -4.475585460662842
395
+ - -4.460614204406738
396
+ - -4.4658942222595215
397
+ - -4.4960408210754395
398
+ - -4.499384880065918
399
+ - -4.4431610107421875
400
+ - -4.440634727478027
401
+ - -4.468203544616699
402
+ - -4.461722373962402
403
+ - -4.503596305847168
404
+ - -4.457762241363525
405
+ - -4.453769207000732
406
+ - -4.509873390197754
407
+ - -4.505057334899902
408
+ - -4.486324787139893
409
+ - -4.49452018737793
410
+ spk_cond_steps: []
411
+ stop_token_weight: 5.0
412
+ task_cls: training.task.SVC_task.SVCTask
413
+ test_ids: []
414
+ test_input_dir: ''
415
+ test_num: 0
416
+ test_prefixes:
417
+ - test
418
+ test_set_name: test
419
+ timesteps: 1000
420
+ train_set_name: train
421
+ use_crepe: true
422
+ use_denoise: false
423
+ use_energy_embed: false
424
+ use_gt_dur: false
425
+ use_gt_f0: false
426
+ use_midi: false
427
+ use_nsf: true
428
+ use_pitch_embed: true
429
+ use_pos_embed: true
430
+ use_spk_embed: false
431
+ use_spk_id: false
432
+ use_split_spk_id: false
433
+ use_uv: false
434
+ use_var_enc: false
435
+ use_vec: false
436
+ val_check_interval: 2000
437
+ valid_num: 0
438
+ valid_set_name: valid
439
+ vocoder: network.vocoders.nsf_hifigan.NsfHifiGAN
440
+ vocoder_ckpt: checkpoints/nsf_hifigan/model
441
+ warmup_updates: 2000
442
+ wav2spec_eps: 1e-6
443
+ weight_decay: 0
444
+ win_size: 2048
445
+ work_dir: checkpoints/Unnamed
checkpoints/Unnamed/config_nsf.yaml ADDED
@@ -0,0 +1,445 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ K_step: 1000
2
+ accumulate_grad_batches: 1
3
+ audio_num_mel_bins: 128
4
+ audio_sample_rate: 44100
5
+ binarization_args:
6
+ shuffle: false
7
+ with_align: true
8
+ with_f0: true
9
+ with_hubert: true
10
+ with_spk_embed: false
11
+ with_wav: false
12
+ binarizer_cls: preprocessing.SVCpre.SVCBinarizer
13
+ binary_data_dir: data/binary/Unnamed
14
+ check_val_every_n_epoch: 10
15
+ choose_test_manually: false
16
+ clip_grad_norm: 1
17
+ config_path: training/config_nsf.yaml
18
+ content_cond_steps: []
19
+ cwt_add_f0_loss: false
20
+ cwt_hidden_size: 128
21
+ cwt_layers: 2
22
+ cwt_loss: l1
23
+ cwt_std_scale: 0.8
24
+ datasets:
25
+ - opencpop
26
+ debug: false
27
+ dec_ffn_kernel_size: 9
28
+ dec_layers: 4
29
+ decay_steps: 20000
30
+ decoder_type: fft
31
+ dict_dir: ''
32
+ diff_decoder_type: wavenet
33
+ diff_loss_type: l2
34
+ dilation_cycle_length: 4
35
+ dropout: 0.1
36
+ ds_workers: 4
37
+ dur_enc_hidden_stride_kernel:
38
+ - 0,2,3
39
+ - 0,2,3
40
+ - 0,1,3
41
+ dur_loss: mse
42
+ dur_predictor_kernel: 3
43
+ dur_predictor_layers: 5
44
+ enc_ffn_kernel_size: 9
45
+ enc_layers: 4
46
+ encoder_K: 8
47
+ encoder_type: fft
48
+ endless_ds: false
49
+ f0_bin: 256
50
+ f0_max: 1100.0
51
+ f0_min: 40.0
52
+ ffn_act: gelu
53
+ ffn_padding: SAME
54
+ fft_size: 2048
55
+ fmax: 16000
56
+ fmin: 40
57
+ fs2_ckpt: ''
58
+ gaussian_start: true
59
+ gen_dir_name: ''
60
+ gen_tgt_spk_id: -1
61
+ hidden_size: 256
62
+ hop_size: 512
63
+ hubert_gpu: true
64
+ hubert_path: checkpoints/hubert/hubert_soft.pt
65
+ infer: false
66
+ keep_bins: 128
67
+ lambda_commit: 0.25
68
+ lambda_energy: 0.0
69
+ lambda_f0: 1.0
70
+ lambda_ph_dur: 0.3
71
+ lambda_sent_dur: 1.0
72
+ lambda_uv: 1.0
73
+ lambda_word_dur: 1.0
74
+ load_ckpt: pretrain/nehito_ckpt_steps_1000000.ckpt
75
+ log_interval: 100
76
+ loud_norm: false
77
+ lr: 5.0e-05
78
+ max_beta: 0.02
79
+ max_epochs: 3000
80
+ max_eval_sentences: 1
81
+ max_eval_tokens: 60000
82
+ max_frames: 42000
83
+ max_input_tokens: 60000
84
+ max_sentences: 12
85
+ max_tokens: 128000
86
+ max_updates: 1000000
87
+ mel_loss: ssim:0.5|l1:0.5
88
+ mel_vmax: 1.5
89
+ mel_vmin: -6.0
90
+ min_level_db: -120
91
+ no_fs2: true
92
+ norm_type: gn
93
+ num_ckpt_keep: 10
94
+ num_heads: 2
95
+ num_sanity_val_steps: 1
96
+ num_spk: 1
97
+ num_test_samples: 0
98
+ num_valid_plots: 10
99
+ optimizer_adam_beta1: 0.9
100
+ optimizer_adam_beta2: 0.98
101
+ out_wav_norm: false
102
+ pe_ckpt: checkpoints/0102_xiaoma_pe/model_ckpt_steps_60000.ckpt
103
+ pe_enable: false
104
+ perform_enhance: true
105
+ pitch_ar: false
106
+ pitch_enc_hidden_stride_kernel:
107
+ - 0,2,5
108
+ - 0,2,5
109
+ - 0,2,5
110
+ pitch_extractor: parselmouth
111
+ pitch_loss: l2
112
+ pitch_norm: log
113
+ pitch_type: frame
114
+ pndm_speedup: 10
115
+ pre_align_args:
116
+ allow_no_txt: false
117
+ denoise: false
118
+ forced_align: mfa
119
+ txt_processor: zh_g2pM
120
+ use_sox: true
121
+ use_tone: false
122
+ pre_align_cls: data_gen.singing.pre_align.SingingPreAlign
123
+ predictor_dropout: 0.5
124
+ predictor_grad: 0.1
125
+ predictor_hidden: -1
126
+ predictor_kernel: 5
127
+ predictor_layers: 5
128
+ prenet_dropout: 0.5
129
+ prenet_hidden_size: 256
130
+ pretrain_fs_ckpt: ''
131
+ processed_data_dir: xxx
132
+ profile_infer: false
133
+ raw_data_dir: data/raw/Unnamed
134
+ ref_norm_layer: bn
135
+ rel_pos: true
136
+ reset_phone_dict: true
137
+ residual_channels: 384
138
+ residual_layers: 20
139
+ save_best: false
140
+ save_ckpt: true
141
+ save_codes:
142
+ - configs
143
+ - modules
144
+ - src
145
+ - utils
146
+ save_f0: true
147
+ save_gt: false
148
+ schedule_type: linear
149
+ seed: 1234
150
+ sort_by_len: true
151
+ speaker_id: Unnamed
152
+ spec_max:
153
+ - -0.4884430170059204
154
+ - 0.004534448496997356
155
+ - 0.5684943795204163
156
+ - 0.6527385115623474
157
+ - 0.659079372882843
158
+ - 0.7416915893554688
159
+ - 0.844637930393219
160
+ - 0.806076169013977
161
+ - 0.7238750457763672
162
+ - 0.9744535088539124
163
+ - 0.9476388692855835
164
+ - 0.9883336424827576
165
+ - 1.0821290016174316
166
+ - 1.046391248703003
167
+ - 0.9829667806625366
168
+ - 1.0163493156433105
169
+ - 0.9825412631034851
170
+ - 0.9834834337234497
171
+ - 0.9811502695083618
172
+ - 1.128888726234436
173
+ - 1.186057209968567
174
+ - 1.112004280090332
175
+ - 1.1282787322998047
176
+ - 1.051572322845459
177
+ - 1.0510444641113281
178
+ - 1.0110565423965454
179
+ - 0.9236567616462708
180
+ - 0.8036720156669617
181
+ - 0.8383486270904541
182
+ - 0.7735869288444519
183
+ - 0.9303848743438721
184
+ - 1.1257890462875366
185
+ - 1.1610286235809326
186
+ - 1.0335885286331177
187
+ - 1.0645352602005005
188
+ - 1.0619306564331055
189
+ - 1.1310148239135742
190
+ - 1.1191954612731934
191
+ - 1.1307402849197388
192
+ - 0.8837698698043823
193
+ - 1.1153966188430786
194
+ - 1.1045044660568237
195
+ - 1.0479614734649658
196
+ - 0.9491603374481201
197
+ - 0.9858523011207581
198
+ - 0.9226155281066895
199
+ - 0.9469702839851379
200
+ - 0.8791896104812622
201
+ - 0.997624933719635
202
+ - 0.9068642854690552
203
+ - 0.9575618505477905
204
+ - 0.8551340699195862
205
+ - 0.8397778272628784
206
+ - 0.8908605575561523
207
+ - 0.7986546158790588
208
+ - 0.7983465194702148
209
+ - 0.6965265274047852
210
+ - 0.640673041343689
211
+ - 0.6690735220909119
212
+ - 0.5631484985351562
213
+ - 0.48587048053741455
214
+ - 0.5326520800590515
215
+ - 0.4286036193370819
216
+ - 0.35252484679222107
217
+ - 0.3290073573589325
218
+ - 0.4754445552825928
219
+ - 0.3632410168647766
220
+ - 0.391481876373291
221
+ - 0.20288512110710144
222
+ - 0.18305960297584534
223
+ - 0.1539602279663086
224
+ - 0.03451670706272125
225
+ - -0.16881510615348816
226
+ - -0.02030198462307453
227
+ - 0.10024689882993698
228
+ - -0.023952053859829903
229
+ - 0.05635542422533035
230
+ - 0.10877621918916702
231
+ - 0.006155031267553568
232
+ - 0.07318088412284851
233
+ - 0.14075303077697754
234
+ - 0.057870157063007355
235
+ - -0.0520513579249382
236
+ - 0.1741427332162857
237
+ - -0.11464552581310272
238
+ - 0.03305494412779808
239
+ - -0.06897418200969696
240
+ - -0.12598733603954315
241
+ - -0.09894973039627075
242
+ - -0.2817802429199219
243
+ - -0.0825519785284996
244
+ - -0.3040400445461273
245
+ - -0.4998124837875366
246
+ - -0.36957985162734985
247
+ - -0.5409602522850037
248
+ - -0.49879470467567444
249
+ - -0.713716983795166
250
+ - -0.6545754671096802
251
+ - -0.6425778865814209
252
+ - -0.6178902387619019
253
+ - -0.47356730699539185
254
+ - -0.6165243983268738
255
+ - -0.5841533541679382
256
+ - -0.5759448409080505
257
+ - -0.5498068332672119
258
+ - -0.4661938548088074
259
+ - -0.5811225771903992
260
+ - -0.614664614200592
261
+ - -0.3902229070663452
262
+ - -0.7037366032600403
263
+ - -0.7260795831680298
264
+ - -0.7540019750595093
265
+ - -0.8360528945922852
266
+ - -0.8374698758125305
267
+ - -0.8328713178634644
268
+ - -0.9081047177314758
269
+ - -0.9679695963859558
270
+ - -0.9587443470954895
271
+ - -1.0706337690353394
272
+ - -0.9818469285964966
273
+ - -0.8360191583633423
274
+ - -0.9938981533050537
275
+ - -1.0823708772659302
276
+ - -1.0617167949676514
277
+ - -1.1093820333480835
278
+ - -1.1300138235092163
279
+ - -1.2141350507736206
280
+ - -1.3147293329238892
281
+ spec_min:
282
+ - -4.473258972167969
283
+ - -4.244492530822754
284
+ - -4.390527725219727
285
+ - -4.209497928619385
286
+ - -4.446024417877197
287
+ - -4.3960185050964355
288
+ - -4.164802551269531
289
+ - -4.5063300132751465
290
+ - -4.608232021331787
291
+ - -4.251623630523682
292
+ - -4.4799604415893555
293
+ - -4.733210563659668
294
+ - -4.411860466003418
295
+ - -4.609100818634033
296
+ - -4.726972579956055
297
+ - -4.428761959075928
298
+ - -4.487612247467041
299
+ - -4.525552749633789
300
+ - -4.480506896972656
301
+ - -4.589383125305176
302
+ - -4.608384132385254
303
+ - -4.385376453399658
304
+ - -4.816161632537842
305
+ - -4.8706955909729
306
+ - -4.848956108093262
307
+ - -4.431278705596924
308
+ - -4.999994277954102
309
+ - -4.818373203277588
310
+ - -4.527368068695068
311
+ - -4.872085094451904
312
+ - -4.894851207733154
313
+ - -4.511948585510254
314
+ - -4.534575939178467
315
+ - -4.57792854309082
316
+ - -4.444681644439697
317
+ - -4.628803253173828
318
+ - -4.74341344833374
319
+ - -4.85427713394165
320
+ - -4.723776817321777
321
+ - -4.7166008949279785
322
+ - -4.749168395996094
323
+ - -4.67240047454834
324
+ - -4.590690612792969
325
+ - -4.576009750366211
326
+ - -4.542308330535889
327
+ - -4.890907287597656
328
+ - -4.613001823425293
329
+ - -4.494126796722412
330
+ - -4.474257946014404
331
+ - -4.574635028839111
332
+ - -4.4817585945129395
333
+ - -4.651009559631348
334
+ - -4.478254795074463
335
+ - -4.523812770843506
336
+ - -4.546536922454834
337
+ - -4.535660266876221
338
+ - -4.470296859741211
339
+ - -4.577486991882324
340
+ - -4.541748046875
341
+ - -4.428532123565674
342
+ - -4.461862564086914
343
+ - -4.489077091217041
344
+ - -4.515830039978027
345
+ - -4.395663738250732
346
+ - -4.439975738525391
347
+ - -4.4290876388549805
348
+ - -4.397741794586182
349
+ - -4.478252410888672
350
+ - -4.399686336517334
351
+ - -4.45617151260376
352
+ - -4.434477806091309
353
+ - -4.442898750305176
354
+ - -4.5840277671813965
355
+ - -4.537542819976807
356
+ - -4.492046356201172
357
+ - -4.534677505493164
358
+ - -4.477104187011719
359
+ - -4.511618614196777
360
+ - -4.387601375579834
361
+ - -4.499236106872559
362
+ - -4.3717169761657715
363
+ - -4.4242024421691895
364
+ - -4.4055657386779785
365
+ - -4.429355144500732
366
+ - -4.4636993408203125
367
+ - -4.508528232574463
368
+ - -4.515079498291016
369
+ - -4.426190376281738
370
+ - -4.433525085449219
371
+ - -4.4200215339660645
372
+ - -4.421280860900879
373
+ - -4.400143623352051
374
+ - -4.419166088104248
375
+ - -4.429825305938721
376
+ - -4.436781406402588
377
+ - -4.51550817489624
378
+ - -4.518474578857422
379
+ - -4.495880603790283
380
+ - -4.483924865722656
381
+ - -4.409562587738037
382
+ - -4.3811845779418945
383
+ - -4.411908149719238
384
+ - -4.427165985107422
385
+ - -4.396549701690674
386
+ - -4.340637683868408
387
+ - -4.405435085296631
388
+ - -4.367630481719971
389
+ - -4.419083595275879
390
+ - -4.389026165008545
391
+ - -4.371067047119141
392
+ - -4.370710372924805
393
+ - -4.3755269050598145
394
+ - -4.39500093460083
395
+ - -4.451773166656494
396
+ - -4.365351676940918
397
+ - -4.348028182983398
398
+ - -4.408270359039307
399
+ - -4.390385627746582
400
+ - -4.347931861877441
401
+ - -4.378237247467041
402
+ - -4.426717758178711
403
+ - -4.364233493804932
404
+ - -4.371546745300293
405
+ - -4.402477264404297
406
+ - -4.430750846862793
407
+ - -4.404538154602051
408
+ - -4.384459018707275
409
+ - -4.401677131652832
410
+ spk_cond_steps: []
411
+ stop_token_weight: 5.0
412
+ task_cls: training.task.SVC_task.SVCTask
413
+ test_ids: []
414
+ test_input_dir: ''
415
+ test_num: 0
416
+ test_prefixes:
417
+ - test
418
+ test_set_name: test
419
+ timesteps: 1000
420
+ train_set_name: train
421
+ use_crepe: false
422
+ use_denoise: false
423
+ use_energy_embed: false
424
+ use_gt_dur: false
425
+ use_gt_f0: false
426
+ use_midi: false
427
+ use_nsf: true
428
+ use_pitch_embed: true
429
+ use_pos_embed: true
430
+ use_spk_embed: false
431
+ use_spk_id: false
432
+ use_split_spk_id: false
433
+ use_uv: false
434
+ use_var_enc: false
435
+ use_vec: false
436
+ val_check_interval: 1000
437
+ valid_num: 0
438
+ valid_set_name: valid
439
+ vocoder: network.vocoders.nsf_hifigan.NsfHifiGAN
440
+ vocoder_ckpt: checkpoints/nsf_hifigan/model
441
+ warmup_updates: 2000
442
+ wav2spec_eps: 1e-6
443
+ weight_decay: 0
444
+ win_size: 2048
445
+ work_dir: checkpoints/HokoHifi
checkpoints/Unnamed/lightning_logs/lastest/hparams.yaml ADDED
@@ -0,0 +1 @@
 
 
1
+ {}
checkpoints/Unnamed/model_ckpt_steps_192000.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c441462923580893a6170dd00126084be0a20b387b1c4fb1860755acd36c881b
3
+ size 391390823
checkpoints/nsf_hifigan/NOTICE.txt ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ --- DiffSinger Community Vocoder ---
2
+
3
+ ARCHITECTURE: NSF-HiFiGAN
4
+ RELEASE DATE: 2022-12-11
5
+
6
+ HYPER PARAMETERS:
7
+ - 44100 sample rate
8
+ - 128 mel bins
9
+ - 512 hop size
10
+ - 2048 window size
11
+ - fmin at 40Hz
12
+ - fmax at 16000Hz
13
+
14
+
15
+ NOTICE:
16
+
17
+ All model weights in the [DiffSinger Community Vocoder Project](https://openvpi.github.io/vocoders/), including
18
+ model weights in this directory, are provided by the [OpenVPI Team](https://github.com/openvpi/), under the
19
+ [Attribution-NonCommercial-ShareAlike 4.0 International](https://creativecommons.org/licenses/by-nc-sa/4.0/) license.
20
+
21
+
22
+ ACKNOWLEDGEMENTS:
23
+
24
+ Training data for this vocoder was provided, and its use permitted, by the following organizations, societies and individuals:
25
+
26
+ 孙飒 https://www.qfssr.cn
27
+ 赤松_Akamatsu https://www.zhibin.club
28
+ 乐威 https://www.zhibin.club
29
+ 伯添 https://space.bilibili.com/24087011
30
+ 雲宇光 https://space.bilibili.com/660675050
31
+ 橙子言 https://space.bilibili.com/318486464
32
+ 人衣大人 https://space.bilibili.com/2270344
33
+ 玖蝶 https://space.bilibili.com/676771003
34
+ Yuuko
35
+ 白夜零BYL https://space.bilibili.com/1605040503
36
+ 嗷天 https://space.bilibili.com/5675252
37
+ 洛泠羽 https://space.bilibili.com/347373318
38
+ 灰条纹的灰猫君 https://space.bilibili.com/2083633
39
+ 幽寂 https://space.bilibili.com/478860
40
+ 恶魔王女 https://space.bilibili.com/2475098
41
+ AlexYHX 芮晴
42
+ 绮萱 https://y.qq.com/n/ryqq/singer/003HjD6H4aZn1K
43
+ 诗芸 https://y.qq.com/n/ryqq/singer/0005NInj142zm0
44
+ 汐蕾 https://y.qq.com/n/ryqq/singer/0023cWMH1Bq1PJ
45
+ 1262917464
46
+ 炜阳
47
+ 叶卡yolka
48
+ 幸の夏 https://space.bilibili.com/1017297686
49
+ 暮色未量 https://space.bilibili.com/272904686
50
+ 晓寞sama https://space.bilibili.com/3463394
51
+ 没头绪的节操君
52
+ 串串BunC https://space.bilibili.com/95817834
53
+ 落雨 https://space.bilibili.com/1292427
54
+ 长尾巴的翎艾 https://space.bilibili.com/1638666
55
+ 声闻计划 https://space.bilibili.com/392812269
56
+ 唐家大小姐 http://5sing.kugou.com/palmusic/default.html
57
+ 不伊子
58
+
59
+ Training machines are provided by:
60
+
61
+ 花儿不哭 https://space.bilibili.com/5760446
62
+
63
+
64
+ TERMS OF REDISTRIBUTION:
65
+
66
+ 1. Do not sell this vocoder or charge any fees for redistributing it, as this is prohibited by
67
+ the license.
68
+ 2. Include a copy of the CC BY-NC-SA 4.0 license, or a link referring to it.
69
+ 3. Include a copy of this notice, or any other notices informing that this vocoder is
70
+ provided by the OpenVPI Team, that this vocoder is licensed under CC BY-NC-SA 4.0, and
71
+ with a complete acknowledgement list as shown above.
72
+ 4. If you fine-tune or modify the weights, include a notice describing what has been changed.
73
+ 5. (Optional) Leave a link to the official release page of the vocoder, and tell users
74
+ that other versions and future updates of this vocoder can be obtained from the website.
checkpoints/nsf_hifigan/NOTICE.zh-CN.txt ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ --- DiffSinger 社区声码器 ---
2
+
3
+ 架构:NSF-HiFiGAN
4
+ 发布日期:2022-12-11
5
+
6
+ 超参数:
7
+ - 44100 sample rate
8
+ - 128 mel bins
9
+ - 512 hop size
10
+ - 2048 window size
11
+ - fmin at 40Hz
12
+ - fmax at 16000Hz
13
+
14
+
15
+ 注意事项:
16
+
17
+ [DiffSinger 社区声码器企划](https://openvpi.github.io/vocoders/) 中的所有模型权重,
18
+ 包括此目录下的模型权重,均由 [OpenVPI Team](https://github.com/openvpi/) 提供,并基于
19
+ [Attribution-NonCommercial-ShareAlike 4.0 International](https://creativecommons.org/licenses/by-nc-sa/4.0/)
20
+ 进行许可。
21
+
22
+
23
+ 致谢:
24
+
25
+ 此声码器的训练数据由以下组织、社团和个人提供并许可:
26
+
27
+ 孙飒 https://www.qfssr.cn
28
+ 赤松_Akamatsu https://www.zhibin.club
29
+ 乐威 https://www.zhibin.club
30
+ 伯添 https://space.bilibili.com/24087011
31
+ 雲宇光 https://space.bilibili.com/660675050
32
+ 橙子言 https://space.bilibili.com/318486464
33
+ 人衣大人 https://space.bilibili.com/2270344
34
+ 玖蝶 https://space.bilibili.com/676771003
35
+ Yuuko
36
+ 白夜零BYL https://space.bilibili.com/1605040503
37
+ 嗷天 https://space.bilibili.com/5675252
38
+ 洛泠羽 https://space.bilibili.com/347373318
39
+ 灰条纹的灰猫君 https://space.bilibili.com/2083633
40
+ 幽寂 https://space.bilibili.com/478860
41
+ 恶魔王女 https://space.bilibili.com/2475098
42
+ AlexYHX 芮晴
43
+ 绮萱 https://y.qq.com/n/ryqq/singer/003HjD6H4aZn1K
44
+ 诗芸 https://y.qq.com/n/ryqq/singer/0005NInj142zm0
45
+ 汐蕾 https://y.qq.com/n/ryqq/singer/0023cWMH1Bq1PJ
46
+ 1262917464
47
+ 炜阳
48
+ 叶卡yolka
49
+ 幸の夏 https://space.bilibili.com/1017297686
50
+ 暮色未量 https://space.bilibili.com/272904686
51
+ 晓寞sama https://space.bilibili.com/3463394
52
+ 没头绪的节操君
53
+ 串串BunC https://space.bilibili.com/95817834
54
+ 落雨 https://space.bilibili.com/1292427
55
+ 长尾巴的翎艾 https://space.bilibili.com/1638666
56
+ 声闻计划 https://space.bilibili.com/392812269
57
+ 唐家大小姐 http://5sing.kugou.com/palmusic/default.html
58
+ 不伊子
59
+
60
+ 训练算力的提供者如下:
61
+
62
+ 花儿不哭 https://space.bilibili.com/5760446
63
+
64
+
65
+ 二次分发条款:
66
+
67
+ 1. 请勿售卖此声码器或从其二次分发过程中收取任何费用,因为此类行为受到许可证的禁止。
68
+ 2. 请在二次分发文件中包含一份 CC BY-NC-SA 4.0 许可证的副本或指向该许可证的链接。
69
+ 3. 请在二次分发文件中包含这份声明,或以其他形式声明此声码器由 OpenVPI Team 提供并基于 CC BY-NC-SA 4.0 许可,
70
+ 并附带上述完整的致谢名单。
71
+ 4. 如果您微调或修改了权重,请留下一份关于其受到了何种修改的说明。
72
+ 5.(可选)留下一份指向此声码器的官方发布页面的链接,并告知使用者可从该网站获取此声码器的其他版本和未来的更新。
checkpoints/nsf_hifigan/config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "resblock": "1",
3
+ "num_gpus": 4,
4
+ "batch_size": 10,
5
+ "learning_rate": 0.0002,
6
+ "adam_b1": 0.8,
7
+ "adam_b2": 0.99,
8
+ "lr_decay": 0.999,
9
+ "seed": 1234,
10
+
11
+ "upsample_rates": [ 8, 8, 2, 2, 2],
12
+ "upsample_kernel_sizes": [16,16, 4, 4, 4],
13
+ "upsample_initial_channel": 512,
14
+ "resblock_kernel_sizes": [3,7,11],
15
+ "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
16
+ "discriminator_periods": [3, 5, 7, 11, 17, 23, 37],
17
+
18
+ "segment_size": 16384,
19
+ "num_mels": 128,
20
+ "num_freq": 1025,
21
+ "n_fft" : 2048,
22
+ "hop_size": 512,
23
+ "win_size": 2048,
24
+
25
+ "sampling_rate": 44100,
26
+
27
+ "fmin": 40,
28
+ "fmax": 16000,
29
+ "fmax_for_loss": null,
30
+
31
+ "num_workers": 16,
32
+
33
+ "dist_config": {
34
+ "dist_backend": "nccl",
35
+ "dist_url": "tcp://localhost:54321",
36
+ "world_size": 1
37
+ }
38
+ }
checkpoints/nsf_hifigan/model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c576b63b7ed952161b70fad34e0562ace502ce689195520d8a2a6c051de29d6
3
+ size 56825430