jonathanjordan21 committed on
Commit 23598cb
1 Parent(s): 5b0829a

Create hparams.py

Files changed (1)
  1. hparams.py +167 -0
hparams.py ADDED
@@ -0,0 +1,167 @@
class Map(dict):
    """
    Example:
    m = Map({'first_name': 'Eduardo'}, last_name='Pool', age=24, sports=['Soccer'])

    Credits to epool:
    https://stackoverflow.com/questions/2352181/how-to-use-a-dot-to-access-members-of-dictionary
    """

    def __init__(self, *args, **kwargs):
        super(Map, self).__init__(*args, **kwargs)
        for arg in args:
            if isinstance(arg, dict):
                for k, v in arg.items():
                    self[k] = v

        if kwargs:
            for k, v in kwargs.items():
                self[k] = v

    def __getattr__(self, attr):
        return self.get(attr)

    def __setattr__(self, key, value):
        self.__setitem__(key, value)

    def __setitem__(self, key, value):
        super(Map, self).__setitem__(key, value)
        self.__dict__.update({key: value})

    def __delattr__(self, item):
        self.__delitem__(item)

    def __delitem__(self, key):
        super(Map, self).__delitem__(key)
        del self.__dict__[key]

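# A quick illustration of Map's dot access (an illustrative sketch, not part
# of the original file):
#   m = Map({'first_name': 'Eduardo'}, last_name='Pool')
#   m.first_name        # 'Eduardo' -- keys are readable as attributes
#   m.age = 24          # attribute writes go into the dict: m['age'] == 24
#   m.missing           # None (no AttributeError: __getattr__ falls back to dict.get)
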
# Default hyperparameters:
hparams = Map({
    'name': "wavenet_vocoder",

    # Convenient model builder
    'builder': "wavenet",

    # Input type:
    # 1. raw [-1, 1]
    # 2. mulaw [-1, 1]
    # 3. mulaw-quantize [0, mu]
    # If input_type is raw or mulaw, the network assumes scalar input and
    # discretized mixture of logistic distributions output; otherwise one-hot
    # input and softmax output are assumed.
    # **NOTE**: if you change either of the two parameters below, you need to
    # re-run preprocessing before training.
    'input_type': "raw",
    'quantize_channels': 65536,  # 65536 or 256

    # Audio:
    'sample_rate': 16000,
    # this is only valid when mulaw is True
    'silence_threshold': 2,
    'num_mels': 80,
    'fmin': 125,
    'fmax': 7600,
    'fft_size': 1024,
    # shift can be specified by either hop_size or frame_shift_ms
    'hop_size': 256,
    'frame_shift_ms': None,
    'min_level_db': -100,
    'ref_level_db': 20,
    # whether to rescale the waveform or not.
    # If x is an input waveform, the rescaled waveform y is given by:
    # y = x / np.abs(x).max() * rescaling_max
    'rescaling': True,
    'rescaling_max': 0.999,
    # the mel-spectrogram is normalized to [0, 1] for each utterance, and
    # clipping may happen depending on min_level_db and ref_level_db,
    # causing clipping noise.
    # If False, an assertion is added to ensure no clipping happens.
    'allow_clipping_in_normalization': True,

    # Mixture of logistic distributions:
    'log_scale_min': float(-32.23619130191664),

    # Model:
    # This should equal `quantize_channels` if mu-law quantize is enabled,
    # otherwise num_mixture * 3 (pi, mean, log_scale)
    'out_channels': 10 * 3,
    'layers': 24,
    'stacks': 4,
    'residual_channels': 512,
    'gate_channels': 512,  # split into 2 groups internally for gated activation
    'skip_out_channels': 256,
    'dropout': 1 - 0.95,
    'kernel_size': 3,
    # If True, apply weight normalization in the same way as DeepVoice3
    'weight_normalization': True,
    # Use legacy code or not. Default is True since we already provide a model
    # based on the legacy code that can generate high-quality audio.
    # Ref: https://github.com/r9y9/wavenet_vocoder/pull/73
    'legacy': True,

    # Local conditioning (set a negative value to disable)
    'cin_channels': 80,
    # If True, use transposed convolutions to upsample conditional features,
    # otherwise repeat features to adjust time resolution
    'upsample_conditional_features': True,
    # should satisfy np.prod(upsample_scales) == hop_size
    'upsample_scales': [4, 4, 4, 4],
    # Freq axis kernel size for the upsampling network
    'freq_axis_kernel_size': 3,

    # Global conditioning (set a negative value to disable)
    # currently limited to speaker embedding
    # this should only be enabled for multi-speaker datasets
    'gin_channels': -1,  # i.e., speaker embedding dim
    'n_speakers': -1,

    # Data loader
    'pin_memory': True,
    'num_workers': 2,

    # train/test
    # test size can be specified as a portion or a number of samples
    'test_size': 0.0441,  # 50 for CMU ARCTIC single speaker
    'test_num_samples': None,
    'random_state': 1234,

    # Loss

    # Training:
    'batch_size': 2,
    'adam_beta1': 0.9,
    'adam_beta2': 0.999,
    'adam_eps': 1e-8,
    'amsgrad': False,
    'initial_learning_rate': 1e-3,
    # see lrschedule.py for available lr_schedule
    'lr_schedule': "noam_learning_rate_decay",
    'lr_schedule_kwargs': {},  # {"anneal_rate": 0.5, "anneal_interval": 50000},
    'nepochs': 2000,
    'weight_decay': 0.0,
    'clip_thresh': -1,
    # max time steps can be specified as either seconds or steps
    # if both are None, then full audio samples are used in a batch
    'max_time_sec': None,
    'max_time_steps': 8000,
    # Hold moving-averaged parameters and use them for evaluation
    'exponential_moving_average': True,
    # averaged = decay * averaged + (1 - decay) * x
    'ema_decay': 0.9999,

    # Save
    # per-step intervals
    'checkpoint_interval': 10000,
    'train_eval_interval': 10000,
    # per-epoch interval
    'test_eval_epoch_interval': 5,
    'save_optimizer_state': True,

    # Eval:
})
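
# The mulaw / mulaw-quantize input types above follow standard mu-law
# companding. The two helpers below are an illustrative sketch (the names
# `_mulaw` and `_mulaw_quantize` are ours, not part of the original file),
# with mu = quantize_channels - 1.
import numpy as np

def _mulaw(x, mu=255):
    # compress x in [-1, 1] to [-1, 1] with mu-law companding
    return np.sign(x) * np.log1p(mu * np.abs(x)) / np.log1p(mu)

def _mulaw_quantize(x, mu=255):
    # map the companded signal onto the integer bins [0, mu]
    return ((_mulaw(x, mu) + 1) / 2 * mu + 0.5).astype(np.int64)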
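
# A minimal consistency check over the settings above, based on the
# constraints stated in the comments (a sketch; the helper name
# `validate_hparams` is ours, not part of the original file):
def validate_hparams(hp=hparams):
    # the upsampling network must produce exactly one hop of samples per frame
    assert np.prod(hp.upsample_scales) == hp.hop_size
    if hp.input_type == "mulaw-quantize":
        # one-hot input / softmax output: out_channels matches quantize_channels
        assert hp.out_channels == hp.quantize_channels
    else:
        # scalar input / mixture of logistics: (pi, mean, log_scale) per mixture
        assert hp.out_channels % 3 == 0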
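
# Sketch of the "noam" schedule named in lr_schedule above (the standard
# Transformer-style warmup-then-decay; this function and its warmup_steps
# default are ours, as a sketch of what lrschedule.py provides):
def _noam_learning_rate_decay(init_lr, step, warmup_steps=4000):
    # linear warmup for warmup_steps, then inverse-square-root decay
    step = max(step, 1)
    return init_lr * warmup_steps ** 0.5 * min(step * warmup_steps ** -1.5,
                                               step ** -0.5)
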
def hparams_debug_string():
    values = dict(hparams)
    hp = ['  %s: %s' % (name, values[name]) for name in sorted(values)]
    return 'Hyperparameters:\n' + '\n'.join(hp)
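
if __name__ == "__main__":
    # Quick smoke test (ours, not part of the original file): dump all
    # hyperparameters in sorted order.
    print(hparams_debug_string())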