# wavenext.yaml
# pytorch_lightning==1.8.6
# Global random seed for the run (presumably consumed by the Lightning CLI's
# seed_everything option — confirm against the launcher).
seed_everything: 4444
# Data module configuration: `class_path` names the class to instantiate and
# `init_args` holds its constructor arguments.
data:
  class_path: vocos.dataset.VocosDataModule
  init_args:
    # Training split.
    train_params:
      filelist_path: ???  # placeholder; replace with the real file-list path
      sampling_rate: 22050
      num_samples: 16384
      batch_size: 16
      num_workers: 8

    # Validation split; note the larger num_samples than for training.
    val_params:
      filelist_path: ???  # placeholder; replace with the real file-list path
      sampling_rate: 22050
      num_samples: 48384
      batch_size: 16
      num_workers: 8
# Model configuration: the experiment class plus its nested components
# (feature extractor, backbone, head, mel-spectrogram loss), each declared
# with the same `class_path` / `init_args` pattern.
model:
  class_path: vocos.experiment.VocosExp
  init_args:
    sample_rate: 22050
    # Exponent floats are written with an explicit ".0" so YAML 1.1 loaders
    # (which require a dot) resolve them as floats rather than strings.
    initial_learning_rate: 1.0e-4
    mel_loss_coeff: 45
    mrd_loss_coeff: 0.1  # original value 0.1
    num_warmup_steps: 500  # Optimizers warmup steps
    pretrain_mel_steps: 0  # 0 means GAN objective from the first iteration

    # automatic evaluation
    # NOTE(review): "periodicty" is spelled as in the source; it is a runtime
    # key, so it is kept verbatim — confirm it matches the consumer's argument.
    evaluate_utmos: true
    evaluate_pesq: true
    evaluate_periodicty: true

    feature_extractor:
      class_path: vocos.feature_extractors.MelSpectrogramFeatures
      init_args:
        sample_rate: 22050
        n_fft: 1024
        hop_length: 256
        n_mels: 80
        padding: same
        f_min: 0
        f_max: 8000
        norm: "slaney"
        mel_scale: "slaney"
        clip_val: 1.0e-5

    backbone:
      class_path: vocos.models.VocosBackbone
      init_args:
        input_channels: 80  # equals feature_extractor n_mels above
        dim: 512
        intermediate_dim: 1536
        num_layers: 8

    head:
      class_path: vocos.heads.WaveNextHead
      init_args:
        dim: 512  # equals backbone dim above
        n_fft: 1024
        hop_length: 256
        padding: same

    # Mel loss uses its own mel settings (128 bands, f_max 11025), which
    # differ from the feature extractor's (80 bands, f_max 8000).
    # NOTE(review): nesting under model.init_args is inferred — confirm.
    melspec_loss:
      class_path: vocos.loss.MelSpecReconstructionLoss
      init_args:
        sample_rate: 22050
        n_fft: 1024
        hop_length: 256
        n_mels: 128
        f_min: 0
        f_max: 11025
        norm: "slaney"
        mel_scale: "slaney"
        clip_val: 1.0e-5
# Trainer configuration: logger, callbacks, and run-length / hardware options.
trainer:
  logger:
    class_path: pytorch_lightning.loggers.TensorBoardLogger
    init_args:
      save_dir: ???  # placeholder; replace with the real log directory
  callbacks:
    - class_path: pytorch_lightning.callbacks.LearningRateMonitor
    - class_path: pytorch_lightning.callbacks.ModelSummary
      init_args:
        max_depth: 2
    - class_path: pytorch_lightning.callbacks.ModelCheckpoint
      init_args:
        monitor: val_loss
        filename: vocos_checkpoint_{epoch}_{step}_{val_loss:.4f}
        save_top_k: 3
        save_last: true
    - class_path: vocos.helpers.GradNormCallback

  # Lightning calculates max_steps across all optimizer steps (rather than
  # number of batches). With the value below that is roughly 500k generator
  # steps and 500k discriminator steps (assuming the two optimizers step
  # equally often).
  max_steps: 1000000
  # You might want to limit val batches when evaluating all the metrics, as
  # they are time-consuming.
  limit_val_batches: 50
  accelerator: gpu
  strategy: ddp
  devices: [0]
  log_every_n_steps: 250