forked from X-LANCE/VoiceFlow-TTS
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
ffe04e2
commit 0532b53
Showing
6 changed files
with
163 additions
and
81 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -152,4 +152,5 @@ cython_debug/ | |
.idea/ | ||
synthetic_wav/ | ||
exp/ | ||
**/*.wav | ||
**/*.wav | ||
.DS_Store |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
xvector: false # whether to use x-vectors for speaker modeling | ||
|
||
perform_reflow: true # if true, noise_scp must also be specified | ||
|
||
train: | ||
test_size: 4 | ||
n_epochs: 10000 | ||
batch_size: 24 | ||
learning_rate: !!float 5e-5 | ||
seed: 37 | ||
save_every: 10 | ||
use_gt_dur: true # whether to supervise duration modeling ("gt" presumably means ground-truth durations; confirm) | ||
|
||
data: | ||
sampling_rate: 16000 | ||
n_mel_channels: 80 | ||
add_blank: false # whether to insert blank tokens between input phones | ||
hop_length: 200 # in samples (sampling points), i.e. 12.5 ms at sampling_rate 16000 | ||
|
||
phn2id: "data/ljspeech/phones.txt" | ||
|
||
train: | ||
utts: "data/ljspeech/train/utts.list" | ||
utt2phns: "data/ljspeech/train/text" | ||
utt2phn_duration: "data/ljspeech/train/phn_duration" | ||
feats_scp: "synthetic_wav/lj_16k_gt_dur/train/feats.scp" | ||
noise_scp: "synthetic_wav/lj_16k_gt_dur/train/noise.scp" | ||
utt2num_frames: "feats/normed_fbank/ljspeech/train/utt2num_frames" | ||
utt2spk: "data/ljspeech/train/utt2spk_id.json" | ||
|
||
val: | ||
utts: "data/ljspeech/val/utts.list" | ||
utt2phns: "data/ljspeech/val/text" | ||
utt2phn_duration: "data/ljspeech/val/phn_duration" | ||
feats_scp: "synthetic_wav/lj_16k_gt_dur/val/feats.scp" | ||
noise_scp: "synthetic_wav/lj_16k_gt_dur/val/noise.scp" | ||
utt2num_frames: "feats/normed_fbank/ljspeech/val/utt2num_frames" | ||
utt2spk: "data/ljspeech/val/utt2spk_id.json" | ||
|
||
model: | ||
n_vocab: 148 | ||
n_spks: 1 | ||
spk_emb_dim: 64 | ||
n_enc_channels: 192 | ||
filter_channels: 768 | ||
filter_channels_dp: 256 | ||
n_enc_layers: 6 | ||
enc_kernel: 3 | ||
enc_dropout: 0.1 | ||
n_heads: 2 | ||
window_size: 4 | ||
dec_dim: 128 | ||
pe_scale: 1000 | ||
fm_type: "CFM" # one of: "FM", "CFM" | ||
fm_net_type: "unet" # one of: "unet", "diffsinger" | ||
shift_by_mu: false # whether to shift the prior distribution by mu; true means GradTTS-style. | ||
condition_by_mu: true # whether to condition the flow-matching decoder on mu; false supports text-agnostic voice conversion, like GlowTTS. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters