forked from X-LANCE/VoiceFlow-TTS
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
93c56c5
commit ffe04e2
Showing
86 changed files
with
319 additions
and
34 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
# ====== About run.pl, queue.pl, slurm.pl, and ssh.pl ====== | ||
# Usage: <cmd>.pl [options] JOB=1:<nj> <log> <command...> | ||
# e.g. | ||
# run.pl --mem 4G JOB=1:10 echo.JOB.log echo JOB | ||
# | ||
# Options: | ||
# --time <time>: Limit the maximum time to execute. | ||
# --mem <mem>: Limit the maximum memory usage. | ||
# --max-jobs-run <njob>: Limit the number of parallel jobs. This is ignored for non-array jobs. | ||
# --num-threads <nthreads>: Specify the number of CPU cores. | ||
# --gpu <ngpu>: Specify the number of GPU devices. | ||
# --config: Change the configuration file from default. | ||
# | ||
# "JOB=1:10" is used for "array jobs" and it can control the number of parallel jobs. | ||
# The left string of "=", i.e. "JOB", is replaced by <N>(Nth job) in the command and the log file name, | ||
# e.g. "echo JOB" is changed to "echo 3" for the 3rd job and "echo 8" for 8th job respectively. | ||
# Note that the job range must start with a positive number, so you can't use "JOB=0:10" for example. | ||
# | ||
# run.pl, queue.pl, slurm.pl, and ssh.pl have a unified interface that does not depend on the backend. | ||
# These options are mapped to backend-specific options, and | ||
# the mapping is configured by "conf/queue.conf" and "conf/slurm.conf" by default. | ||
# If jobs fail, your configuration might be wrong for your environment. | ||
# | ||
# | ||
# The official documentation for run.pl, queue.pl, slurm.pl, and ssh.pl: | ||
# "Parallelization in Kaldi": http://kaldi-asr.org/doc/queue.html | ||
# =========================================================~ | ||
|
||
|
||
# Select the backend used by run.sh from "local", "stdout", "sge", "slurm", or "ssh" | ||
cmd_backend="local" | ||
|
||
# Local machine, without any Job scheduling system | ||
if [ "${cmd_backend}" = local ]; then | ||
|
||
# The other usage | ||
export train_cmd="utils/run.pl" | ||
# Used for "*_train.py": "--gpu" is appended optionally by run.sh | ||
export cuda_cmd="utils/run.pl" | ||
# Used for "*_recog.py" | ||
export decode_cmd="utils/run.pl" | ||
|
||
# Local machine, without any job scheduling system (uses utils/stdout.pl instead of utils/run.pl) | ||
elif [ "${cmd_backend}" = stdout ]; then | ||
|
||
# The other usage | ||
export train_cmd="utils/stdout.pl" | ||
# Used for "*_train.py": "--gpu" is appended optionally by run.sh | ||
export cuda_cmd="utils/stdout.pl" | ||
# Used for "*_recog.py" | ||
export decode_cmd="utils/stdout.pl" | ||
|
||
# "qsub" (SGE, Torque, PBS, etc.) | ||
elif [ "${cmd_backend}" = sge ]; then | ||
# The default setting is written in conf/queue.conf. | ||
# You must change "-q g.q" for the "queue" for your environment. | ||
# To know the "queue" names, type "qhost -q" | ||
# Note that to use "--gpu *", you have to set up "complex_value" for the system scheduler. | ||
|
||
export train_cmd="utils/queue.pl" | ||
export cuda_cmd="utils/queue.pl" | ||
export decode_cmd="utils/queue.pl" | ||
|
||
# "sbatch" (Slurm) | ||
elif [ "${cmd_backend}" = slurm ]; then | ||
# The default setting is written in conf/slurm.conf. | ||
# You must change "-p cpu" and "-p gpu" to the "partition" names for your environment. | ||
# To know the "partition" names, type "sinfo". | ||
# You can use "--gpu * " by default for slurm and it is interpreted as "--gres gpu:*" | ||
# The devices are allocated exclusively using "${CUDA_VISIBLE_DEVICES}". | ||
|
||
export train_cmd="utils/slurm.pl" | ||
export cuda_cmd="utils/slurm.pl" | ||
export decode_cmd="utils/slurm.pl" | ||
|
||
elif [ "${cmd_backend}" = ssh ]; then | ||
# You have to create ".queue/machines" to specify the host to execute jobs. | ||
# e.g. .queue/machines | ||
# host1 | ||
# host2 | ||
# host3 | ||
# This assumes you can log in to them without a password, i.e. you have to set up SSH keys. | ||
|
||
export train_cmd="utils/ssh.pl" | ||
export cuda_cmd="utils/ssh.pl" | ||
export decode_cmd="utils/ssh.pl" | ||
|
||
else | ||
echo "$0: Error: Unknown cmd_backend=${cmd_backend}" 1>&2 | ||
return 1 | ||
fi |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
xvector: false # whether to use xvector for speaker modeling. | ||
|
||
perform_reflow: false # if true, noise_scp must be specified | ||
|
||
train: | ||
test_size: 4 | ||
n_epochs: 10000 | ||
batch_size: 24 | ||
learning_rate: !!float 5e-5 | ||
seed: 37 | ||
save_every: 10 | ||
use_gt_dur: true # whether to supervise duration modeling | ||
|
||
data: | ||
sampling_rate: 16000 | ||
n_mel_channels: 80 | ||
add_blank: false # whether to add blank tokens between input phones | ||
hop_length: 200 # in sampling points | ||
|
||
phn2id: "data/ljspeech/phones.txt" | ||
|
||
train: | ||
utts: "data/ljspeech/train/utts.list" | ||
utt2phns: "data/ljspeech/train/text" | ||
utt2phn_duration: "data/ljspeech/train/phn_duration" | ||
feats_scp: "feats/normed_fbank/ljspeech/train/feats.scp" | ||
utt2num_frames: "feats/normed_fbank/ljspeech/train/utt2num_frames" | ||
utt2spk: "data/ljspeech/train/utt2spk_id.json" | ||
|
||
val: | ||
utts: "data/ljspeech/val/utts.list" | ||
utt2phns: "data/ljspeech/val/text" | ||
utt2phn_duration: "data/ljspeech/val/phn_duration" | ||
feats_scp: "feats/normed_fbank/ljspeech/val/feats.scp" | ||
utt2num_frames: "feats/normed_fbank/ljspeech/val/utt2num_frames" | ||
utt2spk: "data/ljspeech/val/utt2spk_id.json" | ||
|
||
model: | ||
n_vocab: 148 | ||
n_spks: 1 | ||
spk_emb_dim: 64 | ||
n_enc_channels: 192 | ||
filter_channels: 768 | ||
filter_channels_dp: 256 | ||
n_enc_layers: 6 | ||
enc_kernel: 3 | ||
enc_dropout: 0.1 | ||
n_heads: 2 | ||
window_size: 4 | ||
dec_dim: 128 | ||
pe_scale: 1000 | ||
fm_type: "CFM" # FM, CFM | ||
fm_net_type: "unet" # unet or diffsinger | ||
shift_by_mu: false # whether to shift the prior distribution by mu. True means GradTTS-style. | ||
condition_by_mu: true # whether to condition the flow matching decoder by mu. False supports text-agnostic voice conversion like GlowTTS. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
LJ001-0110 9 4 3 4 3 4 9 9 7 5 8 4 8 13 10 14 8 3 4 4 7 5 11 6 9 9 11 15 9 5 7 5 5 3 11 3 4 4 6 5 5 4 7 5 4 4 4 4 8 4 5 4 6 6 9 11 | ||
LJ002-0018 3 3 3 3 7 3 3 5 3 3 9 4 4 3 3 4 10 7 8 4 3 5 5 6 4 7 4 6 3 6 4 3 3 4 6 4 3 4 3 3 5 6 11 10 11 11 9 10 9 6 4 3 4 9 10 5 5 3 4 4 3 5 8 4 4 5 9 4 7 5 3 3 3 5 6 4 5 4 4 4 7 7 4 4 4 7 3 | ||
LJ002-0043 4 15 7 6 5 7 10 5 13 9 24 3 12 7 5 9 6 4 6 8 6 4 7 4 13 13 11 17 7 5 10 4 7 6 4 3 4 6 7 7 5 8 14 13 9 7 3 4 3 7 13 13 14 17 9 15 13 | ||
LJ003-0111 4 3 4 4 4 5 5 6 4 5 4 5 4 3 6 5 9 7 6 8 4 10 5 4 3 3 4 4 5 6 5 5 6 4 3 4 3 4 8 5 8 6 8 7 4 3 5 9 24 19 12 4 4 6 5 10 12 27 5 9 5 8 12 3 3 4 3 6 8 3 3 6 3 4 3 3 3 7 6 4 5 5 3 4 7 6 7 4 6 16 | ||
LJ003-0345 10 12 4 3 4 4 4 3 4 4 6 3 5 3 6 12 3 3 3 3 8 4 5 5 6 10 6 8 17 8 3 4 4 4 7 5 6 3 4 3 6 4 4 5 6 4 4 3 4 4 5 5 8 8 4 3 10 5 10 21 | ||
LJ004-0045 3 3 3 3 4 12 5 8 5 5 7 10 9 9 11 24 15 5 11 8 5 6 6 6 5 3 3 8 12 25 3 14 6 9 6 5 4 13 4 4 6 7 5 14 21 10 4 5 3 4 3 6 5 6 5 7 5 3 3 5 8 5 7 18 | ||
LJ004-0096 15 4 5 8 4 4 3 4 6 5 4 3 5 4 3 3 3 5 4 4 5 3 6 9 4 3 6 3 3 4 6 3 3 5 5 6 4 4 5 3 4 5 4 3 8 3 5 3 4 6 5 4 5 3 3 4 7 7 5 5 4 7 6 5 8 10 6 8 10 6 6 13 | ||
LJ004-0152 5 4 6 6 3 3 3 3 4 4 5 5 4 4 4 4 3 3 3 3 4 7 4 8 5 4 8 7 8 6 8 3 3 4 5 4 5 3 5 7 5 4 5 3 3 8 4 5 7 4 11 7 6 5 5 6 6 7 6 6 6 10 5 6 4 4 6 3 3 9 9 7 7 5 3 3 3 7 9 6 3 3 3 3 4 4 5 7 8 8 6 10 22 10 6 6 4 4 3 4 6 6 7 6 5 4 3 4 4 12 8 8 3 | ||
LJ005-0014 8 4 7 5 4 6 3 5 4 3 4 5 12 7 4 7 4 5 3 5 4 4 3 4 9 5 8 23 12 9 3 3 5 6 4 7 5 3 3 6 12 | ||
LJ005-0079 4 3 3 3 3 6 4 5 4 3 4 4 5 4 4 3 6 4 3 3 4 5 4 4 4 6 9 14 18 6 3 4 7 4 6 3 4 6 6 4 4 3 5 4 4 6 4 7 5 7 5 4 5 5 5 3 4 5 5 4 3 7 3 5 6 3 4 5 5 5 5 4 4 5 3 6 9 4 4 5 8 22 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
LJ001-0110 IY1 V IH0 N DH AH1 K AE1 S L AH0 N T AY1 P sil W EH1 N IH0 N L AA1 R JH D sil SH OW1 Z G R EY1 T SH AO1 R T K AH2 M IH0 NG Z IH1 N DH IH1 S R IY0 S P EH1 K T | ||
LJ002-0018 DH IY0 IH0 N AE1 D IH0 K W AH0 S IY0 AH0 V DH AH0 JH EY1 L W AH0 Z N OW1 T AH0 S T sil AE1 N D R IH0 P AO1 R T AH0 D AH0 P AA1 N sil AH0 G EH1 N AE1 N D AH0 G EH1 N B AY1 DH AH1 G R AE1 N D JH UH1 R IY0 Z AH0 V DH AH0 S IH1 T IY0 AH0 V L AH1 N D AH0 N sil | ||
LJ002-0043 L AO1 NG N EH1 R OW0 R UW1 M Z sil W AH1 N TH ER1 T IY2 S IH1 K S F IY1 T sil S IH1 K S sil T W EH1 N T IY0 TH R IY1 F IY1 T sil AE1 N D DH IY0 EY1 TH sil EY0 T IY1 N | ||
LJ003-0111 HH IY1 W AH0 Z IH0 N K AA1 N S AH0 K W AH0 N S P UH1 T sil AW1 T AH0 V DH AH1 P ER0 T EH1 K SH AH0 N AH0 V DH EH1 R IH0 N T ER1 N AH0 L L AO1 sil EH1 N D K W OW1 T sil DH EH1 R K OW1 D W AH0 Z AH0 S AH1 B JH IH0 K T AH0 V S AH1 M K Y UH2 R IY0 AA1 S AH0 T IY0 | ||
LJ003-0345 sil AO1 L DH AH1 K AH0 M IH1 T IY0 K UH1 D D UW1 IH0 N DH IH0 S R IY0 S P EH1 K T sil W AH0 Z T AH0 TH R OW1 DH AH0 R IY0 S P AA2 N S AH0 B IH1 L AH0 T IY0 AA1 N sil AH1 DH ER0 Z | ||
LJ004-0045 M IH1 S T ER0 S T ER1 JH IH0 Z B AO1 R N sil S ER1 JH EY1 M Z M AE1 K AH0 N T AA2 SH sil S ER1 JH EY1 M Z S K AA1 R L IH0 T sil AE1 N D W IH1 L Y AH0 M sil W IH1 L B ER0 F AO2 R S | ||
LJ004-0096 sil DH AH1 F EY1 T AH0 L K AA1 N S AH0 K W EH2 N S AH0 Z W EH2 R AH1 V M AY1 T B IY0 P R IH0 V EH1 N T IH0 D sil IH0 F DH AH0 JH AH1 S T IH0 S IH0 Z AH0 V DH AH1 P IY1 S W ER0 D UW1 L IY0 AO1 TH ER0 AY2 Z D sil | ||
LJ004-0152 AO2 L DH OW1 AE1 T M IH1 S T ER0 B AH1 K S T AH0 N EH1 S V IH1 Z IH0 T AH0 N UW1 JH EY1 L W AH0 Z IH0 N P R AA1 S EH2 S AH0 V IH0 R EH1 K SH AH0 N sil DH AH0 F ER1 S T S T EH1 P T AO1 R D Z R AH0 F AO1 R M S IH1 N S HH AW1 ER0 D EH1 S V IH2 Z IH0 T EY1 SH AH0 N sil IH1 N S EH1 V AH0 N T IY1 N S EH1 V AH0 N IY0 F AO1 R sil | ||
LJ005-0014 S P IY1 K IH0 NG AA1 N AH0 D AH0 B EY1 T sil AO1 N P R IH1 Z AH0 N M AE1 T ER0 Z sil HH IY1 D IH0 K L EH1 R D DH AE1 T | ||
LJ005-0079 AE1 N D IH0 M P R UW1 V DH AH0 M AO1 R AH0 L Z AH0 V DH AH1 P R IH1 Z N ER0 Z sil AE1 N D SH AE1 L IH0 N SH UH1 R DH AH0 P R AA1 P ER0 M EH1 ZH ER0 AH0 V P AH1 N IH0 SH M AH0 N T sil T AH0 K AH0 N V IH1 K T AH0 D sil AH0 F EH1 N D ER0 Z |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
LJ001-0110 LJ | ||
LJ002-0018 LJ | ||
LJ002-0043 LJ | ||
LJ003-0111 LJ | ||
LJ003-0345 LJ | ||
LJ004-0045 LJ | ||
LJ004-0096 LJ | ||
LJ004-0152 LJ | ||
LJ005-0014 LJ | ||
LJ005-0079 LJ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
LJ001-0110 | ||
LJ002-0018 | ||
LJ002-0043 | ||
LJ003-0111 | ||
LJ003-0345 | ||
LJ004-0045 | ||
LJ004-0096 | ||
LJ004-0152 | ||
LJ005-0014 | ||
LJ005-0079 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
LJ001-0110 /path/to/dataset/LJ001-0110.wav | ||
LJ002-0018 /path/to/dataset/LJ002-0018.wav | ||
LJ002-0043 /path/to/dataset/LJ002-0043.wav | ||
LJ003-0111 /path/to/dataset/LJ003-0111.wav | ||
LJ003-0345 /path/to/dataset/LJ003-0345.wav | ||
LJ004-0045 /path/to/dataset/LJ004-0045.wav | ||
LJ004-0096 /path/to/dataset/LJ004-0096.wav | ||
LJ004-0152 /path/to/dataset/LJ004-0152.wav | ||
LJ005-0014 /path/to/dataset/LJ005-0014.wav | ||
LJ005-0079 /path/to/dataset/LJ005-0079.wav |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.