forked from X-LANCE/VoiceFlow-TTS
Commit 60eea03 (1 parent: 0532b53)
Showing 42 changed files with 7,319 additions and 2 deletions.
@@ -0,0 +1,18 @@
# HifiGAN (parallel_wavegan implementation)

We release trained checkpoints on LJSpeech and LibriTTS here. The details are:

| Dataset  | Sampling Rate | Hop Size | Window Length | Normed |
|----------|---------------|----------|---------------|--------|
| LJSpeech | 16k           | 256      | 1024          | True   |
| LibriTTS | 16k           | 200      | 800           | True   |

The trained checkpoints for both datasets are provided online. Unzip them into sub-folders of `exp/` (see the sketch at the end of this README).

Vocoding can then be done by:
```shell
cd ../; source path.sh; cd -;  # if path.sh is not activated yet
bash generation.sh --dataset ljspeech --eval_dir /path/that/contains/feats.scp  # or --dataset libritts
```
The program reads `feats.scp` in `$eval_dir` and saves the synthesized audio in that directory.
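As a concrete sketch of the unzip step (the archive name is hypothetical; `generation.sh` simply picks the newest `*.pkl` under `exp/train_hifigan.<dataset>`):

```shell
# Archive name is illustrative; only the destination layout matters.
unzip hifigan_ljspeech.zip -d exp/train_hifigan.ljspeech
ls exp/train_hifigan.ljspeech/*.pkl  # generation.sh uses the newest .pkl here
```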
@@ -0,0 +1,91 @@
```shell
# ====== About run.pl, queue.pl, slurm.pl, and ssh.pl ======
# Usage: <cmd>.pl [options] JOB=1:<nj> <log> <command...>
# e.g.
#   run.pl --mem 4G JOB=1:10 echo.JOB.log echo JOB
#
# Options:
#   --time <time>: Limit the maximum execution time.
#   --mem <mem>: Limit the maximum memory usage.
#   --max-jobs-run <njob>: Limit the number of parallel jobs. This is ignored for non-array jobs.
#   --num-threads <nthreads>: Specify the number of CPU cores.
#   --gpu <ngpu>: Specify the number of GPU devices.
#   --config: Change the configuration file from the default.
#
# "JOB=1:10" is used for "array jobs" and controls the number of parallel jobs.
# The string left of "=", i.e. "JOB", is replaced by the job index <N> in the
# command and in the log file name, e.g. "echo JOB" becomes "echo 3" for the
# 3rd job and "echo 8" for the 8th job.
# Note that the range must start from a positive number, so "JOB=0:10" is invalid.
#
# run.pl, queue.pl, slurm.pl, and ssh.pl share a unified interface that does
# not depend on the backend. The options are mapped to backend-specific
# options, configured by "conf/queue.conf" and "conf/slurm.conf" by default.
# If jobs fail, your configuration might be wrong for your environment.
#
# The official documentation for run.pl, queue.pl, slurm.pl, and ssh.pl:
#   "Parallelization in Kaldi": http://kaldi-asr.org/doc/queue.html
# ==========================================================

# Select the backend used by run.sh from "local", "stdout", "sge", "slurm", or "ssh"
cmd_backend="local"

# Local machine, without any job scheduling system
if [ "${cmd_backend}" = local ]; then

    # Used for general jobs
    export train_cmd="utils/run.pl"
    # Used for "*_train.py": "--gpu" is appended optionally by run.sh
    export cuda_cmd="utils/run.pl"
    # Used for "*_recog.py"
    export decode_cmd="utils/run.pl"

# Local machine, without any job scheduling system; logs are also printed to stdout
elif [ "${cmd_backend}" = stdout ]; then

    # Used for general jobs
    export train_cmd="utils/stdout.pl"
    # Used for "*_train.py": "--gpu" is appended optionally by run.sh
    export cuda_cmd="utils/stdout.pl"
    # Used for "*_recog.py"
    export decode_cmd="utils/stdout.pl"

# "qsub" (SGE, Torque, PBS, etc.)
elif [ "${cmd_backend}" = sge ]; then
    # The default setting is written in conf/queue.conf.
    # You must change "-q g.q" to a queue that exists in your environment.
    # To list the queue names, type "qhost -q".
    # Note that to use "--gpu *", you have to set up "complex_value" in the system scheduler.

    export train_cmd="utils/queue.pl"
    export cuda_cmd="utils/queue.pl"
    export decode_cmd="utils/queue.pl"

# "sbatch" (Slurm)
elif [ "${cmd_backend}" = slurm ]; then
    # The default setting is written in conf/slurm.conf.
    # You must change "-p cpu" and "-p gpu" to partitions that exist in your environment.
    # To list the partition names, type "sinfo".
    # You can use "--gpu *" by default for Slurm; it is interpreted as "--gres gpu:*".
    # The devices are allocated exclusively using "${CUDA_VISIBLE_DEVICES}".

    export train_cmd="utils/slurm.pl"
    export cuda_cmd="utils/slurm.pl"
    export decode_cmd="utils/slurm.pl"

elif [ "${cmd_backend}" = ssh ]; then
    # You have to create ".queue/machines" to specify the hosts on which to execute jobs,
    # e.g. .queue/machines:
    #   host1
    #   host2
    #   host3
    # It is assumed you can log in to them without a password, i.e. you have set up SSH keys.

    export train_cmd="utils/ssh.pl"
    export cuda_cmd="utils/ssh.pl"
    export decode_cmd="utils/ssh.pl"

else
    echo "$0: Error: Unknown cmd_backend=${cmd_backend}" 1>&2
    return 1
fi
```
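As a quick illustration of the unified interface (the log path and command here are illustrative, not part of the recipe):

```shell
. ./cmd.sh  # pick up the exported *_cmd variables

# An array job: JOB is replaced by 1..4 in both the command and the
# log name, producing exp/log/demo.1.log ... exp/log/demo.4.log.
${train_cmd} JOB=1:4 exp/log/demo.JOB.log echo "this is job JOB"
```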
@@ -0,0 +1,30 @@
```shell
#!/bin/bash

. ./cmd.sh

dataset=ljspeech
expdir=exp/train_hifigan.${dataset}

eval_dir=

. parse_options.sh || exit 1;

# Use the most recently modified checkpoint in ${expdir}.
checkpoint="$(ls -dt "${expdir}"/*.pkl | head -1 || true)"
echo "${checkpoint}"
outdir=${eval_dir}/hifigan

# ===========================================
# feat-to-len.py writes Kaldi-style "<utt-id> <num-frames>" lines,
# which decode.py uses to size the outputs.
feat-to-len.py scp:${eval_dir}/feats.scp > ${eval_dir}/utt2num_frames || exit 1

mkdir -p "${outdir}/log"
echo ========== HifiGAN Generation ==========

${cuda_cmd} --gpu 1 "${outdir}/log/decode.log" \
    parallel_wavegan/bin/decode.py \
        --feats-scp "${eval_dir}/feats.scp" \
        --num-frames "${eval_dir}/utt2num_frames" \
        --checkpoint "${checkpoint}" \
        --outdir "${outdir}/wav" \
        --verbose "1"
echo "Successfully finished decoding."
```
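A hypothetical end-to-end invocation (the eval directory is illustrative); per the script above, outputs land in `<eval_dir>/hifigan/wav`:

```shell
bash generation.sh --dataset libritts --eval_dir exp/tts_output/eval
ls exp/tts_output/eval/hifigan/wav  # wavs written by decode.py
```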
@@ -0,0 +1,3 @@
```python
# -*- coding: utf-8 -*-

__version__ = "0.5.3"
```
Empty file.
@@ -0,0 +1,141 @@
```python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# Copyright 2019 Tomoki Hayashi
# MIT License (https://opensource.org/licenses/MIT)

"""Calculate statistics of feature files."""

import argparse
import logging
import os

import numpy as np
import yaml

from sklearn.preprocessing import StandardScaler
from tqdm import tqdm

from parallel_wavegan.datasets import MelDataset
from parallel_wavegan.datasets import MelSCPDataset
from parallel_wavegan.utils import read_hdf5
from parallel_wavegan.utils import write_hdf5


def main():
    """Run preprocessing process."""
    parser = argparse.ArgumentParser(
        description="Compute mean and variance of dumped raw features "
        "(See detail in parallel_wavegan/bin/compute_statistics.py)."
    )
    parser.add_argument(
        "--feats-scp",
        "--scp",
        default=None,
        type=str,
        help="kaldi-style feats.scp file. "
        "you need to specify either feats-scp or rootdir.",
    )
    parser.add_argument(
        "--rootdir",
        type=str,
        help="directory including feature files. "
        "you need to specify either feats-scp or rootdir.",
    )
    parser.add_argument(
        "--config",
        type=str,
        required=True,
        help="yaml format configuration file.",
    )
    parser.add_argument(
        "--dumpdir",
        type=str,
        required=True,
        help="directory to save statistics.",
    )
    parser.add_argument(
        "--verbose",
        type=int,
        default=1,
        help="logging level. higher is more logging. (default=1)",
    )
    args = parser.parse_args()

    # set logger
    if args.verbose > 1:
        logging.basicConfig(
            level=logging.DEBUG,
            format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s",
        )
    elif args.verbose > 0:
        logging.basicConfig(
            level=logging.INFO,
            format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s",
        )
    else:
        logging.basicConfig(
            level=logging.WARN,
            format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s",
        )
        logging.warning("Skip DEBUG/INFO messages")

    # load config
    with open(args.config) as f:
        config = yaml.load(f, Loader=yaml.Loader)
    config.update(vars(args))

    # check arguments
    if (args.feats_scp is not None and args.rootdir is not None) or (
        args.feats_scp is None and args.rootdir is None
    ):
        raise ValueError("Please specify either --rootdir or --feats-scp.")

    # check directory existence
    if not os.path.exists(args.dumpdir):
        os.makedirs(args.dumpdir)

    # get dataset
    if args.feats_scp is None:
        if config["format"] == "hdf5":
            mel_query = "*.h5"
            mel_load_fn = lambda x: read_hdf5(x, "feats")  # NOQA
        elif config["format"] == "npy":
            mel_query = "*-feats.npy"
            mel_load_fn = np.load
        else:
            raise ValueError("support only hdf5 or npy format.")
        dataset = MelDataset(args.rootdir, mel_query=mel_query, mel_load_fn=mel_load_fn)
    else:
        dataset = MelSCPDataset(args.feats_scp)
    logging.info(f"The number of files = {len(dataset)}.")

    # calculate statistics incrementally: partial_fit keeps a running
    # per-dimension mean/variance, so the whole corpus never needs to
    # be in memory at once
    scaler = StandardScaler()
    for mel in tqdm(dataset):
        scaler.partial_fit(mel)

    if config["format"] == "hdf5":
        write_hdf5(
            os.path.join(args.dumpdir, "stats.h5"),
            "mean",
            scaler.mean_.astype(np.float32),
        )
        write_hdf5(
            os.path.join(args.dumpdir, "stats.h5"),
            "scale",
            scaler.scale_.astype(np.float32),
        )
    else:
        stats = np.stack([scaler.mean_, scaler.scale_], axis=0)
        np.save(
            os.path.join(args.dumpdir, "stats.npy"),
            stats.astype(np.float32),
            allow_pickle=False,
        )


if __name__ == "__main__":
    main()
```
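For reference, a hypothetical invocation (paths are illustrative; the flags match the argparse definitions above, and the YAML config must define `format` as `hdf5` or `npy`):

```shell
python parallel_wavegan/bin/compute_statistics.py \
    --feats-scp dump/ljspeech/train/feats.scp \
    --config conf/hifigan.yaml \
    --dumpdir dump/ljspeech/stats
# writes stats.h5 (hdf5) or stats.npy (npy) holding per-dimension mean/scale;
# feats.scp maps utterance IDs to feature matrices, one
# "<utt-id> <path-to-feats>" entry per line (Kaldi style)
```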