diff --git a/README.md b/README.md
index 8e86d43..7cb6ebc 100644
--- a/README.md
+++ b/README.md
@@ -34,7 +34,7 @@ In this example, the following plain text files are necessary:
 Also, there is a `data/ljspeech/phones.txt` file to specify all the phones together with their indexes in dictionary.
 
 For LJSpeech, we provide the processed file [online](https://huggingface.co/datasets/cantabile-kwok/ljspeech-1024-256-dur/resolve/main/ljspeech-1024-256.zip).
-You can download it and unzip to `data/ljspeech`.
+You can download it and unzip to `data/ljspeech/{train,val}`.
 If you want to train on your own dataset, you might have to create these files yourself (or change the data loading strategy).
 
 After having these manifest files, please do the following to extract mel-spectrogram for training:
@@ -115,7 +115,7 @@ python inference_dataset.py -c configs/${your_yaml} -m ${model_name} --EMA \
 This will synthesize mel-spectrograms for the validation set in your config, storing them at `synthetic_wav/${model_name}/tts_gt_spk/feats.scp`.
 Speaker, speed and temperature can be specified; see `tools.get_hparams_decode()` function for complete set of options.
 
-> TODO: VOCODER
+Inference can then be done in the `hifigan/` directory. Please refer to the [README](hifigan/README.md) there.
 
 ## Acknowledgement
 During the development, the following repositories were referred to:
diff --git a/hifigan/README.md b/hifigan/README.md
new file mode 100644
index 0000000..0888738
--- /dev/null
+++ b/hifigan/README.md
@@ -0,0 +1,18 @@
+# HifiGAN (parallel_wavegan implementation)
+
+We release the trained checkpoints on LJSpeech and LibriTTS here.
+The detailed information is:
+
+| Dataset  | Sampling Rate | Hop Size | Window Length | Normed |
+|----------|---------------|----------|---------------|--------|
+| LJSpeech | 16k           | 256      | 1024          | True | 
+| LibriTTS | 16k           | 200      | 800           | True |
+
+The trained checkpoints for both datasets are provided online. You can unzip them to sub-folders in `exp/`.
+
+Vocoding can be done by running:
+```shell
+cd ../; source path.sh; cd -;  # if path.sh not activated
+bash generation.sh --dataset ljspeech --eval_dir /path/that/contains/feats.scp  # or: --dataset libritts
+```
+The program reads `feats.scp` in `$eval_dir` and saves the synthesized audio to `$eval_dir/hifigan/wav`.
diff --git a/hifigan/cmd.sh b/hifigan/cmd.sh
new file mode 100644
index 0000000..19f3421
--- /dev/null
+++ b/hifigan/cmd.sh
@@ -0,0 +1,91 @@
+# ====== About run.pl, queue.pl, slurm.pl, and ssh.pl ======
+# Usage: <cmd>.pl [options] JOB=1:<nj> <log> <command...>
+# e.g.
+#   run.pl --mem 4G JOB=1:10 echo.JOB.log echo JOB
+#
+# Options:
+#   --time <time>: Limit the maximum time to execute.
+#   --mem <mem>: Limit the maximum memory usage.
+#   --max-jobs-run <njob>: Limit the number of parallel jobs. This is ignored for non-array jobs.
+#   --num-threads <nthreads>: Specify the number of CPU cores.
+#   --gpu <ngpu>: Specify the number of GPU devices.
+#   --config: Change the configuration file from default.
+#
+# "JOB=1:10" is used for "array jobs" and it can control the number of parallel jobs.
+# The left string of "=", i.e. "JOB", is replaced by <N>(Nth job) in the command and the log file name,
+# e.g. "echo JOB" is changed to "echo 3" for the 3rd job and "echo 8" for 8th job respectively.
+# Note that the number must start with a positive number, so you can't use "JOB=0:10" for example.
+#
+# run.pl, queue.pl, slurm.pl, and ssh.pl have a unified interface that does not depend on the backend.
+# These options are mapped to the specific options of each backend, and the mapping
+# is configured by "conf/queue.conf" and "conf/slurm.conf" by default.
+# If jobs fail, your configuration might be wrong for your environment.
+#
+#
+# The official documentation for run.pl, queue.pl, slurm.pl, and ssh.pl:
+#   "Parallelization in Kaldi": http://kaldi-asr.org/doc/queue.html
+# =========================================================~
+
+
+# Select the backend used by run.sh from "local", "stdout", "sge", "slurm", or "ssh"
+cmd_backend="local"
+
+# Local machine, without any Job scheduling system
+if [ "${cmd_backend}" = local ]; then
+
+    # The other usage
+    export train_cmd="utils/run.pl"
+    # Used for "*_train.py": "--gpu" is appended optionally by run.sh
+    export cuda_cmd="utils/run.pl"
+    # Used for "*_recog.py"
+    export decode_cmd="utils/run.pl"
+
+# Local machine using utils/stdout.pl, without any Job scheduling system
+elif [ "${cmd_backend}" = stdout ]; then
+
+    # The other usage
+    export train_cmd="utils/stdout.pl"
+    # Used for "*_train.py": "--gpu" is appended optionally by run.sh
+    export cuda_cmd="utils/stdout.pl"
+    # Used for "*_recog.py"
+    export decode_cmd="utils/stdout.pl"
+
+# "qsub" (SGE, Torque, PBS, etc.)
+elif [ "${cmd_backend}" = sge ]; then
+    # The default setting is written in conf/queue.conf.
+    # You must change "-q g.q" for the "queue" for your environment.
+    # To know the "queue" names, type "qhost -q"
+    # Note that to use "--gpu *", you have to setup "complex_value" for the system scheduler.
+
+    export train_cmd="utils/queue.pl"
+    export cuda_cmd="utils/queue.pl"
+    export decode_cmd="utils/queue.pl"
+
+# "sbatch" (Slurm)
+elif [ "${cmd_backend}" = slurm ]; then
+    # The default setting is written in conf/slurm.conf.
+    # You must change "-p cpu" and "-p gpu" for the "partition" for your environment.
+    # To know the "partition" names, type "sinfo".
+    # You can use "--gpu * " by default for slurm and it is interpreted as "--gres gpu:*"
+    # The devices are allocated exclusively using "${CUDA_VISIBLE_DEVICES}".
+
+    export train_cmd="utils/slurm.pl"
+    export cuda_cmd="utils/slurm.pl"
+    export decode_cmd="utils/slurm.pl"
+
+elif [ "${cmd_backend}" = ssh ]; then
+    # You have to create ".queue/machines" to specify the host to execute jobs.
+    # e.g. .queue/machines
+    #   host1
+    #   host2
+    #   host3
+    # Assuming you can log in to them without any password, i.e. you have to set up ssh keys.
+
+    export train_cmd="utils/ssh.pl"
+    export cuda_cmd="utils/ssh.pl"
+    export decode_cmd="utils/ssh.pl"
+
+else
+    echo "$0: Error: Unknown cmd_backend=${cmd_backend}" 1>&2
+    return 1
+fi
diff --git a/hifigan/generation.sh b/hifigan/generation.sh
new file mode 100644
index 0000000..3371dca
--- /dev/null
+++ b/hifigan/generation.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+# Vocode the features listed in ${eval_dir}/feats.scp with the newest checkpoint in ${expdir}.
+. ./cmd.sh
+
+dataset=ljspeech
+expdir=  # optional override; derived from ${dataset} after option parsing when left empty
+
+eval_dir=
+
+. parse_options.sh || exit 1;
+[ -n "${expdir}" ] || expdir=exp/train_hifigan.${dataset}  # set after parsing so that --dataset takes effect
+checkpoint="$(ls -dt "${expdir}"/*.pkl | head -1 || true)"
+echo "${checkpoint}"
+outdir=$eval_dir/hifigan
+
+[ -n "${eval_dir}" ] || { echo "$0: Error: --eval_dir is required" 1>&2; exit 1; }
+feat-to-len.py scp:${eval_dir}/feats.scp > ${eval_dir}/utt2num_frames || exit 1
+
+mkdir -p ${outdir}/log
+echo ========== HifiGAN Generation ==========
+
+${cuda_cmd} --gpu 1 "${outdir}/log/decode.log" \
+    parallel_wavegan/bin/decode.py \
+        --feats-scp $eval_dir/feats.scp \
+        --num-frames $eval_dir/utt2num_frames \
+        --checkpoint "${checkpoint}" \
+        --outdir "${outdir}/wav" \
+        --verbose "1" || exit 1
+echo "Successfully finished decoding."
+
diff --git a/hifigan/parallel_wavegan/__init__.py b/hifigan/parallel_wavegan/__init__.py
new file mode 100644
index 0000000..28db038
--- /dev/null
+++ b/hifigan/parallel_wavegan/__init__.py
@@ -0,0 +1,3 @@
+# -*- coding: utf-8 -*-
+
+__version__ = "0.5.3"
diff --git a/hifigan/parallel_wavegan/bin/__init__.py b/hifigan/parallel_wavegan/bin/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/hifigan/parallel_wavegan/bin/compute_statistics.py b/hifigan/parallel_wavegan/bin/compute_statistics.py
new file mode 100644
index 0000000..4d06070
--- /dev/null
+++ b/hifigan/parallel_wavegan/bin/compute_statistics.py
@@ -0,0 +1,141 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+# Copyright 2019 Tomoki Hayashi
+#  MIT License (https://opensource.org/licenses/MIT)
+
+"""Calculate statistics of feature files."""
+
+import argparse
+import logging
+import os
+
+import numpy as np
+import yaml
+
+from sklearn.preprocessing import StandardScaler
+from tqdm import tqdm
+
+from parallel_wavegan.datasets import MelDataset
+from parallel_wavegan.datasets import MelSCPDataset
+from parallel_wavegan.utils import read_hdf5
+from parallel_wavegan.utils import write_hdf5
+
+
+def main():
+    """Compute the mean and scale of dumped features and save them as a stats file."""
+    parser = argparse.ArgumentParser(
+        description="Compute mean and variance of dumped raw features "
+        "(See detail in parallel_wavegan/bin/compute_statistics.py)."
+    )
+    parser.add_argument(
+        "--feats-scp",
+        "--scp",
+        default=None,
+        type=str,
+        help="kaldi-style feats.scp file. "
+        "you need to specify either feats-scp or rootdir.",
+    )
+    parser.add_argument(
+        "--rootdir",
+        type=str,
+        help="directory including feature files. "
+        "you need to specify either feats-scp or rootdir.",
+    )
+    parser.add_argument(
+        "--config",
+        type=str,
+        required=True,
+        help="yaml format configuration file.",
+    )
+    parser.add_argument(
+        "--dumpdir",
+        default=None,
+        type=str,
+        required=True,
+        help="directory to save statistics. "
+        "it is created if it does not exist. (required)",
+    )
+    parser.add_argument(
+        "--verbose",
+        type=int,
+        default=1,
+        help="logging level. higher is more logging. (default=1)",
+    )
+    args = parser.parse_args()
+
+    # set logger
+    if args.verbose > 1:
+        logging.basicConfig(
+            level=logging.DEBUG,
+            format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s",
+        )
+    elif args.verbose > 0:
+        logging.basicConfig(
+            level=logging.INFO,
+            format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s",
+        )
+    else:
+        logging.basicConfig(
+            level=logging.WARN,
+            format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s",
+        )
+        logging.warning("Skip DEBUG/INFO messages")
+
+    # load config (NOTE: yaml.Loader can construct arbitrary objects; use trusted files only)
+    with open(args.config) as f:
+        config = yaml.load(f, Loader=yaml.Loader)
+    config.update(vars(args))  # command line arguments override config entries
+
+    # check arguments
+    if (args.feats_scp is not None and args.rootdir is not None) or (
+        args.feats_scp is None and args.rootdir is None
+    ):
+        raise ValueError("Please specify either --rootdir or --feats-scp.")
+
+    # make sure the output directory exists; exist_ok avoids the race between
+    # a separate existence check and the creation
+    os.makedirs(args.dumpdir, exist_ok=True)
+
+    # get dataset
+    if args.feats_scp is None:
+        if config["format"] == "hdf5":
+            mel_query = "*.h5"
+            mel_load_fn = lambda x: read_hdf5(x, "feats")  # NOQA
+        elif config["format"] == "npy":
+            mel_query = "*-feats.npy"
+            mel_load_fn = np.load
+        else:
+            raise ValueError("support only hdf5 or npy format.")
+        dataset = MelDataset(args.rootdir, mel_query=mel_query, mel_load_fn=mel_load_fn)
+    else:
+        dataset = MelSCPDataset(args.feats_scp)
+    logging.info(f"The number of files = {len(dataset)}.")
+
+    # calculate statistics incrementally, one utterance at a time
+    scaler = StandardScaler()
+    for mel in tqdm(dataset):
+        scaler.partial_fit(mel)
+
+    if config["format"] == "hdf5":
+        write_hdf5(
+            os.path.join(args.dumpdir, "stats.h5"),
+            "mean",
+            scaler.mean_.astype(np.float32),
+        )
+        write_hdf5(
+            os.path.join(args.dumpdir, "stats.h5"),
+            "scale",
+            scaler.scale_.astype(np.float32),
+        )
+    else:  # every other format is written as one array: row 0 is mean, row 1 is scale
+        stats = np.stack([scaler.mean_, scaler.scale_], axis=0)
+        np.save(
+            os.path.join(args.dumpdir, "stats.npy"),
+            stats.astype(np.float32),
+            allow_pickle=False,
+        )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/hifigan/parallel_wavegan/bin/decode.py b/hifigan/parallel_wavegan/bin/decode.py
new file mode 100644
index 0000000..c5accc9
--- /dev/null
+++ b/hifigan/parallel_wavegan/bin/decode.py
@@ -0,0 +1,192 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+# Copyright 2019 Tomoki Hayashi
+#  MIT License (https://opensource.org/licenses/MIT)
+
+"""Decode with trained Parallel WaveGAN Generator."""
+
+import argparse
+import logging
+import os
+import time
+
+import numpy as np
+import soundfile as sf
+import torch
+import yaml
+
+from tqdm import tqdm
+
+from parallel_wavegan.datasets import MelDataset
+from parallel_wavegan.datasets import MelSCPDataset
+from parallel_wavegan.utils import load_model
+from parallel_wavegan.utils import read_hdf5
+
+
+def main():
+    """Decode features into waveforms with a trained generator and save them as wav."""
+    parser = argparse.ArgumentParser(
+        description="Decode dumped features with trained Parallel WaveGAN Generator "
+        "(See detail in parallel_wavegan/bin/decode.py)."
+    )
+    parser.add_argument(
+        "--feats-scp",
+        "--scp",
+        default=None,
+        type=str,
+        help="kaldi-style feats.scp file. "
+        "you need to specify either feats-scp or dumpdir.",
+    )
+    parser.add_argument(
+        "--num-frames",
+        default=None,
+        type=str,
+        help="kaldi-style utt2num_frames file. it is only used with feats-scp.",
+    )
+    parser.add_argument(
+        "--dumpdir",
+        default=None,
+        type=str,
+        help="directory including feature files. "
+        "you need to specify either feats-scp or dumpdir.",
+    )
+    parser.add_argument(
+        "--outdir",
+        type=str,
+        required=True,
+        help="directory to save generated speech.",
+    )
+    parser.add_argument(
+        "--checkpoint",
+        type=str,
+        required=True,
+        help="checkpoint file to be loaded.",
+    )
+    parser.add_argument(
+        "--config",
+        default=None,
+        type=str,
+        help="yaml format configuration file. if not explicitly provided, "
+        "it will be searched in the checkpoint directory. (default=None)",
+    )
+    parser.add_argument(
+        "--normalize-before",
+        default=False,
+        action="store_true",
+        help="whether to perform feature normalization before input to the model. "
+        "if true, it assumes that the feature is de-normalized. this is useful when "
+        "text2mel model and vocoder use different feature statistics.",
+    )
+    parser.add_argument(
+        "--verbose",
+        type=int,
+        default=1,
+        help="logging level. higher is more logging. (default=1)",
+    )
+    args = parser.parse_args()
+
+    # set logger
+    if args.verbose > 1:
+        logging.basicConfig(
+            level=logging.DEBUG,
+            format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s",
+        )
+    elif args.verbose > 0:
+        logging.basicConfig(
+            level=logging.INFO,
+            format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s",
+        )
+    else:
+        logging.basicConfig(
+            level=logging.WARN,
+            format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s",
+        )
+        logging.warning("Skip DEBUG/INFO messages")
+
+    # make sure the output directory exists (exist_ok avoids a check-then-create race)
+    os.makedirs(args.outdir, exist_ok=True)
+
+    # load config (defaults to config.yml located next to the checkpoint)
+    if args.config is None:
+        dirname = os.path.dirname(args.checkpoint)
+        args.config = os.path.join(dirname, "config.yml")
+    with open(args.config) as f:
+        config = yaml.load(f, Loader=yaml.Loader)
+    config.update(vars(args))  # command line arguments override config entries
+
+    # check arguments
+    if (args.feats_scp is not None and args.dumpdir is not None) or (
+        args.feats_scp is None and args.dumpdir is None
+    ):
+        raise ValueError("Please specify either --dumpdir or --feats-scp.")
+
+    # get dataset
+    if args.dumpdir is not None:
+        if config["format"] == "hdf5":
+            mel_query = "*.h5"
+            mel_load_fn = lambda x: read_hdf5(x, "feats")  # NOQA
+        elif config["format"] == "npy":
+            mel_query = "*-feats.npy"
+            mel_load_fn = np.load
+        else:
+            raise ValueError("Support only hdf5 or npy format.")
+        dataset = MelDataset(
+            args.dumpdir,
+            mel_query=mel_query,
+            mel_load_fn=mel_load_fn,
+            return_utt_id=True,
+        )
+    else:
+        dataset = MelSCPDataset(
+            feats_scp=args.feats_scp,
+            utt2num_frames=args.num_frames,
+            return_utt_id=True,
+        )
+    logging.info(f"The number of features to be decoded = {len(dataset)}.")
+
+    # setup model
+    if torch.cuda.is_available():
+        device = torch.device("cuda")
+        logging.info("Using GPU.")
+    else:
+        device = torch.device("cpu")
+        logging.info("Using CPU.")
+    model = load_model(args.checkpoint, config)
+    logging.info(f"Loaded model parameters from {args.checkpoint}.")
+    if args.normalize_before:
+        assert hasattr(model, "mean"), "Feature stats are not registered."
+        assert hasattr(model, "scale"), "Feature stats are not registered."
+    model.remove_weight_norm()
+    model = model.eval().to(device)
+
+    print(f"Param num: {sum([p.numel() for p in model.parameters()])}")
+
+    # start generation
+    idx, total_rtf = 0, 0.0  # idx stays 0 (instead of being unbound) for an empty dataset
+    with torch.no_grad(), tqdm(dataset, desc="[decode]") as pbar:
+        for idx, (utt_id, c) in enumerate(pbar, 1):
+            # generate
+            c = torch.tensor(c, dtype=torch.float).to(device)
+            start = time.time()
+            y = model.inference(c, normalize_before=args.normalize_before).view(-1)
+            rtf = (time.time() - start) / (len(y) / config["sampling_rate"])
+            pbar.set_postfix({"RTF": rtf})
+            total_rtf += rtf
+
+            # save as PCM 16 bit wav file
+            sf.write(
+                os.path.join(config["outdir"], f"{utt_id}.wav"),
+                y.cpu().numpy(),
+                config["sampling_rate"],
+                "PCM_16",
+            )
+
+    # report average RTF (max() guards the division when nothing was decoded)
+    logging.info(
+        f"Finished generation of {idx} utterances (RTF = {total_rtf / max(idx, 1):.03f})."
+    )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/hifigan/parallel_wavegan/bin/normalize.py b/hifigan/parallel_wavegan/bin/normalize.py
new file mode 100644
index 0000000..53644fd
--- /dev/null
+++ b/hifigan/parallel_wavegan/bin/normalize.py
@@ -0,0 +1,217 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+# Copyright 2019 Tomoki Hayashi
+#  MIT License (https://opensource.org/licenses/MIT)
+
+"""Normalize feature files and dump them."""
+
+import argparse
+import logging
+import os
+
+import numpy as np
+import yaml
+
+from sklearn.preprocessing import StandardScaler
+from tqdm import tqdm
+
+from parallel_wavegan.datasets import AudioMelDataset
+from parallel_wavegan.datasets import AudioMelSCPDataset
+from parallel_wavegan.datasets import MelDataset
+from parallel_wavegan.datasets import MelSCPDataset
+from parallel_wavegan.utils import read_hdf5
+from parallel_wavegan.utils import write_hdf5
+
+
+def main():
+    """Normalize dumped features with precomputed statistics and dump the results."""
+    parser = argparse.ArgumentParser(
+        description="Normalize dumped raw features (See detail in parallel_wavegan/bin/normalize.py)."
+    )
+    parser.add_argument(
+        "--rootdir",
+        default=None,
+        type=str,
+        help="directory including feature files to be normalized. "
+        "you need to specify either *-scp or rootdir.",
+    )
+    parser.add_argument(
+        "--wav-scp",
+        default=None,
+        type=str,
+        help="kaldi-style wav.scp file. "
+        "you need to specify either *-scp or rootdir.",
+    )
+    parser.add_argument(
+        "--feats-scp",
+        default=None,
+        type=str,
+        help="kaldi-style feats.scp file. "
+        "you need to specify either *-scp or rootdir.",
+    )
+    parser.add_argument(
+        "--segments",
+        default=None,
+        type=str,
+        help="kaldi-style segments file.",
+    )
+    parser.add_argument(
+        "--dumpdir",
+        type=str,
+        required=True,
+        help="directory to dump normalized feature files.",
+    )
+    parser.add_argument(
+        "--stats",
+        type=str,
+        required=True,
+        help="statistics file.",
+    )
+    parser.add_argument(
+        "--skip-wav-copy",
+        default=False,
+        action="store_true",
+        help="whether to skip the copy of wav files.",
+    )
+    parser.add_argument(
+        "--config", type=str, required=True, help="yaml format configuration file."
+    )
+    parser.add_argument(
+        "--verbose",
+        type=int,
+        default=1,
+        help="logging level. higher is more logging. (default=1)",
+    )
+    args = parser.parse_args()
+
+    # set logger
+    if args.verbose > 1:
+        logging.basicConfig(
+            level=logging.DEBUG,
+            format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s",
+        )
+    elif args.verbose > 0:
+        logging.basicConfig(
+            level=logging.INFO,
+            format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s",
+        )
+    else:
+        logging.basicConfig(
+            level=logging.WARN,
+            format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s",
+        )
+        logging.warning("Skip DEBUG/INFO messages")
+
+    # load config (NOTE: yaml.Loader can construct arbitrary objects; use trusted files only)
+    with open(args.config) as f:
+        config = yaml.load(f, Loader=yaml.Loader)
+    config.update(vars(args))  # command line arguments override config entries
+
+    # check arguments
+    if (args.feats_scp is not None and args.rootdir is not None) or (
+        args.feats_scp is None and args.rootdir is None
+    ):
+        raise ValueError("Please specify either --rootdir or --feats-scp.")
+
+    # make sure the output directory exists; exist_ok avoids the race between
+    # a separate existence check and the creation
+    os.makedirs(args.dumpdir, exist_ok=True)
+
+    # get dataset
+    if args.rootdir is not None:
+        if config["format"] == "hdf5":
+            audio_query, mel_query = "*.h5", "*.h5"
+            audio_load_fn = lambda x: read_hdf5(x, "wave")  # NOQA
+            mel_load_fn = lambda x: read_hdf5(x, "feats")  # NOQA
+        elif config["format"] == "npy":
+            audio_query, mel_query = "*-wave.npy", "*-feats.npy"
+            audio_load_fn = np.load
+            mel_load_fn = np.load
+        else:
+            raise ValueError("support only hdf5 or npy format.")
+        if not args.skip_wav_copy:
+            dataset = AudioMelDataset(
+                root_dir=args.rootdir,
+                audio_query=audio_query,
+                mel_query=mel_query,
+                audio_load_fn=audio_load_fn,
+                mel_load_fn=mel_load_fn,
+                return_utt_id=True,
+            )
+        else:
+            dataset = MelDataset(
+                root_dir=args.rootdir,
+                mel_query=mel_query,
+                mel_load_fn=mel_load_fn,
+                return_utt_id=True,
+            )
+    else:
+        if not args.skip_wav_copy:
+            dataset = AudioMelSCPDataset(
+                wav_scp=args.wav_scp,
+                feats_scp=args.feats_scp,
+                segments=args.segments,
+                return_utt_id=True,
+            )
+        else:
+            dataset = MelSCPDataset(
+                feats_scp=args.feats_scp,
+                return_utt_id=True,
+            )
+    logging.info(f"The number of files = {len(dataset)}.")
+
+    # restore scaler
+    scaler = StandardScaler()
+    if config["format"] == "hdf5":
+        scaler.mean_ = read_hdf5(args.stats, "mean")
+        scaler.scale_ = read_hdf5(args.stats, "scale")
+    elif config["format"] == "npy":
+        stats = np.load(args.stats)  # read the file once; row 0 is mean, row 1 is scale
+        scaler.mean_, scaler.scale_ = stats[0], stats[1]
+    else:
+        raise ValueError("support only hdf5 or npy format.")
+    # from version 0.23.0, this information is needed
+    scaler.n_features_in_ = scaler.mean_.shape[0]
+
+    # process each file
+    for items in tqdm(dataset):
+        if not args.skip_wav_copy:
+            utt_id, audio, mel = items
+        else:
+            utt_id, mel = items
+
+        # normalize
+        mel = scaler.transform(mel)
+
+        # save
+        if config["format"] == "hdf5":
+            write_hdf5(
+                os.path.join(args.dumpdir, f"{utt_id}.h5"),
+                "feats",
+                mel.astype(np.float32),
+            )
+            if not args.skip_wav_copy:
+                write_hdf5(
+                    os.path.join(args.dumpdir, f"{utt_id}.h5"),
+                    "wave",
+                    audio.astype(np.float32),
+                )
+        elif config["format"] == "npy":
+            np.save(
+                os.path.join(args.dumpdir, f"{utt_id}-feats.npy"),
+                mel.astype(np.float32),
+                allow_pickle=False,
+            )
+            if not args.skip_wav_copy:
+                np.save(
+                    os.path.join(args.dumpdir, f"{utt_id}-wave.npy"),
+                    audio.astype(np.float32),
+                    allow_pickle=False,
+                )
+        else:
+            raise ValueError("support only hdf5 or npy format.")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/hifigan/parallel_wavegan/bin/preprocess.py b/hifigan/parallel_wavegan/bin/preprocess.py
new file mode 100644
index 0000000..b1cee74
--- /dev/null
+++ b/hifigan/parallel_wavegan/bin/preprocess.py
@@ -0,0 +1,270 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+# Copyright 2019 Tomoki Hayashi
+#  MIT License (https://opensource.org/licenses/MIT)
+
+"""Perform preprocessing and raw feature extraction."""
+
+import argparse
+import logging
+import os
+
+import librosa
+import numpy as np
+import soundfile as sf
+import yaml
+
+from tqdm import tqdm
+
+from parallel_wavegan.datasets import AudioDataset
+from parallel_wavegan.datasets import AudioSCPDataset
+from parallel_wavegan.utils import write_hdf5
+
+
+def logmelfilterbank(
+    audio,
+    sampling_rate,
+    fft_size=1024,
+    hop_size=256,
+    win_length=None,
+    window="hann",
+    num_mels=80,
+    fmin=None,
+    fmax=None,
+    eps=1e-10,
+    log_base=10.0,
+):
+    """Compute log-Mel filterbank feature.
+
+    Args:
+        audio (ndarray): Audio signal (T,).
+        sampling_rate (int): Sampling rate.
+        fft_size (int): FFT size.
+        hop_size (int): Hop size.
+        win_length (int): Window length. If set to None, it will be the same as fft_size.
+        window (str): Window function type.
+        num_mels (int): Number of mel basis.
+        fmin (int): Minimum frequency in mel basis calculation. None means 0.
+        fmax (int): Maximum frequency in mel basis calculation. None means sampling_rate / 2.
+        eps (float): Lower bound applied to the mel energies to avoid inf in log calculation.
+        log_base (float): Log base (None, 10.0 or 2.0). None uses np.log; others raise ValueError.
+
+    Returns:
+        ndarray: Log Mel filterbank feature (#frames, num_mels).
+
+    """
+    # get amplitude spectrogram
+    x_stft = librosa.stft(
+        audio,
+        n_fft=fft_size,
+        hop_length=hop_size,
+        win_length=win_length,
+        window=window,
+        pad_mode="reflect",
+    )
+    spc = np.abs(x_stft).T  # (#frames, #bins)
+
+    # get mel basis (keyword arguments: they are keyword-only in librosa >= 0.10)
+    fmin = 0 if fmin is None else fmin
+    fmax = sampling_rate / 2 if fmax is None else fmax
+    mel_basis = librosa.filters.mel(
+        sr=sampling_rate, n_fft=fft_size, n_mels=num_mels, fmin=fmin, fmax=fmax)
+    mel = np.maximum(eps, np.dot(spc, mel_basis.T))
+
+    if log_base is None:
+        return np.log(mel)
+    if log_base == 10.0:
+        return np.log10(mel)
+    if log_base == 2.0:
+        return np.log2(mel)
+    raise ValueError(f"{log_base} is not supported.")
+
+
+def main():
+    """Extract log-Mel features from audio files and dump them together with the audio."""
+    parser = argparse.ArgumentParser(
+        description="Preprocess audio and then extract features (See detail in parallel_wavegan/bin/preprocess.py)."
+    )
+    parser.add_argument(
+        "--wav-scp",
+        "--scp",
+        default=None,
+        type=str,
+        help="kaldi-style wav.scp file. you need to specify either scp or rootdir.",
+    )
+    parser.add_argument(
+        "--segments",
+        default=None,
+        type=str,
+        help="kaldi-style segments file. if use, you must to specify both scp and segments.",
+    )
+    parser.add_argument(
+        "--rootdir",
+        default=None,
+        type=str,
+        help="directory including wav files. you need to specify either scp or rootdir.",
+    )
+    parser.add_argument(
+        "--dumpdir",
+        type=str,
+        required=True,
+        help="directory to dump feature files.",
+    )
+    parser.add_argument(
+        "--config",
+        type=str,
+        required=True,
+        help="yaml format configuration file.",
+    )
+    parser.add_argument(
+        "--verbose",
+        type=int,
+        default=1,
+        help="logging level. higher is more logging. (default=1)",
+    )
+    args = parser.parse_args()
+
+    # set logger
+    if args.verbose > 1:
+        logging.basicConfig(
+            level=logging.DEBUG,
+            format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s",
+        )
+    elif args.verbose > 0:
+        logging.basicConfig(
+            level=logging.INFO,
+            format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s",
+        )
+    else:
+        logging.basicConfig(
+            level=logging.WARN,
+            format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s",
+        )
+        logging.warning("Skip DEBUG/INFO messages")
+
+    # load config (NOTE: yaml.Loader can construct arbitrary objects; use trusted files only)
+    with open(args.config) as f:
+        config = yaml.load(f, Loader=yaml.Loader)
+    config.update(vars(args))  # command line arguments override config entries
+
+    # check arguments
+    if (args.wav_scp is not None and args.rootdir is not None) or (
+        args.wav_scp is None and args.rootdir is None
+    ):
+        raise ValueError("Please specify either --rootdir or --wav-scp.")
+
+    # get dataset
+    if args.rootdir is not None:
+        dataset = AudioDataset(
+            args.rootdir,
+            "*.wav",
+            audio_load_fn=sf.read,
+            return_utt_id=True,
+        )
+    else:
+        dataset = AudioSCPDataset(
+            args.wav_scp,
+            segments=args.segments,
+            return_utt_id=True,
+            return_sampling_rate=True,
+        )
+
+    # make sure the output directory exists; exist_ok already tolerates an
+    # existing directory, so no separate existence check is needed
+    os.makedirs(args.dumpdir, exist_ok=True)
+
+    # process each data
+    for utt_id, (audio, fs) in tqdm(dataset):
+        # check
+        assert len(audio.shape) == 1, f"{utt_id} seems to be multi-channel signal."
+        assert (
+            np.abs(audio).max() <= 1.0
+        ), f"{utt_id} seems to be different from 16 bit PCM."
+        assert (
+            fs == config["sampling_rate"]
+        ), f"{utt_id} seems to have a different sampling rate."
+
+        # trim silence
+        if config["trim_silence"]:
+            audio, _ = librosa.effects.trim(
+                audio,
+                top_db=config["trim_threshold_in_db"],
+                frame_length=config["trim_frame_size"],
+                hop_length=config["trim_hop_size"],
+            )
+
+        if "sampling_rate_for_feats" not in config:
+            x = audio
+            sampling_rate = config["sampling_rate"]
+            hop_size = config["hop_size"]
+        else:
+            # NOTE(kan-bayashi): this procedure enables to train the model with different
+            #   sampling rate for feature and audio, e.g., training with mel extracted
+            #   using 16 kHz audio and 24 kHz audio as a target waveform
+            x = librosa.resample(audio, orig_sr=fs, target_sr=config["sampling_rate_for_feats"])
+            sampling_rate = config["sampling_rate_for_feats"]
+            assert (
+                config["hop_size"] * config["sampling_rate_for_feats"] % fs == 0
+            ), "hop_size must be int value. please check sampling_rate_for_feats is correct."
+            hop_size = config["hop_size"] * config["sampling_rate_for_feats"] // fs
+
+        # extract feature
+        mel = logmelfilterbank(
+            x,
+            sampling_rate=sampling_rate,
+            hop_size=hop_size,
+            fft_size=config["fft_size"],
+            win_length=config["win_length"],
+            window=config["window"],
+            num_mels=config["num_mels"],
+            fmin=config["fmin"],
+            fmax=config["fmax"],
+            # keep compatibility
+            log_base=config.get("log_base", 10.0),
+        )
+
+        # make sure the audio length and feature length are matched
+        audio = np.pad(audio, (0, config["fft_size"]), mode="reflect")
+        audio = audio[: len(mel) * config["hop_size"]]
+        assert len(mel) * config["hop_size"] == len(audio)
+
+        # apply global gain; utterances that would clip are skipped, not saved
+        if config["global_gain_scale"] > 0.0:
+            audio *= config["global_gain_scale"]
+        if np.abs(audio).max() >= 1.0:
+            logging.warning(
+                f"{utt_id} causes clipping. "
+                f"it is better to re-consider global gain scale."
+            )
+            continue
+
+        # save
+        if config["format"] == "hdf5":
+            write_hdf5(
+                os.path.join(args.dumpdir, f"{utt_id}.h5"),
+                "wave",
+                audio.astype(np.float32),
+            )
+            write_hdf5(
+                os.path.join(args.dumpdir, f"{utt_id}.h5"),
+                "feats",
+                mel.astype(np.float32),
+            )
+        elif config["format"] == "npy":
+            np.save(
+                os.path.join(args.dumpdir, f"{utt_id}-wave.npy"),
+                audio.astype(np.float32),
+                allow_pickle=False,
+            )
+            np.save(
+                os.path.join(args.dumpdir, f"{utt_id}-feats.npy"),
+                mel.astype(np.float32),
+                allow_pickle=False,
+            )
+        else:
+            raise ValueError("support only hdf5 or npy format.")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/hifigan/parallel_wavegan/bin/train.py b/hifigan/parallel_wavegan/bin/train.py
new file mode 100644
index 0000000..075e790
--- /dev/null
+++ b/hifigan/parallel_wavegan/bin/train.py
@@ -0,0 +1,1110 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+# Copyright 2019 Tomoki Hayashi
+#  MIT License (https://opensource.org/licenses/MIT)
+
+"""Train Parallel WaveGAN."""
+
+import argparse
+import logging
+import os
+import sys
+
+from collections import defaultdict
+
+import matplotlib
+import numpy as np
+import soundfile as sf
+import torch
+import yaml
+
+from tensorboardX import SummaryWriter
+from torch.utils.data import DataLoader
+from tqdm import tqdm
+
+import parallel_wavegan
+import parallel_wavegan.models
+import parallel_wavegan.optimizers
+
+from parallel_wavegan.datasets import AudioMelDataset
+from parallel_wavegan.datasets import AudioMelSCPDataset
+from parallel_wavegan.layers import PQMF
+from parallel_wavegan.losses import DiscriminatorAdversarialLoss
+from parallel_wavegan.losses import FeatureMatchLoss
+from parallel_wavegan.losses import GeneratorAdversarialLoss
+from parallel_wavegan.losses import MelSpectrogramLoss
+from parallel_wavegan.losses import MultiResolutionSTFTLoss
+from parallel_wavegan.utils import read_hdf5
+
+# set to avoid matplotlib error in CLI environment
+matplotlib.use("Agg")
+
+
+class Trainer(object):
+    """Customized trainer module for Parallel WaveGAN training."""
+
+    def __init__(
+        self,
+        steps,
+        epochs,
+        data_loader,
+        sampler,
+        model,
+        criterion,
+        optimizer,
+        scheduler,
+        config,
+        device=torch.device("cpu"),
+    ):
+        """Initialize trainer.
+
+        Args:
+            steps (int): Initial global steps.
+            epochs (int): Initial global epochs.
+            data_loader (dict): Dict of data loaders. It must contain "train" and "dev" loaders.
+            sampler (dict): Dict of samplers. Its "train" entry is used in distributed training.
+            model (dict): Dict of models. It must contain "generator" and "discriminator" models.
+            criterion (dict): Dict of criterions, e.g. "stft", "mel", "gen_adv" and "dis_adv".
+            optimizer (dict): Dict of optimizers. It must contain "generator" and "discriminator" optimizers.
+            scheduler (dict): Dict of schedulers. It must contain "generator" and "discriminator" schedulers.
+            config (dict): Config dict loaded from yaml format configuration file.
+            device (torch.device): Pytorch device instance.
+        """
+        self.steps = steps
+        self.epochs = epochs
+        self.data_loader = data_loader
+        self.sampler = sampler
+        self.model = model
+        self.criterion = criterion
+        self.optimizer = optimizer
+        self.scheduler = scheduler
+        self.config = config
+        self.device = device
+        self.writer = SummaryWriter(config["outdir"])  # tensorboard writer on outdir
+        self.finish_train = False  # set by _check_train_finish()
+        self.total_train_loss = defaultdict(float)  # running sums, reset after logging
+        self.total_eval_loss = defaultdict(float)  # running sums, reset after each eval
+
+    def run(self):
+        """Train epoch by epoch until a step raises the ``finish_train`` flag."""
+        # the progress bar resumes from the current global step count
+        progress_opts = dict(
+            initial=self.steps, total=self.config["train_max_steps"], desc="[train]"
+        )
+        self.tqdm = tqdm(**progress_opts)
+
+        # always run at least one epoch, then stop as soon as the flag is set
+        keep_going = True
+        while keep_going:
+            self._train_epoch()
+            keep_going = not self.finish_train
+        self.tqdm.close()
+        logging.info("Finished training.")
+
+    def save_checkpoint(self, checkpoint_path):
+        """Save model, optimizer and scheduler states together with the counters.
+
+        Args:
+            checkpoint_path (str): Checkpoint path to be saved. Missing parent
+                directories are created; a bare file name is written to the
+                current working directory.
+
+        """
+        roles = ("generator", "discriminator")
+
+        # in distributed mode each network is accessed through ``.module``,
+        # so the saved parameters come from the wrapped model itself
+        if self.config["distributed"]:
+            networks = {role: self.model[role].module for role in roles}
+        else:
+            networks = {role: self.model[role] for role in roles}
+
+        # layout of the checkpoint: optimizer, scheduler, counters, model
+        state_dict = {
+            "optimizer": {role: self.optimizer[role].state_dict() for role in roles},
+            "scheduler": {role: self.scheduler[role].state_dict() for role in roles},
+            "steps": self.steps,
+            "epochs": self.epochs,
+            "model": {role: networks[role].state_dict() for role in roles},
+        }
+
+        # os.path.dirname() is "" for a bare file name and os.makedirs("") raises,
+        # so the directory is only created when the path actually contains one
+        save_dir = os.path.dirname(checkpoint_path)
+        if save_dir:
+            os.makedirs(save_dir, exist_ok=True)
+        torch.save(state_dict, checkpoint_path)
+
+    def load_checkpoint(self, checkpoint_path, load_only_params=False):
+        """Restore the training state from a checkpoint file.
+
+        Args:
+            checkpoint_path (str): Checkpoint path to be loaded.
+            load_only_params (bool): Whether to load only model parameters.
+                If False, the step/epoch counters and the optimizer and
+                scheduler states are restored as well.
+
+        """
+        roles = ("generator", "discriminator")
+
+        # tensors are mapped to CPU while reading the file
+        state_dict = torch.load(checkpoint_path, map_location="cpu")
+
+        # model parameters are always restored; in distributed mode they are
+        # loaded into the ``.module`` of each network
+        for role in roles:
+            if self.config["distributed"]:
+                network = self.model[role].module
+            else:
+                network = self.model[role]
+            network.load_state_dict(state_dict["model"][role])
+
+        # nothing else to do when only the parameters were requested
+        if load_only_params:
+            return
+
+        # resume the counters, then the optimizer and scheduler states
+        self.steps = state_dict["steps"]
+        self.epochs = state_dict["epochs"]
+
+        for role in roles:
+            self.optimizer[role].load_state_dict(state_dict["optimizer"][role])
+        for role in roles:
+            self.scheduler[role].load_state_dict(state_dict["scheduler"][role])
+
+    def _train_step(self, batch):
+        """Run one training step on ``batch``: generator first, then discriminator."""
+        # parse batch: x holds the generator inputs, y is the signal y_ is compared to
+        x, y = batch
+        x = tuple([x_.to(self.device) for x_ in x])
+        y = y.to(self.device)
+
+        #######################
+        #      Generator      #
+        #######################
+        if self.steps > self.config.get("generator_train_start_steps", 0):
+            y_ = self.model["generator"](*x)
+
+            # reconstruct the signal from multi-band signal (multi-channel generator)
+            if self.config["generator_params"]["out_channels"] > 1:
+                y_mb_ = y_
+                y_ = self.criterion["pqmf"].synthesis(y_mb_)
+
+            # initialize the accumulated generator loss
+            gen_loss = 0.0
+
+            # multi-resolution stft loss
+            if self.config["use_stft_loss"]:
+                sc_loss, mag_loss = self.criterion["stft"](y_, y)
+                gen_loss += sc_loss + mag_loss
+                self.total_train_loss[
+                    "train/spectral_convergence_loss"
+                ] += sc_loss.item()
+                self.total_train_loss[
+                    "train/log_stft_magnitude_loss"
+                ] += mag_loss.item()
+
+            # subband multi-resolution stft loss (needs y_mb_ from the multi-band branch)
+            if self.config["use_subband_stft_loss"]:
+                gen_loss *= 0.5  # for balancing with subband stft loss
+                y_mb = self.criterion["pqmf"].analysis(y)
+                sub_sc_loss, sub_mag_loss = self.criterion["sub_stft"](y_mb_, y_mb)
+                gen_loss += 0.5 * (sub_sc_loss + sub_mag_loss)
+                self.total_train_loss[
+                    "train/sub_spectral_convergence_loss"
+                ] += sub_sc_loss.item()
+                self.total_train_loss[
+                    "train/sub_log_stft_magnitude_loss"
+                ] += sub_mag_loss.item()
+
+            # mel spectrogram loss
+            if self.config["use_mel_loss"]:
+                mel_loss = self.criterion["mel"](y_, y)
+                gen_loss += mel_loss
+                self.total_train_loss["train/mel_loss"] += mel_loss.item()
+
+            # weighting aux loss (everything accumulated so far)
+            gen_loss *= self.config.get("lambda_aux", 1.0)
+
+            # adversarial loss, only once the discriminator has started training
+            if self.steps > self.config["discriminator_train_start_steps"]:
+                p_ = self.model["discriminator"](y_)
+                adv_loss = self.criterion["gen_adv"](p_)
+                self.total_train_loss["train/adversarial_loss"] += adv_loss.item()
+
+                # feature matching loss
+                if self.config["use_feat_match_loss"]:
+                    # no need to track gradients
+                    with torch.no_grad():
+                        p = self.model["discriminator"](y)
+                    fm_loss = self.criterion["feat_match"](p_, p)
+                    self.total_train_loss[
+                        "train/feature_matching_loss"
+                    ] += fm_loss.item()
+                    adv_loss += self.config["lambda_feat_match"] * fm_loss
+
+                # add adversarial loss to generator loss
+                gen_loss += self.config["lambda_adv"] * adv_loss
+
+            self.total_train_loss["train/generator_loss"] += gen_loss.item()
+
+            # update generator (gradient clipping is skipped when the norm is <= 0)
+            self.optimizer["generator"].zero_grad()
+            gen_loss.backward()
+            if self.config["generator_grad_norm"] > 0:
+                torch.nn.utils.clip_grad_norm_(
+                    self.model["generator"].parameters(),
+                    self.config["generator_grad_norm"],
+                )
+            self.optimizer["generator"].step()
+            self.scheduler["generator"].step()
+
+        #######################
+        #    Discriminator    #
+        #######################
+        if self.steps > self.config["discriminator_train_start_steps"]:
+            # re-compute y_ (after the generator update above), which leads to better quality
+            with torch.no_grad():
+                y_ = self.model["generator"](*x)
+            if self.config["generator_params"]["out_channels"] > 1:
+                y_ = self.criterion["pqmf"].synthesis(y_)
+
+            # discriminator loss on the real signal y and the generated signal y_
+            p = self.model["discriminator"](y)
+            p_ = self.model["discriminator"](y_.detach())
+            real_loss, fake_loss = self.criterion["dis_adv"](p_, p)
+            dis_loss = real_loss + fake_loss
+            self.total_train_loss["train/real_loss"] += real_loss.item()
+            self.total_train_loss["train/fake_loss"] += fake_loss.item()
+            self.total_train_loss["train/discriminator_loss"] += dis_loss.item()
+
+            # update discriminator (gradient clipping is skipped when the norm is <= 0)
+            self.optimizer["discriminator"].zero_grad()
+            dis_loss.backward()
+            if self.config["discriminator_grad_norm"] > 0:
+                torch.nn.utils.clip_grad_norm_(
+                    self.model["discriminator"].parameters(),
+                    self.config["discriminator_grad_norm"],
+                )
+            self.optimizer["discriminator"].step()
+            self.scheduler["discriminator"].step()
+
+        # update counts and flag the end of training when the step limit is reached
+        self.steps += 1
+        self.tqdm.update(1)
+        self._check_train_finish()
+
+    def _train_epoch(self):
+        """Train over the whole training loader once, or until training ends."""
+        for n_steps, batch in enumerate(self.data_loader["train"], 1):
+            self._train_step(batch)
+            # logging, evaluation and checkpointing are done by rank 0 only
+            if self.config["rank"] == 0:
+                for check in (
+                    self._check_log_interval,
+                    self._check_eval_interval,
+                    self._check_save_interval,
+                ):
+                    check()
+
+            # the step above may have reached the maximum number of steps
+            if self.finish_train:
+                return
+
+        # the loader is exhausted: book-keep the finished epoch
+        self.epochs += 1
+        self.train_steps_per_epoch = n_steps
+        logging.info(
+            f"(Steps: {self.steps}) Finished {self.epochs} epoch training "
+            f"({self.train_steps_per_epoch} steps per epoch)."
+        )
+        # needed for shuffle in distributed training
+        if self.config["distributed"]:
+            self.sampler["train"].set_epoch(self.epochs)
+
+    @torch.no_grad()
+    def _eval_step(self, batch):
+        """Accumulate the generator and discriminator losses of one dev batch."""
+        # parse batch: x holds the generator inputs, y is the signal y_ is compared to
+        x, y = batch
+        x = tuple([x_.to(self.device) for x_ in x])
+        y = y.to(self.device)
+
+        #######################
+        #      Generator      #
+        #######################
+        y_ = self.model["generator"](*x)
+        if self.config["generator_params"]["out_channels"] > 1:
+            y_mb_ = y_
+            y_ = self.criterion["pqmf"].synthesis(y_mb_)
+
+        # initialize the accumulated auxiliary loss
+        aux_loss = 0.0
+
+        # multi-resolution stft loss
+        if self.config["use_stft_loss"]:
+            sc_loss, mag_loss = self.criterion["stft"](y_, y)
+            aux_loss += sc_loss + mag_loss
+            self.total_eval_loss["eval/spectral_convergence_loss"] += sc_loss.item()
+            self.total_eval_loss["eval/log_stft_magnitude_loss"] += mag_loss.item()
+
+        # subband multi-resolution stft loss (needs y_mb_ from the multi-band branch)
+        if self.config.get("use_subband_stft_loss", False):
+            aux_loss *= 0.5  # for balancing with subband stft loss
+            y_mb = self.criterion["pqmf"].analysis(y)
+            sub_sc_loss, sub_mag_loss = self.criterion["sub_stft"](y_mb_, y_mb)
+            self.total_eval_loss[
+                "eval/sub_spectral_convergence_loss"
+            ] += sub_sc_loss.item()
+            self.total_eval_loss[
+                "eval/sub_log_stft_magnitude_loss"
+            ] += sub_mag_loss.item()
+            aux_loss += 0.5 * (sub_sc_loss + sub_mag_loss)
+
+        # mel spectrogram loss
+        if self.config["use_mel_loss"]:
+            mel_loss = self.criterion["mel"](y_, y)
+            aux_loss += mel_loss
+            self.total_eval_loss["eval/mel_loss"] += mel_loss.item()
+
+        # weighting aux loss (stft, subband stft and mel terms together)
+        aux_loss *= self.config.get("lambda_aux", 1.0)
+
+        # adversarial loss (always computed here, regardless of the step count)
+        p_ = self.model["discriminator"](y_)
+        adv_loss = self.criterion["gen_adv"](p_)
+        gen_loss = aux_loss + self.config["lambda_adv"] * adv_loss
+
+        # feature matching loss
+        if self.config["use_feat_match_loss"]:
+            p = self.model["discriminator"](y)
+            fm_loss = self.criterion["feat_match"](p_, p)
+            self.total_eval_loss["eval/feature_matching_loss"] += fm_loss.item()
+            gen_loss += (
+                self.config["lambda_adv"] * self.config["lambda_feat_match"] * fm_loss
+            )
+
+        #######################
+        #    Discriminator    #
+        #######################
+        p = self.model["discriminator"](y)
+        p_ = self.model["discriminator"](y_)
+
+        # discriminator loss on the real signal y and the generated signal y_
+        real_loss, fake_loss = self.criterion["dis_adv"](p_, p)
+        dis_loss = real_loss + fake_loss
+
+        # add to total eval loss
+        self.total_eval_loss["eval/adversarial_loss"] += adv_loss.item()
+        self.total_eval_loss["eval/generator_loss"] += gen_loss.item()
+        self.total_eval_loss["eval/real_loss"] += real_loss.item()
+        self.total_eval_loss["eval/fake_loss"] += fake_loss.item()
+        self.total_eval_loss["eval/discriminator_loss"] += dis_loss.item()
+
+    def _eval_epoch(self):
+        """Evaluate model one epoch.
+
+        Runs every dev batch through ``_eval_step``, logs and records the
+        averaged losses and puts the models back into train mode afterwards.
+        """
+        logging.info(f"(Steps: {self.steps}) Start evaluation.")
+        # every network is switched to eval mode while evaluating
+        for network in self.model.values():
+            network.eval()
+
+        # accumulate the losses of each batch into self.total_eval_loss
+        dev_batches = tqdm(self.data_loader["dev"], desc="[eval]")
+        for eval_steps_per_epoch, batch in enumerate(dev_batches, 1):
+            self._eval_step(batch)
+
+            # intermediate results are generated from the first batch only
+            if eval_steps_per_epoch == 1:
+                self._genearete_and_save_intermediate_result(batch)
+
+        logging.info(
+            f"(Steps: {self.steps}) Finished evaluation "
+            f"({eval_steps_per_epoch} steps per epoch)."
+        )
+
+        # turn the accumulated sums into averages over the evaluated batches
+        for key in self.total_eval_loss:
+            self.total_eval_loss[key] /= eval_steps_per_epoch
+            logging.info(
+                f"(Steps: {self.steps}) {key} = {self.total_eval_loss[key]:.4f}."
+            )
+
+        # write the averages to tensorboard, then start the next round from zero
+        self._write_to_tensorboard(self.total_eval_loss)
+        self.total_eval_loss = defaultdict(float)
+
+        # back to train mode
+        for network in self.model.values():
+            network.train()
+
+    @torch.no_grad()
+    def _genearete_and_save_intermediate_result(self, batch):
+        """Generate and save intermediate result.
+
+        For the leading items of the batch a waveform plot and the reference
+        and generated wav files are written below
+        ``<outdir>/predictions/<steps>steps``.
+        """
+        # delayed import to avoid error related backend error
+        import matplotlib.pyplot as plt
+
+        # run the generator on the batch
+        inputs, targets = batch
+        inputs = tuple(item.to(self.device) for item in inputs)
+        targets = targets.to(self.device)
+        generated = self.model["generator"](*inputs)
+        if self.config["generator_params"]["out_channels"] > 1:
+            # reconstruct the signal from multi-band signal
+            generated = self.criterion["pqmf"].synthesis(generated)
+
+        # all files of this call share one directory named after the
+        # current step count; create it on first use
+        dirname = os.path.join(self.config["outdir"], f"predictions/{self.steps}steps")
+        if not os.path.exists(dirname):
+            os.makedirs(dirname)
+
+        for idx, (y, y_) in enumerate(zip(targets, generated), 1):
+            # flatten to 1-d ndarrays: reference first, generated second
+            signals = (y.view(-1).cpu().numpy(), y_.view(-1).cpu().numpy())
+
+            # plot both waveforms into one figure, one panel each
+            figname = os.path.join(dirname, f"{idx}.png")
+            titles = ("groundtruth speech", f"generated speech @ {self.steps} steps")
+            for row, (signal, title) in enumerate(zip(signals, titles), 1):
+                plt.subplot(2, 1, row)
+                plt.plot(signal)
+                plt.title(title)
+            plt.tight_layout()
+            plt.savefig(figname)
+            plt.close()
+
+            # save as wavfile, clipped to the [-1, 1] range
+            for suffix, signal in zip(("_ref.wav", "_gen.wav"), signals):
+                sf.write(
+                    figname.replace(".png", suffix),
+                    np.clip(signal, -1, 1),
+                    self.config["sampling_rate"],
+                    "PCM_16",
+                )
+
+            # stop once the configured number of results has been written
+            if idx >= self.config["num_save_intermediate_results"]:
+                break
+
+    def _write_to_tensorboard(self, loss):
+        """Log every scalar of ``loss`` to tensorboard under the current step."""
+        for tag in loss:
+            self.writer.add_scalar(tag, loss[tag], self.steps)
+
+    def _check_save_interval(self):
+        """Save a checkpoint whenever the step count hits the save interval."""
+        if self.steps % self.config["save_interval_steps"] == 0:
+            name = f"checkpoint-{self.steps}steps.pkl"
+            self.save_checkpoint(os.path.join(self.config["outdir"], name))
+            logging.info(f"Successfully saved checkpoint @ {self.steps} steps.")
+
+    def _check_eval_interval(self):
+        if not self.steps % self.config["eval_interval_steps"]:  # interval reached
+            self._eval_epoch()
+
+    def _check_log_interval(self):
+        """Average, report and reset the running train losses every interval."""
+        if self.steps % self.config["log_interval_steps"] != 0:
+            return
+        for key in self.total_train_loss.keys():
+            self.total_train_loss[key] /= self.config["log_interval_steps"]
+            logging.info(
+                f"(Steps: {self.steps}) {key} = {self.total_train_loss[key]:.4f}."
+            )
+        self._write_to_tensorboard(self.total_train_loss)
+        self.total_train_loss = defaultdict(float)  # reset
+
+    def _check_train_finish(self):
+        if self.config["train_max_steps"] <= self.steps:  # step budget is used up
+            self.finish_train = True
+
+
+class Collater(object):
+    """Customized collater for Pytorch DataLoader in training."""
+
+    def __init__(
+        self,
+        batch_max_steps=20480,
+        hop_size=256,
+        win_length=1024,
+        aux_context_window=2,
+        use_noise_input=False,
+    ):
+        """Initialize customized collater for PyTorch DataLoader.
+
+        Args:
+            batch_max_steps (int): The maximum length of input signal in batch.
+            hop_size (int): Hop size of auxiliary features.
+            win_length (int): Window length used when trimming over-long audio.
+            aux_context_window (int): Context window size for auxiliary feature conv.
+            use_noise_input (bool): Whether to use noise input.
+
+        """
+        # round down so that a batch always covers a whole number of frames
+        batch_max_steps -= batch_max_steps % hop_size
+        self.batch_max_steps = batch_max_steps
+        self.batch_max_frames = batch_max_steps // hop_size
+        self.hop_size = hop_size
+        self.win_length = win_length
+        self.aux_context_window = aux_context_window
+        self.use_noise_input = use_noise_input
+
+        # set useful values in random cutting
+        self.start_offset = aux_context_window
+        self.end_offset = -(self.batch_max_frames + aux_context_window)
+        self.mel_threshold = self.batch_max_frames + 2 * aux_context_window
+
+    def __call__(self, batch):
+        """Convert into batch tensors.
+
+        Args:
+            batch (list): list of tuple of the pair of audio and features.
+
+        Returns:
+            tuple: Generator inputs, ``(z, c)`` if noise input is used else ``(c,)``:
+                Gaussian noise z (B, 1, T) and auxiliary features c (B, C, T'),
+                where T = (T' - 2 * aux_context_window) * hop_size.
+            Tensor: Target signal batch (B, 1, T).
+
+        """
+        # drop too short samples and fix the length of the remaining ones
+        batch = [
+            self._adjust_length(*b) for b in batch if len(b[1]) > self.mel_threshold
+        ]
+        xs, cs = [b[0] for b in batch], [b[1] for b in batch]
+
+        # make batch with random cut
+        c_lengths = [len(c) for c in cs]
+        start_frames = np.array(
+            [
+                np.random.randint(self.start_offset, cl + self.end_offset)
+                for cl in c_lengths
+            ]
+        )
+        x_starts = start_frames * self.hop_size
+        x_ends = x_starts + self.batch_max_steps
+        c_starts = start_frames - self.aux_context_window
+        c_ends = start_frames + self.batch_max_frames + self.aux_context_window
+        y_batch = np.asarray([x[start:end] for x, start, end in zip(xs, x_starts, x_ends)])
+        c_batch = np.asarray([c[start:end] for c, start, end in zip(cs, c_starts, c_ends)])
+
+        # convert each batch to tensor, assume that each item in batch has the same length
+        y_batch = torch.tensor(y_batch, dtype=torch.float).unsqueeze(1)  # (B, 1, T)
+        c_batch = torch.tensor(c_batch, dtype=torch.float).transpose(2, 1)  # (B, C, T')
+
+        # make input noise signal batch tensor
+        if self.use_noise_input:
+            z_batch = torch.randn(y_batch.size())  # (B, 1, T)
+            return (z_batch, c_batch), y_batch
+        else:
+            return (c_batch,), y_batch
+
+    def _adjust_length(self, x, c):
+        """Trim or pad the audio so that it matches the feature length.
+
+        Note:
+            Audio longer than ``len(c) * hop_size`` loses its first
+            ``(win_length - hop_size) // 2`` samples and is then cut at the
+            end; audio that is too short is padded with its last sample.
+
+        """
+        target_length = len(c) * self.hop_size
+        if len(x) > target_length:
+            x = x[(self.win_length - self.hop_size) // 2 :]
+            x = x[:target_length]
+        # too short (possibly after the head trim): repeat the last sample
+        if len(x) < target_length:
+            x = np.pad(x, (0, target_length - len(x)), mode="edge")
+
+        # check the length is valid
+        assert len(x) == target_length, f"{len(x)}, {target_length}"
+        return x, c
+
+
+def main():
+    """Run training process."""
+    parser = argparse.ArgumentParser(
+        description="Train Parallel WaveGAN (See detail in parallel_wavegan/bin/train.py)."
+    )
+    parser.add_argument(
+        "--train-wav-scp",
+        default=None,
+        type=str,
+        help="kaldi-style wav.scp file for training. "
+        "you need to specify either train-*-scp or train-dumpdir.",
+    )
+    parser.add_argument(
+        "--train-feats-scp",
+        default=None,
+        type=str,
+        help="kaldi-style feats.scp file for training. "
+        "you need to specify either train-*-scp or train-dumpdir.",
+    )
+    parser.add_argument(
+        "--train-segments",
+        default=None,
+        type=str,
+        help="kaldi-style segments file for training.",
+    )
+    parser.add_argument(
+        "--train-num-frames", 
+        default=None, 
+        type=str,
+        help="kaldi-style utt2num_frames file for training.",
+    )
+    parser.add_argument(
+        "--train-dumpdir",
+        default=None,
+        type=str,
+        help="directory including training data. "
+        "you need to specify either train-*-scp or train-dumpdir.",
+    )
+    parser.add_argument(
+        "--dev-wav-scp",
+        default=None,
+        type=str,
+        help="kaldi-style wav.scp file for validation. "
+        "you need to specify either dev-*-scp or dev-dumpdir.",
+    )
+    parser.add_argument(
+        "--dev-feats-scp",
+        default=None,
+        type=str,
+        help="kaldi-style feats.scp file for vaidation. "
+        "you need to specify either dev-*-scp or dev-dumpdir.",
+    )
+    parser.add_argument(
+        "--dev-segments",
+        default=None,
+        type=str,
+        help="kaldi-style segments file for validation.",
+    )
+    parser.add_argument(
+        "--dev-num-frames", 
+        default=None, 
+        type=str,
+        help="kaldi-style utt2num_frames file for validation.",
+    )
+    parser.add_argument(
+        "--dev-dumpdir",
+        default=None,
+        type=str,
+        help="directory including development data. "
+        "you need to specify either dev-*-scp or dev-dumpdir.",
+    )
+    parser.add_argument(
+        "--outdir",
+        type=str,
+        required=True,
+        help="directory to save checkpoints.",
+    )
+    parser.add_argument(
+        "--config",
+        type=str,
+        required=True,
+        help="yaml format configuration file.",
+    )
+    parser.add_argument(
+        "--pretrain",
+        default="",
+        type=str,
+        nargs="?",
+        help='checkpoint file path to load pretrained params. (default="")',
+    )
+    parser.add_argument(
+        "--resume",
+        default="",
+        type=str,
+        nargs="?",
+        help='checkpoint file path to resume training. (default="")',
+    )
+    parser.add_argument(
+        "--verbose",
+        type=int,
+        default=1,
+        help="logging level. higher is more logging. (default=1)",
+    )
+    parser.add_argument(
+        "--world-size",
+        default=1,
+        type=int,
+        help="world size for distributed training. no need to explictly specify.",
+    )
+    parser.add_argument(
+        "--rank",
+        default=0,
+        type=int,
+        help="rank for distributed training. no need to explictly specify.",
+    )
+    parser.add_argument(
+        "--distributed-init",
+        default="/tmp/init",
+        type=str,
+        help="File path for init_process_group in distributed training.",
+    )
+    parser.add_argument("--sampling-rate", type=int)
+    parser.add_argument("--hop-size", type=int)
+    parser.add_argument("--win-length", type=int)
+    args = parser.parse_args()
+
+    # init distributed training
+    if not torch.cuda.is_available():
+        device = torch.device("cpu")
+        args.distributed = False
+    else:
+        device = torch.device("cuda")
+        # effective when using fixed size inputs
+        # see https://discuss.pytorch.org/t/what-does-torch-backends-cudnn-benchmark-do/5936
+        torch.backends.cudnn.benchmark = True
+        # setup for distributed training
+        # see example: https://github.com/NVIDIA/apex/tree/master/examples/simple/distributed
+        args.distributed = args.world_size > 1
+        if args.world_size == 1:
+            assert args.rank == 0
+
+    # set logger
+    if args.verbose > 1:
+        logging.basicConfig(
+            level=logging.DEBUG,
+            stream=sys.stdout,
+            format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s",
+        )
+    elif args.verbose > 0:
+        logging.basicConfig(
+            level=logging.INFO,
+            stream=sys.stdout,
+            format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s",
+        )
+    else:
+        logging.basicConfig(
+            level=logging.WARN,
+            stream=sys.stdout,
+            format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s",
+        )
+        logging.warning("Skip DEBUG/INFO messages")
+
+    # check directory existence
+    if not os.path.exists(args.outdir):
+        os.makedirs(args.outdir)
+
+    # init process group
+    if args.distributed:
+        logging.info("Synchronizing between all workers.")
+        torch.distributed.init_process_group(backend="nccl", init_method="file://%s" % args.distributed_init, world_size=args.world_size, rank=args.rank)
+        logging.info("Finished init process group.")
+    else:
+        logging.info("Training on a single GPU.")
+
+    # check arguments
+    if (args.train_feats_scp is not None and args.train_dumpdir is not None) or (
+        args.train_feats_scp is None and args.train_dumpdir is None
+    ):
+        raise ValueError("Please specify either --train-dumpdir or --train-*-scp.")
+    if (args.dev_feats_scp is not None and args.dev_dumpdir is not None) or (
+        args.dev_feats_scp is None and args.dev_dumpdir is None
+    ):
+        raise ValueError("Please specify either --dev-dumpdir or --dev-*-scp.")
+
+    # load and save config
+    with open(args.config) as f:
+        config = yaml.load(f, Loader=yaml.Loader)
+    config.update(vars(args))
+    config["version"] = parallel_wavegan.__version__  # add version info
+    with open(os.path.join(args.outdir, "config.yml"), "w") as f:
+        yaml.dump(config, f, Dumper=yaml.Dumper)
+    for key, value in config.items():
+        logging.info(f"{key} = {value}")
+
+    # get dataset
+    if config["remove_short_samples"]:
+        mel_length_threshold = config["batch_max_steps"] // config[
+            "hop_size"
+        ] + 2 * config["generator_params"].get("aux_context_window", 0)
+    else:
+        mel_length_threshold = None
+    if args.train_wav_scp is None or args.dev_wav_scp is None:
+        if config["format"] == "hdf5":
+            audio_query, mel_query = "*.h5", "*.h5"
+            audio_load_fn = lambda x: read_hdf5(x, "wave")  # NOQA
+            mel_load_fn = lambda x: read_hdf5(x, "feats")  # NOQA
+        elif config["format"] == "npy":
+            audio_query, mel_query = "*-wave.npy", "*-feats.npy"
+            audio_load_fn = np.load
+            mel_load_fn = np.load
+        else:
+            raise ValueError("support only hdf5 or npy format.")
+    if args.train_dumpdir is not None:
+        train_dataset = AudioMelDataset(
+            root_dir=args.train_dumpdir,
+            audio_query=audio_query,
+            mel_query=mel_query,
+            audio_load_fn=audio_load_fn,
+            mel_load_fn=mel_load_fn,
+            mel_length_threshold=mel_length_threshold,
+            allow_cache=config.get("allow_cache", False),  # keep compatibility
+        )
+    else:
+        train_dataset = AudioMelSCPDataset(
+            wav_scp=args.train_wav_scp,
+            feats_scp=args.train_feats_scp,
+            utt2num_frames=args.train_num_frames,
+            segments=args.train_segments,
+            mel_length_threshold=mel_length_threshold,
+            allow_cache=config.get("allow_cache", False),  # keep compatibility
+        )
+    logging.info(f"The number of training files = {len(train_dataset)}.")
+    if args.dev_dumpdir is not None:
+        dev_dataset = AudioMelDataset(
+            root_dir=args.dev_dumpdir,
+            audio_query=audio_query,
+            mel_query=mel_query,
+            audio_load_fn=audio_load_fn,
+            mel_load_fn=mel_load_fn,
+            mel_length_threshold=mel_length_threshold,
+            allow_cache=config.get("allow_cache", False),  # keep compatibility
+        )
+    else:
+        dev_dataset = AudioMelSCPDataset(
+            wav_scp=args.dev_wav_scp,
+            feats_scp=args.dev_feats_scp,
+            utt2num_frames=args.dev_num_frames,
+            segments=args.dev_segments,
+            mel_length_threshold=mel_length_threshold,
+            allow_cache=config.get("allow_cache", False),  # keep compatibility
+        )
+    logging.info(f"The number of development files = {len(dev_dataset)}.")
+    dataset = {
+        "train": train_dataset,
+        "dev": dev_dataset,
+    }
+
+    # get data loader
+    collater = Collater(
+        batch_max_steps=config["batch_max_steps"],
+        hop_size=config["hop_size"],
+        win_length=config["win_length"],
+        # keep compatibility
+        aux_context_window=config["generator_params"].get("aux_context_window", 0),
+        # keep compatibility
+        use_noise_input=config.get("generator_type", "ParallelWaveGANGenerator")
+        in ["ParallelWaveGANGenerator"],
+    )
+    sampler = {"train": None, "dev": None}
+    if args.distributed:
+        # setup sampler for distributed training
+        from torch.utils.data.distributed import DistributedSampler
+
+        sampler["train"] = DistributedSampler(
+            dataset=dataset["train"],
+            num_replicas=args.world_size,
+            rank=args.rank,
+            shuffle=True,
+        )
+        sampler["dev"] = DistributedSampler(
+            dataset=dataset["dev"],
+            num_replicas=args.world_size,
+            rank=args.rank,
+            shuffle=False,
+        )
+    data_loader = {
+        "train": DataLoader(
+            dataset=dataset["train"],
+            shuffle=False if args.distributed else True,
+            collate_fn=collater,
+            batch_size=config["batch_size"] // args.world_size,
+            num_workers=config["num_workers"],
+            sampler=sampler["train"],
+            pin_memory=config["pin_memory"],
+        ),
+        "dev": DataLoader(
+            dataset=dataset["dev"],
+            shuffle=False if args.distributed else True,
+            collate_fn=collater,
+            batch_size=config["batch_size"] // args.world_size,
+            num_workers=config["num_workers"],
+            sampler=sampler["dev"],
+            pin_memory=config["pin_memory"],
+        ),
+    }
+
+    # define models
+    generator_class = getattr(
+        parallel_wavegan.models,
+        # keep compatibility
+        config.get("generator_type", "ParallelWaveGANGenerator"),
+    )
+    discriminator_class = getattr(
+        parallel_wavegan.models,
+        # keep compatibility
+        config.get("discriminator_type", "ParallelWaveGANDiscriminator"),
+    )
+    model = {
+        "generator": generator_class(
+            **config["generator_params"],
+        ).to(device),
+        "discriminator": discriminator_class(
+            **config["discriminator_params"],
+        ).to(device),
+    }
+
+    # define criterions
+    criterion = {
+        "gen_adv": GeneratorAdversarialLoss(
+            # keep compatibility
+            **config.get("generator_adv_loss_params", {})
+        ).to(device),
+        "dis_adv": DiscriminatorAdversarialLoss(
+            # keep compatibility
+            **config.get("discriminator_adv_loss_params", {})
+        ).to(device),
+    }
+    if config.get("use_stft_loss", True):  # keep compatibility
+        config["use_stft_loss"] = True
+        criterion["stft"] = MultiResolutionSTFTLoss(
+            **config["stft_loss_params"],
+        ).to(device)
+    if config.get("use_subband_stft_loss", False):  # keep compatibility
+        assert config["generator_params"]["out_channels"] > 1
+        criterion["sub_stft"] = MultiResolutionSTFTLoss(
+            **config["subband_stft_loss_params"],
+        ).to(device)
+    else:
+        config["use_subband_stft_loss"] = False
+    if config.get("use_feat_match_loss", False):  # keep compatibility
+        criterion["feat_match"] = FeatureMatchLoss(
+            # keep compatibility
+            **config.get("feat_match_loss_params", {}),
+        ).to(device)
+    else:
+        config["use_feat_match_loss"] = False
+    if config.get("use_mel_loss", False):  # keep compatibility
+        if config.get("mel_loss_params", None) is None:
+            criterion["mel"] = MelSpectrogramLoss(
+                fs=config["sampling_rate"],
+                fft_size=config["fft_size"],
+                hop_size=config["hop_size"],
+                win_length=config["win_length"],
+                window=config["window"],
+                num_mels=config["num_mels"],
+                fmin=config["fmin"],
+                fmax=config["fmax"],
+            ).to(device)
+        else:
+            criterion["mel"] = MelSpectrogramLoss(
+                **config["mel_loss_params"],
+            ).to(device)
+    else:
+        config["use_mel_loss"] = False
+
+    # define special module for subband processing
+    if config["generator_params"]["out_channels"] > 1:
+        criterion["pqmf"] = PQMF(
+            subbands=config["generator_params"]["out_channels"],
+            # keep compatibility
+            **config.get("pqmf_params", {}),
+        ).to(device)
+
+    # define optimizers and schedulers
+    generator_optimizer_class = getattr(
+        parallel_wavegan.optimizers,
+        # keep compatibility
+        config.get("generator_optimizer_type", "RAdam"),
+    )
+    discriminator_optimizer_class = getattr(
+        parallel_wavegan.optimizers,
+        # keep compatibility
+        config.get("discriminator_optimizer_type", "RAdam"),
+    )
+    optimizer = {
+        "generator": generator_optimizer_class(
+            model["generator"].parameters(),
+            **config["generator_optimizer_params"],
+        ),
+        "discriminator": discriminator_optimizer_class(
+            model["discriminator"].parameters(),
+            **config["discriminator_optimizer_params"],
+        ),
+    }
+    generator_scheduler_class = getattr(
+        torch.optim.lr_scheduler,
+        # keep compatibility
+        config.get("generator_scheduler_type", "StepLR"),
+    )
+    discriminator_scheduler_class = getattr(
+        torch.optim.lr_scheduler,
+        # keep compatibility
+        config.get("discriminator_scheduler_type", "StepLR"),
+    )
+    scheduler = {
+        "generator": generator_scheduler_class(
+            optimizer=optimizer["generator"],
+            **config["generator_scheduler_params"],
+        ),
+        "discriminator": discriminator_scheduler_class(
+            optimizer=optimizer["discriminator"],
+            **config["discriminator_scheduler_params"],
+        ),
+    }
+    if args.distributed:
+        # wrap model for distributed training
+        try:
+            from apex.parallel import DistributedDataParallel
+        except ImportError:
+            raise ImportError(
+                "apex is not installed. please check https://github.com/NVIDIA/apex."
+            )
+        model["generator"] = DistributedDataParallel(model["generator"])
+        model["discriminator"] = DistributedDataParallel(model["discriminator"])
+
+    # show settings
+    logging.info(model["generator"])
+    logging.info(model["discriminator"])
+    logging.info(optimizer["generator"])
+    logging.info(optimizer["discriminator"])
+
+    # define trainer
+    trainer = Trainer(
+        steps=0,
+        epochs=0,
+        data_loader=data_loader,
+        sampler=sampler,
+        model=model,
+        criterion=criterion,
+        optimizer=optimizer,
+        scheduler=scheduler,
+        config=config,
+        device=device,
+    )
+
+    # load pretrained parameters from checkpoint
+    if len(args.pretrain) != 0:
+        trainer.load_checkpoint(args.pretrain, load_only_params=True)
+        logging.info(f"Successfully load parameters from {args.pretrain}.")
+
+    # resume from checkpoint
+    if len(args.resume) != 0:
+        trainer.load_checkpoint(args.resume)
+        logging.info(f"Successfully resumed from {args.resume}.")
+
+    # run training loop
+    try:
+        trainer.run()
+    finally:
+        trainer.save_checkpoint(
+            os.path.join(config["outdir"], f"checkpoint-{trainer.steps}steps.pkl")
+        )
+        logging.info(f"Successfully saved checkpoint @ {trainer.steps}steps.")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/hifigan/parallel_wavegan/datasets/__init__.py b/hifigan/parallel_wavegan/datasets/__init__.py
new file mode 100644
index 0000000..e3f7a99
--- /dev/null
+++ b/hifigan/parallel_wavegan/datasets/__init__.py
@@ -0,0 +1,2 @@
+from .audio_mel_dataset import *  # NOQA
+from .scp_dataset import *  # NOQA
diff --git a/hifigan/parallel_wavegan/datasets/audio_mel_dataset.py b/hifigan/parallel_wavegan/datasets/audio_mel_dataset.py
new file mode 100644
index 0000000..ccda58b
--- /dev/null
+++ b/hifigan/parallel_wavegan/datasets/audio_mel_dataset.py
@@ -0,0 +1,343 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2019 Tomoki Hayashi
+#  MIT License (https://opensource.org/licenses/MIT)
+
+"""Dataset modules."""
+
+import logging
+import os
+
+from multiprocessing import Manager
+
+import numpy as np
+
+from torch.utils.data import Dataset
+
+from parallel_wavegan.utils import find_files
+from parallel_wavegan.utils import read_hdf5
+
+
+class AudioMelDataset(Dataset):
+    """PyTorch compatible audio and mel dataset."""
+
+    def __init__(
+        self,
+        root_dir,
+        audio_query="*.h5",
+        mel_query="*.h5",
+        audio_load_fn=lambda x: read_hdf5(x, "wave"),
+        mel_load_fn=lambda x: read_hdf5(x, "feats"),
+        audio_length_threshold=None,
+        mel_length_threshold=None,
+        return_utt_id=False,
+        allow_cache=False,
+    ):
+        """Initialize dataset.
+
+        Args:
+            root_dir (str): Root directory including dumped files.
+            audio_query (str): Query to find audio files in root_dir.
+            mel_query (str): Query to find feature files in root_dir.
+            audio_load_fn (func): Function to load audio file.
+            mel_load_fn (func): Function to load feature file.
+            audio_length_threshold (int): Threshold to remove short audio files.
+            mel_length_threshold (int): Threshold to remove short feature files.
+            return_utt_id (bool): Whether to return the utterance id with arrays.
+            allow_cache (bool): Whether to allow cache of the loaded files.
+
+        Raises:
+            AssertionError: If no audio file remains or audio/mel counts differ.
+
+        """
+        # find all of audio and mel files
+        audio_files = sorted(find_files(root_dir, audio_query))
+        mel_files = sorted(find_files(root_dir, mel_query))
+
+        # audio and mel files are paired by sorted position, so the two lists
+        # must line up BEFORE the same keep-mask is applied to both of them
+        assert len(audio_files) != 0, f"Not found any audio files in {root_dir}."
+        assert len(audio_files) == len(
+            mel_files
+        ), f"Number of audio and mel files are different ({len(audio_files)} vs {len(mel_files)})."
+
+        # filter by threshold (an item is kept only if length > threshold)
+        if audio_length_threshold is not None:
+            keep = [
+                audio_load_fn(f).shape[0] > audio_length_threshold
+                for f in audio_files
+            ]
+            if not all(keep):
+                logging.warning(
+                    f"Some files are filtered by audio length threshold "
+                    f"({len(keep)} -> {sum(keep)})."
+                )
+            audio_files = [f for f, ok in zip(audio_files, keep) if ok]
+            mel_files = [f for f, ok in zip(mel_files, keep) if ok]
+        if mel_length_threshold is not None:
+            keep = [mel_load_fn(f).shape[0] > mel_length_threshold for f in mel_files]
+            if not all(keep):
+                logging.warning(
+                    f"Some files are filtered by mel length threshold "
+                    f"({len(keep)} -> {sum(keep)})."
+                )
+            audio_files = [f for f, ok in zip(audio_files, keep) if ok]
+            mel_files = [f for f, ok in zip(mel_files, keep) if ok]
+
+        # the thresholds above may have removed every file
+        assert len(audio_files) != 0, f"Not found any audio files in {root_dir}."
+
+        self.audio_files = audio_files
+        self.audio_load_fn = audio_load_fn
+        self.mel_load_fn = mel_load_fn
+        self.mel_files = mel_files
+        if ".npy" in audio_query:
+            self.utt_ids = [
+                os.path.basename(f).replace("-wave.npy", "") for f in audio_files
+            ]
+        else:
+            self.utt_ids = [
+                os.path.splitext(os.path.basename(f))[0] for f in audio_files
+            ]
+        self.return_utt_id = return_utt_id
+        self.allow_cache = allow_cache
+        if allow_cache:
+            # NOTE(kan-bayashi): Manager is need to share memory in dataloader with num_workers > 0
+            self.manager = Manager()
+            self.caches = self.manager.list()
+            self.caches += [() for _ in range(len(audio_files))]
+
+    def __getitem__(self, idx):
+        """Get specified idx items.
+
+        Args:
+            idx (int): Index of the item.
+
+        Returns:
+            str: Utterance id (only in return_utt_id = True).
+            ndarray: Audio signal (T,).
+            ndarray: Feature (T', C).
+
+        """
+        if self.allow_cache and len(self.caches[idx]) != 0:
+            return self.caches[idx]
+
+        utt_id = self.utt_ids[idx]
+        audio = self.audio_load_fn(self.audio_files[idx])
+        mel = self.mel_load_fn(self.mel_files[idx])
+
+        if self.return_utt_id:
+            items = utt_id, audio, mel
+        else:
+            items = audio, mel
+
+        if self.allow_cache:
+            self.caches[idx] = items
+
+        return items
+
+    def __len__(self):
+        """Return dataset length.
+
+        Returns:
+            int: The length of dataset.
+
+        """
+        return len(self.audio_files)
+
+
+class AudioDataset(Dataset):
+    """PyTorch compatible audio dataset."""
+
+    def __init__(
+        self,
+        root_dir,
+        audio_query="*-wave.npy",
+        audio_length_threshold=None,
+        audio_load_fn=np.load,
+        return_utt_id=False,
+        allow_cache=False,
+    ):
+        """Initialize dataset.
+
+        Args:
+            root_dir (str): Root directory including dumped files.
+            audio_query (str): Query to find audio files in root_dir.
+            audio_load_fn (func): Function to load audio file.
+            audio_length_threshold (int): Threshold to remove short audio files.
+            return_utt_id (bool): Whether to return the utterance id with arrays.
+            allow_cache (bool): Whether to allow cache of the loaded files.
+
+        Raises:
+            AssertionError: If no audio file is found or none passes the filter.
+
+        """
+        # find all of audio files
+        audio_files = sorted(find_files(root_dir, audio_query))
+
+        # filter by threshold (an item is kept only if length > threshold)
+        if audio_length_threshold is not None:
+            keep = [
+                audio_load_fn(f).shape[0] > audio_length_threshold
+                for f in audio_files
+            ]
+            if not all(keep):
+                logging.warning(
+                    f"some files are filtered by audio length threshold "
+                    f"({len(keep)} -> {sum(keep)})."
+                )
+            audio_files = [f for f, ok in zip(audio_files, keep) if ok]
+
+        assert len(audio_files) != 0, f"Not found any audio files in {root_dir}."
+
+        self.audio_files = audio_files
+        self.audio_load_fn = audio_load_fn
+        self.return_utt_id = return_utt_id
+        if ".npy" in audio_query:
+            self.utt_ids = [
+                os.path.basename(f).replace("-wave.npy", "") for f in audio_files
+            ]
+        else:
+            self.utt_ids = [
+                os.path.splitext(os.path.basename(f))[0] for f in audio_files
+            ]
+        self.allow_cache = allow_cache
+        if allow_cache:
+            # NOTE(kan-bayashi): Manager is need to share memory in dataloader with num_workers > 0
+            self.manager = Manager()
+            self.caches = self.manager.list()
+            self.caches += [() for _ in range(len(audio_files))]
+
+    def __getitem__(self, idx):
+        """Get specified idx items.
+
+        Args:
+            idx (int): Index of the item.
+
+        Returns:
+            str: Utterance id (only in return_utt_id = True).
+            ndarray: Audio (T,).
+
+        """
+        if self.allow_cache and len(self.caches[idx]) != 0:
+            return self.caches[idx]
+
+        utt_id = self.utt_ids[idx]
+        audio = self.audio_load_fn(self.audio_files[idx])
+
+        if self.return_utt_id:
+            items = utt_id, audio
+        else:
+            items = audio
+
+        if self.allow_cache:
+            self.caches[idx] = items
+
+        return items
+
+    def __len__(self):
+        """Return dataset length.
+
+        Returns:
+            int: The length of dataset.
+
+        """
+        return len(self.audio_files)
+
+
+class MelDataset(Dataset):
+    """PyTorch compatible mel dataset."""
+
+    def __init__(
+        self,
+        root_dir,
+        mel_query="*-feats.npy",
+        mel_length_threshold=None,
+        mel_load_fn=np.load,
+        return_utt_id=False,
+        allow_cache=False,
+    ):
+        """Initialize dataset.
+
+        Args:
+            root_dir (str): Root directory including dumped files.
+            mel_query (str): Query to find feature files in root_dir.
+            mel_load_fn (func): Function to load feature file.
+            mel_length_threshold (int): Threshold to remove short feature files.
+            return_utt_id (bool): Whether to return the utterance id with arrays.
+            allow_cache (bool): Whether to allow cache of the loaded files.
+
+        Raises:
+            AssertionError: If no mel file is found or none passes the filter.
+
+        """
+        # find all of the mel files
+        mel_files = sorted(find_files(root_dir, mel_query))
+
+        # filter by threshold (an item is kept only if length > threshold)
+        if mel_length_threshold is not None:
+            keep = [
+                mel_load_fn(f).shape[0] > mel_length_threshold
+                for f in mel_files
+            ]
+            if not all(keep):
+                logging.warning(
+                    f"Some files are filtered by mel length threshold "
+                    f"({len(keep)} -> {sum(keep)})."
+                )
+            mel_files = [f for f, ok in zip(mel_files, keep) if ok]
+
+        # assert the number of files
+        assert len(mel_files) != 0, f"Not found any mel files in {root_dir}."
+
+        self.mel_files = mel_files
+        self.mel_load_fn = mel_load_fn
+        if ".npy" in mel_query:
+            self.utt_ids = [
+                os.path.basename(f).replace("-feats.npy", "") for f in mel_files
+            ]
+        else:
+            self.utt_ids = [os.path.splitext(os.path.basename(f))[0] for f in mel_files]
+        self.return_utt_id = return_utt_id
+        self.allow_cache = allow_cache
+        if allow_cache:
+            # NOTE(kan-bayashi): Manager is need to share memory in dataloader with num_workers > 0
+            self.manager = Manager()
+            self.caches = self.manager.list()
+            self.caches += [() for _ in range(len(mel_files))]
+
+    def __getitem__(self, idx):
+        """Get specified idx items.
+
+        Args:
+            idx (int): Index of the item.
+
+        Returns:
+            str: Utterance id (only in return_utt_id = True).
+            ndarray: Feature (T', C).
+
+        """
+        if self.allow_cache and len(self.caches[idx]) != 0:
+            return self.caches[idx]
+
+        utt_id = self.utt_ids[idx]
+        mel = self.mel_load_fn(self.mel_files[idx])
+
+        if self.return_utt_id:
+            items = utt_id, mel
+        else:
+            items = mel
+
+        if self.allow_cache:
+            self.caches[idx] = items
+
+        return items
+
+    def __len__(self):
+        """Return dataset length.
+
+        Returns:
+            int: The length of dataset.
+
+        """
+        return len(self.mel_files)
diff --git a/hifigan/parallel_wavegan/datasets/scp_dataset.py b/hifigan/parallel_wavegan/datasets/scp_dataset.py
new file mode 100644
index 0000000..baffffb
--- /dev/null
+++ b/hifigan/parallel_wavegan/datasets/scp_dataset.py
@@ -0,0 +1,368 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2019 Tomoki Hayashi
+#  MIT License (https://opensource.org/licenses/MIT)
+
+"""Dataset modules based on kaldi-style scp files."""
+
+import logging
+
+from multiprocessing import Manager
+
+import kaldiio
+import numpy as np
+
+from torch.utils.data import Dataset
+
+from parallel_wavegan.utils import HDF5ScpLoader
+from parallel_wavegan.utils import NpyScpLoader
+
+
+def _get_feats_scp_loader(feats_scp):
+    """Select the feature loader matching the format of a feats.scp file.
+
+    Only the first line is read; supported values are ``x.ark:offset``,
+    ``x.h5``, ``x.h5:path`` and ``x.npy``.
+
+    Raises:
+        ValueError: If the first line is not ``key value`` or is unsupported.
+
+    """
+    with open(feats_scp) as f:
+        fields = f.readline().split()
+    if len(fields) != 2:
+        raise ValueError(f"Invalid first line in {feats_scp}.")
+    path, sep, _ = fields[1].partition(":")
+    if sep and path.endswith(".ark"):
+        # kaldi-ark case: utt_id_1 /path/to/utt_id_1.ark:index
+        return kaldiio.load_scp(feats_scp)
+    if path.endswith(".h5"):
+        # hdf5 case, with or without path in hdf5: /path/to/utt_id_1.h5[:feats]
+        return HDF5ScpLoader(feats_scp)
+    if not sep and path.endswith(".npy"):
+        # npy case: utt_id_1 /path/to/utt_id_1.npy
+        return NpyScpLoader(feats_scp)
+    raise ValueError("Not supported feats.scp type.")
+
+
+class AudioMelSCPDataset(Dataset):
+    """PyTorch compatible audio and mel dataset based on kaldi-style scp files."""
+
+    def __init__(
+        self,
+        wav_scp,
+        feats_scp,
+        utt2num_frames=None,
+        segments=None,
+        audio_length_threshold=None,
+        mel_length_threshold=None,
+        return_utt_id=False,
+        return_sampling_rate=False,
+        allow_cache=False,
+    ):
+        """Initialize dataset.
+
+        Args:
+            wav_scp (str): Kaldi-style wav.scp file.
+            feats_scp (str): Kaldi-style feats.scp file.
+            utt2num_frames (str): Kaldi-style utt2num_frames file. If given, mel
+                lengths are read from it instead of loading the features.
+            segments (str): Kaldi-style segments file.
+            audio_length_threshold (int): Threshold to remove short audio files.
+            mel_length_threshold (int): Threshold to remove short feature files.
+            return_utt_id (bool): Whether to return utterance id.
+            return_sampling_rate (bool): Whether to return sampling rate.
+            allow_cache (bool): Whether to allow cache of the loaded files.
+
+        """
+        # load scp as lazy dict
+        audio_loader = kaldiio.load_scp(wav_scp, segments=segments)
+        mel_loader = _get_feats_scp_loader(feats_scp)
+        audio_keys = list(audio_loader.keys())
+        mel_keys = list(mel_loader.keys())
+
+        # optional "utt_id num_frames" table (values are kept as strings here)
+        utt2num_frames_loader = None
+        if utt2num_frames is not None:
+            with open(utt2num_frames, "r") as f:
+                utt2num_frames_loader = dict(line.split() for line in f)
+
+        # filter by threshold (an item is kept only if length > threshold)
+        if audio_length_threshold is not None:
+            keep = [
+                audio.shape[0] > audio_length_threshold
+                for _, audio in audio_loader.values()
+            ]
+            if not all(keep):
+                logging.warning(
+                    f"Some files are filtered by audio length threshold "
+                    f"({len(keep)} -> {sum(keep)})."
+                )
+            audio_keys = [key for key, ok in zip(audio_keys, keep) if ok]
+            mel_keys = audio_keys
+        if mel_length_threshold is not None:
+            # look lengths up by key so they stay aligned with mel_keys even
+            # after the audio length filter above has replaced the key list
+            if utt2num_frames_loader is None:
+                mel_lengths = [mel_loader[key].shape[0] for key in mel_keys]
+            else:
+                mel_lengths = [int(utt2num_frames_loader[key]) for key in mel_keys]
+            keep = [length > mel_length_threshold for length in mel_lengths]
+            if not all(keep):
+                logging.warning(
+                    f"Some files are filtered by mel length threshold "
+                    f"({len(keep)} -> {sum(keep)})."
+                )
+            mel_keys = [key for key, ok in zip(mel_keys, keep) if ok]
+            audio_keys = mel_keys
+
+        self.audio_loader = audio_loader
+        self.mel_loader = mel_loader
+        # items are indexed by the (possibly filtered) audio keys
+        self.utt_ids = audio_keys
+        self.return_utt_id = return_utt_id
+        self.return_sampling_rate = return_sampling_rate
+        self.allow_cache = allow_cache
+
+        if allow_cache:
+            # NOTE(kan-bayashi): Manager is need to share memory in dataloader with num_workers > 0
+            self.manager = Manager()
+            self.caches = self.manager.list()
+            self.caches += [() for _ in range(len(self.utt_ids))]
+
+    def __getitem__(self, idx):
+        """Get specified idx items.
+
+        Args:
+            idx (int): Index of the item.
+
+        Returns:
+            str: Utterance id (only in return_utt_id = True).
+            ndarray or tuple: Audio signal (T,) or (w/ sampling rate if return_sampling_rate = True).
+            ndarray: Feature (T', C).
+
+        """
+        if self.allow_cache and len(self.caches[idx]) != 0:
+            return self.caches[idx]
+
+        utt_id = self.utt_ids[idx]
+        fs, audio = self.audio_loader[utt_id]
+        mel = self.mel_loader[utt_id]
+
+        # normalize audio signal to be [-1, 1]
+        audio = audio.astype(np.float32)
+        audio /= 1 << (16 - 1)  # assume that wav is PCM 16 bit
+
+        if self.return_sampling_rate:
+            audio = (audio, fs)
+
+        if self.return_utt_id:
+            items = utt_id, audio, mel
+        else:
+            items = audio, mel
+
+        if self.allow_cache:
+            self.caches[idx] = items
+
+        return items
+
+    def __len__(self):
+        """Return dataset length.
+
+        Returns:
+            int: The length of dataset.
+
+        """
+        return len(self.utt_ids)
+
+
+class AudioSCPDataset(Dataset):
+    """PyTorch compatible audio dataset based on kaldi-style scp files.
+
+    Items are waveforms converted to float32 and divided by 2 ** 15.
+
+    """
+
+    def __init__(
+        self,
+        wav_scp,
+        segments=None,
+        audio_length_threshold=None,
+        return_utt_id=False,
+        return_sampling_rate=False,
+        allow_cache=False,
+    ):
+        """Index the utterances listed in a wav.scp file.
+
+        Args:
+            wav_scp (str): Kaldi-style wav.scp file.
+            segments (str): Kaldi-style segments file.
+            audio_length_threshold (int): Utterances whose number of samples is
+                not greater than this value are removed (None disables it).
+            return_utt_id (bool): Whether to return utterance id.
+            return_sampling_rate (bool): Whether to return sampling rate.
+            allow_cache (bool): Whether to allow cache of the loaded files.
+
+        """
+        # load scp as lazy dict
+        loader = kaldiio.load_scp(wav_scp, segments=segments)
+        utt_ids = list(loader.keys())
+
+        # drop the utterances that are not longer than the threshold
+        if audio_length_threshold is not None:
+            keep = [
+                wav.shape[0] > audio_length_threshold for _, wav in loader.values()
+            ]
+            if not all(keep):
+                logging.warning(
+                    f"Some files are filtered by audio length threshold "
+                    f"({len(keep)} -> {sum(keep)})."
+                )
+            utt_ids = [utt_id for utt_id, ok in zip(utt_ids, keep) if ok]
+
+        self.audio_loader = loader
+        self.utt_ids = utt_ids
+        self.return_utt_id = return_utt_id
+        self.return_sampling_rate = return_sampling_rate
+        self.allow_cache = allow_cache
+
+        if allow_cache:
+            # NOTE(kan-bayashi): Manager is need to share memory in dataloader with num_workers > 0
+            self.manager = Manager()
+            self.caches = self.manager.list()
+            self.caches += [()] * len(utt_ids)
+
+    def __getitem__(self, idx):
+        """Return the audio of the idx-th utterance.
+
+        Args:
+            idx (int): Index of the item.
+
+        Returns:
+            str: Utterance id (only in return_utt_id = True).
+            ndarray or tuple: Audio signal (T,) or (w/ sampling rate if return_sampling_rate = True).
+
+        """
+        # serve the item from the shared cache when it was stored before
+        if self.allow_cache:
+            cached = self.caches[idx]
+            if len(cached) != 0:
+                return cached
+
+        utt_id = self.utt_ids[idx]
+        rate, samples = self.audio_loader[utt_id]
+
+        # normalize audio signal to be [-1, 1], assuming 16 bit PCM input
+        samples = samples.astype(np.float32)
+        samples /= 1 << (16 - 1)
+
+        audio = (samples, rate) if self.return_sampling_rate else samples
+        items = (utt_id, audio) if self.return_utt_id else audio
+
+        if self.allow_cache:
+            self.caches[idx] = items
+
+        return items
+
+    def __len__(self):
+        """Return dataset length.
+
+        Returns:
+            int: The length of dataset.
+
+        """
+        return len(self.utt_ids)
+
+
+class MelSCPDataset(Dataset):
+    """PyTorch compatible mel dataset based on kaldi-style scp files."""
+
+    def __init__(
+        self,
+        feats_scp,
+        utt2num_frames=None,
+        mel_length_threshold=None,
+        return_utt_id=False,
+        allow_cache=False,
+    ):
+        """Initialize dataset.
+
+        Args:
+            feats_scp (str): Kaldi-style feats.scp file.
+            utt2num_frames (str): Kaldi-style utt2num_frames file. If given, it
+                is used for length filtering instead of loading the features.
+            mel_length_threshold (int): Threshold to remove short feature files.
+            return_utt_id (bool): Whether to return utterance id.
+            allow_cache (bool): Whether to allow cache of the loaded files.
+
+        """
+        # load scp as lazy dict
+        mel_loader = _get_feats_scp_loader(feats_scp)
+        mel_keys = list(mel_loader.keys())
+
+        # filter by threshold (an item is kept only if length > threshold)
+        if mel_length_threshold is not None:
+            # lengths are only needed here, so nothing is read or loaded when
+            # no threshold is requested
+            if utt2num_frames is not None:
+                with open(utt2num_frames, "r") as f:
+                    frames = dict(
+                        line.split()[:2] for line in f if line.strip()
+                    )
+                mel_lengths = [int(frames[key]) for key in mel_keys]
+            else:
+                mel_lengths = [mel_loader[key].shape[0] for key in mel_keys]
+            keep = [length > mel_length_threshold for length in mel_lengths]
+            if not all(keep):
+                logging.warning(
+                    f"Some files are filtered by mel length threshold "
+                    f"({len(keep)} -> {sum(keep)})."
+                )
+            mel_keys = [key for key, ok in zip(mel_keys, keep) if ok]
+
+        self.mel_loader = mel_loader
+        self.utt_ids = mel_keys
+        self.return_utt_id = return_utt_id
+        self.allow_cache = allow_cache
+
+        if allow_cache:
+            # NOTE(kan-bayashi): Manager is need to share memory in dataloader with num_workers > 0
+            self.manager = Manager()
+            self.caches = self.manager.list()
+            self.caches += [() for _ in range(len(self.utt_ids))]
+
+    def __getitem__(self, idx):
+        """Get specified idx items.
+
+        Args:
+            idx (int): Index of the item.
+
+        Returns:
+            str: Utterance id (only in return_utt_id = True).
+            ndarray: Feature (T', C).
+
+        """
+        if self.allow_cache and len(self.caches[idx]) != 0:
+            return self.caches[idx]
+
+        utt_id = self.utt_ids[idx]
+        mel = self.mel_loader[utt_id]
+
+        if self.return_utt_id:
+            items = utt_id, mel
+        else:
+            items = mel
+
+        if self.allow_cache:
+            self.caches[idx] = items
+
+        return items
+
+    def __len__(self):
+        """Return dataset length.
+
+        Returns:
+            int: The length of dataset.
+
+        """
+        return len(self.utt_ids)
diff --git a/hifigan/parallel_wavegan/distributed/__init__.py b/hifigan/parallel_wavegan/distributed/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/hifigan/parallel_wavegan/distributed/launch.py b/hifigan/parallel_wavegan/distributed/launch.py
new file mode 100644
index 0000000..292f2a9
--- /dev/null
+++ b/hifigan/parallel_wavegan/distributed/launch.py
@@ -0,0 +1,163 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+"""Distributed process launcher.
+
+This code is modified from https://github.com/pytorch/pytorch/blob/v1.3.0/torch/distributed/launch.py.
+
+"""
+import os
+import subprocess
+import sys
+
+from argparse import ArgumentParser
+from argparse import REMAINDER
+
+
+def parse_args():
+    """Parse launcher options; arguments after the script are kept for the script."""
+    parser = ArgumentParser(
+        description="PyTorch distributed training launch "
+        "helper utilty that will spawn up "
+        "multiple distributed processes"
+    )
+
+    # Optional arguments consumed by the launch helper itself
+    parser.add_argument(
+        "--nnodes",
+        type=int,
+        default=1,
+        help="The number of nodes to use for distributed " "training",
+    )
+    parser.add_argument(
+        "--node_rank",
+        type=int,
+        default=0,
+        help="The rank of the node for multi-node distributed " "training",
+    )
+    parser.add_argument(
+        "--nproc_per_node",
+        type=int,
+        default=1,
+        help="The number of processes to launch on each node, "
+        "for GPU training, this is recommended to be set "
+        "to the number of GPUs in your system so that "
+        "each process can be bound to a single GPU.",
+    )
+    parser.add_argument(
+        "--master_addr",
+        default="127.0.0.1",
+        type=str,
+        help="Master node (rank 0)'s address, should be either "
+        "the IP address or the hostname of node 0, for "
+        "single node multi-proc training, the "
+        "--master_addr can simply be 127.0.0.1",
+    )
+    parser.add_argument(
+        "--master_port",
+        default=29500,
+        type=int,
+        help="Master node (rank 0)'s free port that needs to "
+        "be used for communciation during distributed "
+        "training",
+    )
+    parser.add_argument(
+        "--use_env",
+        default=False,
+        action="store_true",
+        help="Use environment variable to pass "
+        "'local rank'. For legacy reasons, the default value is False. "
+        "If set to True, the script will not pass "
+        "--local_rank as argument, and will instead set LOCAL_RANK.",
+    )
+    parser.add_argument(
+        "-m",
+        "--module",
+        default=False,
+        action="store_true",
+        help="Changes each process to interpret the launch script "
+        "as a python module, executing with the same behavior as"
+        "'python -m'.",
+    )
+    parser.add_argument(
+        "-c",
+        "--command",
+        default=False,
+        action="store_true",
+        help="Changes each process to interpret the launch script " "as a command.",
+    )
+
+    # positional: the script / module / command that every process runs
+    parser.add_argument(
+        "training_script",
+        type=str,
+        help="The full path to the single GPU training "
+        "program/script/command to be launched in parallel, "
+        "followed by all the arguments for the "
+        "training script",
+    )
+
+    # everything after the script is collected unparsed and handed to the script
+    parser.add_argument("training_script_args", nargs=REMAINDER)
+    return parser.parse_args()
+
+
+def main():
+    """Launch distributed processes."""
+    args = parse_args()
+
+    # world size in terms of number of processes
+    dist_world_size = args.nproc_per_node * args.nnodes
+
+    # set PyTorch distributed related environmental variables
+    current_env = os.environ.copy()
+    current_env["MASTER_ADDR"] = args.master_addr
+    current_env["MASTER_PORT"] = str(args.master_port)
+    current_env["WORLD_SIZE"] = str(dist_world_size)
+
+    processes = []
+
+    if "OMP_NUM_THREADS" not in os.environ and args.nproc_per_node > 1:
+        current_env["OMP_NUM_THREADS"] = str(1)
+        print(
+            "*****************************************\n"
+            "Setting OMP_NUM_THREADS environment variable for each process "
+            "to be {} in default, to avoid your system being overloaded, "
+            "please further tune the variable for optimal performance in "
+            "your application as needed. \n"
+            "*****************************************".format(
+                current_env["OMP_NUM_THREADS"]
+            )
+        )
+
+    for local_rank in range(0, args.nproc_per_node):
+        # each process's rank
+        dist_rank = args.nproc_per_node * args.node_rank + local_rank
+        current_env["RANK"] = str(dist_rank)
+        current_env["LOCAL_RANK"] = str(local_rank)
+
+        # spawn the processes
+        if args.command:
+            cmd = [args.training_script]
+        else:
+            cmd = [sys.executable, "-u"]
+            if args.module:
+                cmd.append("-m")
+            cmd.append(args.training_script)
+
+        if not args.use_env:
+            cmd.append("--local_rank={}".format(local_rank))
+
+        cmd.extend(args.training_script_args)
+
+        process = subprocess.Popen(cmd, env=current_env)
+        processes.append(process)
+
+    for process in processes:
+        process.wait()
+        if process.returncode != 0:
+            raise subprocess.CalledProcessError(returncode=process.returncode, cmd=cmd)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/hifigan/parallel_wavegan/hh b/hifigan/parallel_wavegan/hh
new file mode 100644
index 0000000..530c304
--- /dev/null
+++ b/hifigan/parallel_wavegan/hh
@@ -0,0 +1,61 @@
+./losses/adversarial_loss.py
+./losses/feat_match_loss.py
+./losses/mel_loss.py
+./losses/__init__.py
+./losses/stft_loss.py
+./losses/__pycache__/feat_match_loss.cpython-36.pyc
+./losses/__pycache__/stft_loss.cpython-36.pyc
+./losses/__pycache__/mel_loss.cpython-36.pyc
+./losses/__pycache__/__init__.cpython-36.pyc
+./losses/__pycache__/adversarial_loss.cpython-36.pyc
+./models/melgan.py
+./models/parallel_wavegan.py
+./models/tf_models.py
+./models/__init__.py
+./models/hifigan.py
+./models/__pycache__/parallel_wavegan.cpython-36.pyc
+./models/__pycache__/style_melgan.cpython-36.pyc
+./models/__pycache__/melgan.cpython-36.pyc
+./models/__pycache__/__init__.cpython-36.pyc
+./models/__pycache__/hifigan.cpython-36.pyc
+./models/style_melgan.py
+./optimizers/radam.py
+./optimizers/__init__.py
+./optimizers/__pycache__/radam.cpython-36.pyc
+./optimizers/__pycache__/__init__.cpython-36.pyc
+./layers/upsample.py
+./layers/residual_block.py
+./layers/tf_layers.py
+./layers/pqmf.py
+./layers/causal_conv.py
+./layers/__init__.py
+./layers/tade_res_block.py
+./layers/residual_stack.py
+./layers/__pycache__/pqmf.cpython-36.pyc
+./layers/__pycache__/residual_block.cpython-36.pyc
+./layers/__pycache__/residual_stack.cpython-36.pyc
+./layers/__pycache__/upsample.cpython-36.pyc
+./layers/__pycache__/causal_conv.cpython-36.pyc
+./layers/__pycache__/__init__.cpython-36.pyc
+./layers/__pycache__/tade_res_block.cpython-36.pyc
+./utils/utils.py
+./utils/__init__.py
+./utils/__pycache__/utils.cpython-36.pyc
+./utils/__pycache__/__init__.cpython-36.pyc
+./bin/normalize.py
+./bin/decode.py
+./bin/preprocess.py
+./bin/compute_statistics.py
+./bin/__init__.py
+./bin/train.py
+./datasets/audio_mel_dataset.py
+./datasets/__init__.py
+./datasets/__pycache__/audio_mel_dataset.cpython-36.pyc
+./datasets/__pycache__/__init__.cpython-36.pyc
+./datasets/__pycache__/scp_dataset.cpython-36.pyc
+./datasets/scp_dataset.py
+./__init__.py
+./distributed/launch.py
+./distributed/__init__.py
+./hh
+./__pycache__/__init__.cpython-36.pyc
diff --git a/hifigan/parallel_wavegan/layers/__init__.py b/hifigan/parallel_wavegan/layers/__init__.py
new file mode 100644
index 0000000..ac0b7f1
--- /dev/null
+++ b/hifigan/parallel_wavegan/layers/__init__.py
@@ -0,0 +1,6 @@
+from .causal_conv import *  # NOQA
+from .pqmf import *  # NOQA
+from .residual_block import *  # NOQA
+from .residual_stack import *  # NOQA
+from .tade_res_block import *  # NOQA
+from .upsample import *  # NOQA
diff --git a/hifigan/parallel_wavegan/layers/causal_conv.py b/hifigan/parallel_wavegan/layers/causal_conv.py
new file mode 100644
index 0000000..abf51b8
--- /dev/null
+++ b/hifigan/parallel_wavegan/layers/causal_conv.py
@@ -0,0 +1,66 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2020 Tomoki Hayashi
+#  MIT License (https://opensource.org/licenses/MIT)
+
+"""Causal convolution layer modules."""
+
+
+import torch
+
+
+class CausalConv1d(torch.nn.Module):
+    """Padding layer followed by Conv1d, with the output cut to the input length."""
+
+    def __init__(
+        self,
+        in_channels,
+        out_channels,
+        kernel_size,
+        dilation=1,
+        bias=True,
+        pad="ConstantPad1d",
+        pad_params={"value": 0.0},
+    ):
+        """Initialize CausalConv1d module (``pad`` names a torch.nn padding class)."""
+        super(CausalConv1d, self).__init__()
+        self.pad = getattr(torch.nn, pad)((kernel_size - 1) * dilation, **pad_params)
+        self.conv = torch.nn.Conv1d(
+            in_channels, out_channels, kernel_size, dilation=dilation, bias=bias
+        )
+
+    def forward(self, x):
+        """Calculate forward propagation.
+
+        Args:
+            x (Tensor): Input tensor (B, in_channels, T).
+
+        Returns:
+            Tensor: Output tensor (B, out_channels, T).
+
+        """
+        return self.conv(self.pad(x))[:, :, : x.size(2)]  # keep the first T frames
+
+
+class CausalConvTranspose1d(torch.nn.Module):
+    """ConvTranspose1d whose output has its last ``stride`` frames removed."""
+
+    def __init__(self, in_channels, out_channels, kernel_size, stride, bias=True):
+        """Initialize CausalConvTranspose1d module."""
+        super(CausalConvTranspose1d, self).__init__()
+        self.deconv = torch.nn.ConvTranspose1d(
+            in_channels, out_channels, kernel_size, stride, bias=bias
+        )
+        self.stride = stride  # kept to know how many trailing frames to drop
+
+    def forward(self, x):
+        """Calculate forward propagation.
+
+        Args:
+            x (Tensor): Input tensor (B, in_channels, T_in).
+
+        Returns:
+            Tensor: Output tensor (B, out_channels, T_out).
+
+        """
+        return self.deconv(x)[:, :, : -self.stride]
diff --git a/hifigan/parallel_wavegan/layers/pqmf.py b/hifigan/parallel_wavegan/layers/pqmf.py
new file mode 100644
index 0000000..0bd46a3
--- /dev/null
+++ b/hifigan/parallel_wavegan/layers/pqmf.py
@@ -0,0 +1,150 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2020 Tomoki Hayashi
+#  MIT License (https://opensource.org/licenses/MIT)
+
+"""Pseudo QMF modules."""
+
+import numpy as np
+import torch
+import torch.nn.functional as F
+
+from scipy.signal.windows import kaiser
+
+
+def design_prototype_filter(taps=62, cutoff_ratio=0.142, beta=9.0):
+    """Design prototype filter for PQMF.
+
+    This method is based on `A Kaiser window approach for the design of prototype
+    filters of cosine modulated filterbanks`_.
+
+    Args:
+        taps (int): The number of filter taps (must be even).
+        cutoff_ratio (float): Cut-off frequency ratio (0.0 < ratio < 1.0).
+        beta (float): Beta coefficient for kaiser window.
+
+    Returns:
+        ndarray: Impulse response of prototype filter (taps + 1,).
+
+    .. _`A Kaiser window approach for the design of prototype filters of cosine modulated filterbanks`:
+        https://ieeexplore.ieee.org/abstract/document/681427
+
+    """
+    # check the arguments are valid
+    assert taps % 2 == 0, "The number of taps mush be even number."
+    assert 0.0 < cutoff_ratio < 1.0, "Cutoff ratio must be > 0.0 and < 1.0."
+
+    # make initial filter (the centre tap divides 0 by 0, hence the errstate guard)
+    omega_c = np.pi * cutoff_ratio
+    with np.errstate(invalid="ignore"):
+        h_i = np.sin(omega_c * (np.arange(taps + 1) - 0.5 * taps)) / (
+            np.pi * (np.arange(taps + 1) - 0.5 * taps)
+        )
+    h_i[taps // 2] = np.cos(0) * cutoff_ratio  # fix nan due to indeterminate form
+
+    # apply kaiser window
+    w = kaiser(taps + 1, beta)
+    h = h_i * w
+
+    return h
+
+
+class PQMF(torch.nn.Module):
+    """PQMF module.
+
+    This module is based on `Near-perfect-reconstruction pseudo-QMF banks`_.
+
+    .. _`Near-perfect-reconstruction pseudo-QMF banks`:
+        https://ieeexplore.ieee.org/document/258122
+
+    """
+
+    def __init__(self, subbands=4, taps=62, cutoff_ratio=0.142, beta=9.0):
+        """Initialize PQMF module.
+
+        The cutoff_ratio and beta parameters are optimized for #subbands = 4.
+        See discussion in https://github.com/kan-bayashi/ParallelWaveGAN/issues/195.
+
+        Args:
+            subbands (int): The number of subbands.
+            taps (int): The number of filter taps.
+            cutoff_ratio (float): Cut-off frequency ratio.
+            beta (float): Beta coefficient for kaiser window.
+
+        """
+        super(PQMF, self).__init__()
+
+        # build analysis & synthesis filter coefficients
+        h_proto = design_prototype_filter(taps, cutoff_ratio, beta)
+        h_analysis = np.zeros((subbands, len(h_proto)))
+        h_synthesis = np.zeros((subbands, len(h_proto)))
+        for k in range(subbands):
+            h_analysis[k] = (
+                2
+                * h_proto
+                * np.cos(
+                    (2 * k + 1)
+                    * (np.pi / (2 * subbands))
+                    * (np.arange(taps + 1) - (taps / 2))
+                    + (-1) ** k * np.pi / 4
+                )
+            )
+            h_synthesis[k] = (
+                2
+                * h_proto
+                * np.cos(
+                    (2 * k + 1)
+                    * (np.pi / (2 * subbands))
+                    * (np.arange(taps + 1) - (taps / 2))
+                    - (-1) ** k * np.pi / 4
+                )
+            )
+
+        # convert to tensor: analysis (subbands, 1, taps + 1), synthesis (1, subbands, taps + 1)
+        analysis_filter = torch.from_numpy(h_analysis).float().unsqueeze(1)
+        synthesis_filter = torch.from_numpy(h_synthesis).float().unsqueeze(0)
+
+        # register coefficients as buffer
+        self.register_buffer("analysis_filter", analysis_filter)
+        self.register_buffer("synthesis_filter", synthesis_filter)
+
+        # filter for downsampling & upsampling
+        updown_filter = torch.zeros((subbands, subbands, subbands)).float()
+        for k in range(subbands):
+            updown_filter[k, k, 0] = 1.0
+        self.register_buffer("updown_filter", updown_filter)
+        self.subbands = subbands
+
+        # keep padding info
+        self.pad_fn = torch.nn.ConstantPad1d(taps // 2, 0.0)
+
+    def analysis(self, x):
+        """Analysis with PQMF.
+
+        Args:
+            x (Tensor): Input tensor (B, 1, T).
+
+        Returns:
+            Tensor: Output tensor (B, subbands, T // subbands).
+
+        """
+        x = F.conv1d(self.pad_fn(x), self.analysis_filter)
+        return F.conv1d(x, self.updown_filter, stride=self.subbands)
+
+    def synthesis(self, x):
+        """Synthesis with PQMF.
+
+        Args:
+            x (Tensor): Input tensor (B, subbands, T // subbands).
+
+        Returns:
+            Tensor: Output tensor (B, 1, T).
+
+        """
+        # NOTE(kan-bayashi): Power will be decreased so here multiply by # subbands.
+        #   Not sure this is the correct way, it is better to check again.
+        # TODO(kan-bayashi): Understand the reconstruction procedure
+        x = F.conv_transpose1d(
+            x, self.updown_filter * self.subbands, stride=self.subbands
+        )
+        return F.conv1d(self.pad_fn(x), self.synthesis_filter)
diff --git a/hifigan/parallel_wavegan/layers/residual_block.py b/hifigan/parallel_wavegan/layers/residual_block.py
new file mode 100644
index 0000000..e0e9d6d
--- /dev/null
+++ b/hifigan/parallel_wavegan/layers/residual_block.py
@@ -0,0 +1,222 @@
+# -*- coding: utf-8 -*-
+
+"""Residual block modules.
+
+References:
+    - https://github.com/r9y9/wavenet_vocoder
+    - https://github.com/jik876/hifi-gan
+
+"""
+
+import math
+
+import torch
+import torch.nn.functional as F
+
+
+class Conv1d(torch.nn.Conv1d):
+    """Conv1d whose reset_parameters uses Kaiming-normal weights and zero bias."""
+
+    def __init__(self, *args, **kwargs):
+        """Initialize Conv1d module (arguments are forwarded to torch.nn.Conv1d)."""
+        super(Conv1d, self).__init__(*args, **kwargs)
+
+    def reset_parameters(self):
+        """Reset weight with kaiming_normal_ (relu gain) and bias, if any, to 0."""
+        torch.nn.init.kaiming_normal_(self.weight, nonlinearity="relu")
+        if self.bias is not None:
+            torch.nn.init.constant_(self.bias, 0.0)
+
+
+class Conv1d1x1(Conv1d):
+    """Conv1d fixed to kernel_size=1, padding=0, dilation=1."""
+
+    def __init__(self, in_channels, out_channels, bias):
+        """Initialize 1x1 Conv1d module (``bias`` has no default here)."""
+        super(Conv1d1x1, self).__init__(
+            in_channels, out_channels, kernel_size=1, padding=0, dilation=1, bias=bias
+        )
+
+
+class WaveNetResidualBlock(torch.nn.Module):
+    """Residual block module in WaveNet."""
+
+    def __init__(
+        self,
+        kernel_size=3,
+        residual_channels=64,
+        gate_channels=128,
+        skip_channels=64,
+        aux_channels=80,
+        dropout=0.0,
+        dilation=1,
+        bias=True,
+        use_causal_conv=False,
+    ):
+        """Initialize WaveNetResidualBlock module.
+
+        Args:
+            kernel_size (int): Kernel size of dilation convolution layer.
+            residual_channels (int): Number of channels for residual connection.
+            gate_channels (int): Output channels of the dilated conv (halved by the gate).
+            skip_channels (int): Number of channels for skip connection.
+            aux_channels (int): Local conditioning channels i.e. auxiliary input dimension.
+            dropout (float): Dropout probability.
+            dilation (int): Dilation factor.
+            bias (bool): Whether to add bias parameter in convolution layers.
+            use_causal_conv (bool): Whether to use causal convolution.
+        """
+        super().__init__()
+        self.dropout = dropout
+        # no future time stamps available
+        if use_causal_conv:
+            padding = (kernel_size - 1) * dilation
+        else:
+            assert (kernel_size - 1) % 2 == 0, "Not support even number kernel size."
+            padding = (kernel_size - 1) // 2 * dilation
+        self.use_causal_conv = use_causal_conv
+
+        # dilation conv
+        self.conv = Conv1d(
+            residual_channels,
+            gate_channels,
+            kernel_size,
+            padding=padding,
+            dilation=dilation,
+            bias=bias,
+        )
+
+        # local conditioning (skipped entirely when aux_channels <= 0)
+        if aux_channels > 0:
+            self.conv1x1_aux = Conv1d1x1(aux_channels, gate_channels, bias=False)
+        else:
+            self.conv1x1_aux = None
+
+        # conv output is split into two groups
+        gate_out_channels = gate_channels // 2
+        self.conv1x1_out = Conv1d1x1(gate_out_channels, residual_channels, bias=bias)
+        self.conv1x1_skip = Conv1d1x1(gate_out_channels, skip_channels, bias=bias)
+
+    def forward(self, x, c):
+        """Calculate forward propagation.
+
+        Args:
+            x (Tensor): Input tensor (B, residual_channels, T).
+            c (Tensor): Local conditioning auxiliary tensor (B, aux_channels, T), or None.
+
+        Returns:
+            Tensor: Output tensor for residual connection (B, residual_channels, T).
+            Tensor: Output tensor for skip connection (B, skip_channels, T).
+
+        """
+        residual = x
+        x = F.dropout(x, p=self.dropout, training=self.training)
+        x = self.conv(x)
+
+        # remove future time steps when causal convolution is used
+        x = x[:, :, : residual.size(-1)] if self.use_causal_conv else x
+
+        # split into two part for gated activation
+        splitdim = 1
+        xa, xb = x.split(x.size(splitdim) // 2, dim=splitdim)
+
+        # local conditioning
+        if c is not None:
+            assert self.conv1x1_aux is not None
+            c = self.conv1x1_aux(c)
+            ca, cb = c.split(c.size(splitdim) // 2, dim=splitdim)
+            xa, xb = xa + ca, xb + cb
+
+        x = torch.tanh(xa) * torch.sigmoid(xb)
+
+        # for skip connection
+        s = self.conv1x1_skip(x)
+
+        # for residual connection (sum is scaled by sqrt(0.5))
+        x = (self.conv1x1_out(x) + residual) * math.sqrt(0.5)
+
+        return x, s
+
+
+class HiFiGANResidualBlock(torch.nn.Module):
+    """Residual block module in HiFiGAN (one residual step per dilation factor)."""
+
+    def __init__(
+        self,
+        kernel_size=3,
+        channels=512,
+        dilations=(1, 3, 5),
+        bias=True,
+        use_additional_convs=True,
+        nonlinear_activation="LeakyReLU",
+        nonlinear_activation_params={"negative_slope": 0.1},
+    ):
+        """Initialize HiFiGANResidualBlock module.
+
+        Args:
+            kernel_size (int): Kernel size of dilation convolution layer (must be odd).
+            channels (int): Number of channels for convolution layer.
+            dilations (List[int]): List of dilation factors.
+            use_additional_convs (bool): Whether to use additional convolution layers.
+            bias (bool): Whether to add bias parameter in convolution layers.
+            nonlinear_activation (str): Activation function module name in torch.nn.
+            nonlinear_activation_params (dict): Hyperparameters for activation function.
+
+        """
+        super().__init__()
+        self.use_additional_convs = use_additional_convs
+        self.convs1 = torch.nn.ModuleList()
+        if use_additional_convs:
+            self.convs2 = torch.nn.ModuleList()
+        assert kernel_size % 2 == 1, "Kernel size must be odd number."
+        for dilation in dilations:
+            self.convs1 += [
+                torch.nn.Sequential(
+                    getattr(torch.nn, nonlinear_activation)(
+                        **nonlinear_activation_params
+                    ),
+                    torch.nn.Conv1d(
+                        channels,
+                        channels,
+                        kernel_size,
+                        1,
+                        dilation=dilation,
+                        bias=bias,
+                        padding=(kernel_size - 1) // 2 * dilation,
+                    ),
+                )
+            ]
+            if use_additional_convs:
+                self.convs2 += [
+                    torch.nn.Sequential(
+                        getattr(torch.nn, nonlinear_activation)(
+                            **nonlinear_activation_params
+                        ),
+                        torch.nn.Conv1d(
+                            channels,
+                            channels,
+                            kernel_size,
+                            1,
+                            dilation=1,
+                            bias=bias,
+                            padding=(kernel_size - 1) // 2,
+                        ),
+                    )
+                ]
+
+    def forward(self, x):
+        """Calculate forward propagation.
+
+        Args:
+            x (Tensor): Input tensor (B, channels, T).
+
+        Returns:
+            Tensor: Output tensor (B, channels, T).
+
+        """
+        for idx in range(len(self.convs1)):
+            xt = self.convs1[idx](x)
+            if self.use_additional_convs:
+                xt = self.convs2[idx](xt)
+            x = xt + x  # residual connection around each conv pair
+        return x
diff --git a/hifigan/parallel_wavegan/layers/residual_stack.py b/hifigan/parallel_wavegan/layers/residual_stack.py
new file mode 100644
index 0000000..b1d2788
--- /dev/null
+++ b/hifigan/parallel_wavegan/layers/residual_stack.py
@@ -0,0 +1,85 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2020 Tomoki Hayashi
+#  MIT License (https://opensource.org/licenses/MIT)
+
+"""Residual stack module in MelGAN."""
+
+import torch
+
+from parallel_wavegan.layers import CausalConv1d
+
+
+class ResidualStack(torch.nn.Module):
+    """Residual stack module introduced in MelGAN."""
+
+    def __init__(
+        self,
+        kernel_size=3,
+        channels=32,
+        dilation=1,
+        bias=True,
+        nonlinear_activation="LeakyReLU",
+        nonlinear_activation_params={"negative_slope": 0.2},
+        pad="ReflectionPad1d",
+        pad_params={},
+        use_causal_conv=False,
+    ):
+        """Initialize ResidualStack module.
+
+        Args:
+            kernel_size (int): Kernel size of dilation convolution layer.
+            channels (int): Number of channels of convolution layers.
+            dilation (int): Dilation factor.
+            bias (bool): Whether to add bias parameter in convolution layers.
+            nonlinear_activation (str): Activation function module name.
+            nonlinear_activation_params (dict): Hyperparameters for activation function.
+            pad (str): Padding function module name before dilated convolution layer.
+            pad_params (dict): Hyperparameters for padding function.
+            use_causal_conv (bool): Whether to use causal convolution.
+
+        """
+        super(ResidualStack, self).__init__()
+
+        # define residual stack part
+        if not use_causal_conv:
+            assert (kernel_size - 1) % 2 == 0, "Not support even number kernel size."
+            self.stack = torch.nn.Sequential(
+                getattr(torch.nn, nonlinear_activation)(**nonlinear_activation_params),
+                getattr(torch.nn, pad)((kernel_size - 1) // 2 * dilation, **pad_params),
+                torch.nn.Conv1d(
+                    channels, channels, kernel_size, dilation=dilation, bias=bias
+                ),
+                getattr(torch.nn, nonlinear_activation)(**nonlinear_activation_params),
+                torch.nn.Conv1d(channels, channels, 1, bias=bias),
+            )
+        else:
+            self.stack = torch.nn.Sequential(
+                getattr(torch.nn, nonlinear_activation)(**nonlinear_activation_params),
+                CausalConv1d(
+                    channels,
+                    channels,
+                    kernel_size,
+                    dilation=dilation,
+                    bias=bias,
+                    pad=pad,
+                    pad_params=pad_params,
+                ),
+                getattr(torch.nn, nonlinear_activation)(**nonlinear_activation_params),
+                torch.nn.Conv1d(channels, channels, 1, bias=bias),
+            )
+
+        # define extra layer for skip connection
+        self.skip_layer = torch.nn.Conv1d(channels, channels, 1, bias=bias)
+
+    def forward(self, c):
+        """Calculate forward propagation.
+
+        Args:
+            c (Tensor): Input tensor (B, channels, T).
+
+        Returns:
+            Tensor: Output tensor (B, channels, T).
+
+        """
+        return self.stack(c) + self.skip_layer(c)
diff --git a/hifigan/parallel_wavegan/layers/tade_res_block.py b/hifigan/parallel_wavegan/layers/tade_res_block.py
new file mode 100644
index 0000000..bcad421
--- /dev/null
+++ b/hifigan/parallel_wavegan/layers/tade_res_block.py
@@ -0,0 +1,160 @@
+# Copyright 2021 Tomoki Hayashi
+#  MIT License (https://opensource.org/licenses/MIT)
+
+"""StyleMelGAN's TADEResBlock Modules."""
+
+from functools import partial
+
+import torch
+
+
+class TADELayer(torch.nn.Module):
+    """TADE Layer module."""
+
+    def __init__(
+        self,
+        in_channels=64,
+        aux_channels=80,
+        kernel_size=9,
+        bias=True,
+        upsample_factor=2,
+        upsample_mode="nearest",
+    ):
+        """Initialize TADE layer."""
+        super().__init__()
+        self.norm = torch.nn.InstanceNorm1d(in_channels)
+        self.aux_conv = torch.nn.Sequential(
+            torch.nn.Conv1d(
+                aux_channels,
+                in_channels,
+                kernel_size,
+                1,
+                bias=bias,
+                padding=(kernel_size - 1) // 2,
+            ),
+            # NOTE(kan-bayashi): Use non-linear activation?
+        )
+        self.gated_conv = torch.nn.Sequential(
+            torch.nn.Conv1d(
+                in_channels,
+                in_channels * 2,
+                kernel_size,
+                1,
+                bias=bias,
+                padding=(kernel_size - 1) // 2,
+            ),
+            # NOTE(kan-bayashi): Use non-linear activation?
+        )
+        self.upsample = torch.nn.Upsample(
+            scale_factor=upsample_factor, mode=upsample_mode
+        )
+
+    def forward(self, x, c):
+        """Calculate forward propagation.
+
+        Args:
+            x (Tensor): Input tensor (B, in_channels, T).
+            c (Tensor): Auxiliary input tensor (B, aux_channels, T').
+
+        Returns:
+            Tensor: Output tensor (B, in_channels, T * upsample_factor).
+            Tensor: Upsampled aux tensor (B, in_channels, T' * upsample_factor).
+
+        """
+        x = self.norm(x)
+        c = self.upsample(c)
+        c = self.aux_conv(c)
+        cg = self.gated_conv(c)
+        cg1, cg2 = cg.split(cg.size(1) // 2, dim=1)  # scale and shift halves
+        # NOTE(kan-bayashi): Use upsample for noise input here?
+        y = cg1 * self.upsample(x) + cg2
+        # NOTE(kan-bayashi): Return upsampled aux here?
+        return y, c
+
+
+class TADEResBlock(torch.nn.Module):
+    """TADEResBlock module."""
+
+    def __init__(
+        self,
+        in_channels=64,
+        aux_channels=80,
+        kernel_size=9,
+        dilation=2,
+        bias=True,
+        upsample_factor=2,
+        upsample_mode="nearest",
+        gated_function="softmax",
+    ):
+        """Initialize TADEResBlock module (gated_function: "softmax" or "sigmoid")."""
+        super().__init__()
+        self.tade1 = TADELayer(
+            in_channels=in_channels,
+            aux_channels=aux_channels,
+            kernel_size=kernel_size,
+            bias=bias,
+            # NOTE(kan-bayashi): Use upsample in the first TADE layer?
+            upsample_factor=1,
+            upsample_mode=upsample_mode,
+        )
+        self.gated_conv1 = torch.nn.Conv1d(
+            in_channels,
+            in_channels * 2,
+            kernel_size,
+            1,
+            bias=bias,
+            padding=(kernel_size - 1) // 2,
+        )
+        self.tade2 = TADELayer(
+            in_channels=in_channels,
+            aux_channels=in_channels,
+            kernel_size=kernel_size,
+            bias=bias,
+            upsample_factor=upsample_factor,
+            upsample_mode=upsample_mode,
+        )
+        self.gated_conv2 = torch.nn.Conv1d(
+            in_channels,
+            in_channels * 2,
+            kernel_size,
+            1,
+            bias=bias,
+            dilation=dilation,
+            padding=(kernel_size - 1) // 2 * dilation,
+        )
+        self.upsample = torch.nn.Upsample(
+            scale_factor=upsample_factor, mode=upsample_mode
+        )
+        if gated_function == "softmax":
+            self.gated_function = partial(torch.softmax, dim=1)
+        elif gated_function == "sigmoid":
+            self.gated_function = torch.sigmoid
+        else:
+            raise ValueError(f"{gated_function} is not supported.")
+
+    def forward(self, x, c):
+        """Calculate forward propagation.
+
+        Args:
+            x (Tensor): Input tensor (B, in_channels, T).
+            c (Tensor): Auxiliary input tensor (B, aux_channels, T').
+
+        Returns:
+            Tensor: Output tensor (B, in_channels, T * upsample_factor).
+            Tensor: Upsampled auxiliary tensor (B, in_channels, T * upsample_factor).
+
+        """
+        residual = x
+
+        x, c = self.tade1(x, c)
+        x = self.gated_conv1(x)
+        xa, xb = x.split(x.size(1) // 2, dim=1)
+        x = self.gated_function(xa) * torch.tanh(xb)
+
+        x, c = self.tade2(x, c)
+        x = self.gated_conv2(x)
+        xa, xb = x.split(x.size(1) // 2, dim=1)
+        x = self.gated_function(xa) * torch.tanh(xb)
+
+        # NOTE(kan-bayashi): Return upsampled aux here?
+        return self.upsample(residual) + x, c
diff --git a/hifigan/parallel_wavegan/layers/tf_layers.py b/hifigan/parallel_wavegan/layers/tf_layers.py
new file mode 100644
index 0000000..e06ffc0
--- /dev/null
+++ b/hifigan/parallel_wavegan/layers/tf_layers.py
@@ -0,0 +1,140 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2020 MINH ANH (@dathudeptrai)
+#  MIT License (https://opensource.org/licenses/MIT)
+
+"""Tensorflow Layer modules compatible with pytorch."""
+
+import tensorflow as tf
+
+
+class TFReflectionPad1d(tf.keras.layers.Layer):
+    """Tensorflow ReflectionPad1d module."""
+
+    def __init__(self, padding_size):
+        """Initialize TFReflectionPad1d module.
+
+        Args:
+            padding_size (int): Padding size.
+
+        """
+        super(TFReflectionPad1d, self).__init__()
+        self.padding_size = padding_size
+
+    @tf.function
+    def call(self, x):
+        """Calculate forward propagation.
+
+        Args:
+            x (Tensor): Input tensor (B, T, 1, C).
+
+        Returns:
+            Tensor: Padded tensor (B, T + 2 * padding_size, 1, C).
+
+        """
+        return tf.pad(
+            x,
+            [[0, 0], [self.padding_size, self.padding_size], [0, 0], [0, 0]],
+            "REFLECT",
+        )
+
+
+class TFConvTranspose1d(tf.keras.layers.Layer):
+    """Tensorflow ConvTranspose1d module."""
+
+    def __init__(self, channels, kernel_size, stride, padding):
+        """Initialize TFConvTranspose1d module.
+
+        Args:
+            channels (int): Number of channels.
+            kernel_size (int): kernel size.
+            stride (int): Stride width.
+            padding (str): Padding type ("same" or "valid").
+
+        """
+        super(TFConvTranspose1d, self).__init__()
+        self.conv1d_transpose = tf.keras.layers.Conv2DTranspose(
+            filters=channels,
+            kernel_size=(kernel_size, 1),
+            strides=(stride, 1),
+            padding=padding,
+        )
+
+    @tf.function
+    def call(self, x):
+        """Calculate forward propagation.
+
+        Args:
+            x (Tensor): Input tensor (B, T, 1, C).
+
+        Returns:
+            Tensor: Output tensor (B, T', 1, C').
+
+        """
+        x = self.conv1d_transpose(x)
+        return x
+
+
+class TFResidualStack(tf.keras.layers.Layer):
+    """Tensorflow ResidualStack module."""
+
+    def __init__(
+        self,
+        kernel_size,
+        channels,
+        dilation,
+        bias,
+        nonlinear_activation,
+        nonlinear_activation_params,
+        padding,
+    ):
+        """Initialize TFResidualStack module.
+
+        Args:
+            kernel_size (int): Kernel size.
+            channels (int): Number of channels.
+            dilation (int): Dilation rate.
+            bias (bool): Whether to add bias parameter in convolution layers.
+            nonlinear_activation (str): Activation function module name.
+            nonlinear_activation_params (dict): Hyperparameters for activation function.
+            padding (str): Padding type ("same" or "valid").
+
+        """
+        super(TFResidualStack, self).__init__()
+        self.block = [
+            getattr(tf.keras.layers, nonlinear_activation)(
+                **nonlinear_activation_params
+            ),
+            TFReflectionPad1d(dilation),
+            tf.keras.layers.Conv2D(
+                filters=channels,
+                kernel_size=(kernel_size, 1),
+                dilation_rate=(dilation, 1),
+                use_bias=bias,
+                padding="valid",
+            ),
+            getattr(tf.keras.layers, nonlinear_activation)(
+                **nonlinear_activation_params
+            ),
+            tf.keras.layers.Conv2D(filters=channels, kernel_size=1, use_bias=bias),
+        ]
+        self.shortcut = tf.keras.layers.Conv2D(
+            filters=channels, kernel_size=1, use_bias=bias
+        )
+
+    @tf.function
+    def call(self, x):
+        """Calculate forward propagation.
+
+        Args:
+            x (Tensor): Input tensor (B, T, 1, C).
+
+        Returns:
+            Tensor: Output tensor (B, T, 1, C).
+
+        """
+        _x = tf.identity(x)
+        for i, layer in enumerate(self.block):
+            _x = layer(_x)
+        shortcut = self.shortcut(x)
+        return shortcut + _x
diff --git a/hifigan/parallel_wavegan/layers/upsample.py b/hifigan/parallel_wavegan/layers/upsample.py
new file mode 100644
index 0000000..8cc9f2d
--- /dev/null
+++ b/hifigan/parallel_wavegan/layers/upsample.py
@@ -0,0 +1,194 @@
+# -*- coding: utf-8 -*-
+
+"""Upsampling module.
+
+This code is modified from https://github.com/r9y9/wavenet_vocoder.
+
+"""
+
+import numpy as np
+import torch
+import torch.nn.functional as F
+
+from parallel_wavegan.layers import Conv1d
+
+
+class Stretch2d(torch.nn.Module):
+    """Stretch2d module."""
+
+    def __init__(self, x_scale, y_scale, mode="nearest"):
+        """Initialize Stretch2d module.
+
+        Args:
+            x_scale (int): X scaling factor (Time axis in spectrogram).
+            y_scale (int): Y scaling factor (Frequency axis in spectrogram).
+            mode (str): Interpolation mode.
+
+        """
+        super(Stretch2d, self).__init__()
+        self.x_scale = x_scale
+        self.y_scale = y_scale
+        self.mode = mode
+
+    def forward(self, x):
+        """Calculate forward propagation.
+
+        Args:
+            x (Tensor): Input tensor (B, C, F, T).
+
+        Returns:
+            Tensor: Interpolated tensor (B, C, F * y_scale, T * x_scale).
+
+        """
+        return F.interpolate(
+            x, scale_factor=(self.y_scale, self.x_scale), mode=self.mode
+        )
+
+
+class Conv2d(torch.nn.Conv2d):
+    """Conv2d module with customized initialization."""
+
+    def __init__(self, *args, **kwargs):
+        """Initialize Conv2d module."""
+        super(Conv2d, self).__init__(*args, **kwargs)
+
+    def reset_parameters(self):
+        """Reset parameters."""
+        self.weight.data.fill_(1.0 / np.prod(self.kernel_size))
+        if self.bias is not None:
+            torch.nn.init.constant_(self.bias, 0.0)
+
+
+class UpsampleNetwork(torch.nn.Module):
+    """Upsampling network module."""
+
+    def __init__(
+        self,
+        upsample_scales,
+        nonlinear_activation=None,
+        nonlinear_activation_params={},
+        interpolate_mode="nearest",
+        freq_axis_kernel_size=1,
+        use_causal_conv=False,
+    ):
+        """Initialize upsampling network module.
+
+        Args:
+            upsample_scales (list): List of upsampling scales.
+            nonlinear_activation (str): Activation function name.
+            nonlinear_activation_params (dict): Arguments for specified activation function.
+            interpolate_mode (str): Interpolation mode.
+            freq_axis_kernel_size (int): Kernel size in the direction of frequency axis.
+
+        """
+        super(UpsampleNetwork, self).__init__()
+        self.use_causal_conv = use_causal_conv
+        self.up_layers = torch.nn.ModuleList()
+        for scale in upsample_scales:
+            # interpolation layer
+            stretch = Stretch2d(scale, 1, interpolate_mode)
+            self.up_layers += [stretch]
+
+            # conv layer
+            assert (
+                freq_axis_kernel_size - 1
+            ) % 2 == 0, "Not support even number freq axis kernel size."
+            freq_axis_padding = (freq_axis_kernel_size - 1) // 2
+            kernel_size = (freq_axis_kernel_size, scale * 2 + 1)
+            if use_causal_conv:
+                padding = (freq_axis_padding, scale * 2)
+            else:
+                padding = (freq_axis_padding, scale)
+            conv = Conv2d(1, 1, kernel_size=kernel_size, padding=padding, bias=False)
+            self.up_layers += [conv]
+
+            # nonlinear
+            if nonlinear_activation is not None:
+                nonlinear = getattr(torch.nn, nonlinear_activation)(
+                    **nonlinear_activation_params
+                )
+                self.up_layers += [nonlinear]
+
+    def forward(self, c):
+        """Calculate forward propagation.
+
+        Args:
+            c : Input tensor (B, C, T).
+
+        Returns:
+            Tensor: Upsampled tensor (B, C, T'), where T' = T * prod(upsample_scales).
+
+        """
+        c = c.unsqueeze(1)  # (B, 1, C, T)
+        for f in self.up_layers:
+            if self.use_causal_conv and isinstance(f, Conv2d):
+                c = f(c)[..., : c.size(-1)]
+            else:
+                c = f(c)
+        return c.squeeze(1)  # (B, C, T')
+
+
+class ConvInUpsampleNetwork(torch.nn.Module):
+    """Convolution + upsampling network module."""
+
+    def __init__(
+        self,
+        upsample_scales,
+        nonlinear_activation=None,
+        nonlinear_activation_params={},
+        interpolate_mode="nearest",
+        freq_axis_kernel_size=1,
+        aux_channels=80,
+        aux_context_window=0,
+        use_causal_conv=False,
+    ):
+        """Initialize convolution + upsampling network module.
+
+        Args:
+            upsample_scales (list): List of upsampling scales.
+            nonlinear_activation (str): Activation function name.
+            nonlinear_activation_params (dict): Arguments for specified activation function.
+            interpolate_mode (str): Interpolation mode.
+            freq_axis_kernel_size (int): Kernel size in the direction of frequency axis.
+            aux_channels (int): Number of channels of pre-convolutional layer.
+            aux_context_window (int): Context window size of the pre-convolutional layer.
+            use_causal_conv (bool): Whether to use causal structure.
+
+        """
+        super(ConvInUpsampleNetwork, self).__init__()
+        self.aux_context_window = aux_context_window
+        self.use_causal_conv = use_causal_conv and aux_context_window > 0
+        # To capture wide-context information in conditional features
+        kernel_size = (
+            aux_context_window + 1 if use_causal_conv else 2 * aux_context_window + 1
+        )
+        # NOTE(kan-bayashi): Here do not use padding because the input is already padded
+        self.conv_in = Conv1d(
+            aux_channels, aux_channels, kernel_size=kernel_size, bias=False
+        )
+        self.upsample = UpsampleNetwork(
+            upsample_scales=upsample_scales,
+            nonlinear_activation=nonlinear_activation,
+            nonlinear_activation_params=nonlinear_activation_params,
+            interpolate_mode=interpolate_mode,
+            freq_axis_kernel_size=freq_axis_kernel_size,
+            use_causal_conv=use_causal_conv,
+        )
+
+    def forward(self, c):
+        """Calculate forward propagation.
+
+        Args:
+            c : Input tensor (B, C, T').
+
+        Returns:
+            Tensor: Upsampled tensor (B, C, T),
+                where T = (T' - aux_context_window * 2) * prod(upsample_scales).
+
+        Note:
+            The length of inputs considers the context window size.
+
+        """
+        c_ = self.conv_in(c)
+        c = c_[:, :, : -self.aux_context_window] if self.use_causal_conv else c_
+        return self.upsample(c)
diff --git a/hifigan/parallel_wavegan/losses/__init__.py b/hifigan/parallel_wavegan/losses/__init__.py
new file mode 100644
index 0000000..adb36e6
--- /dev/null
+++ b/hifigan/parallel_wavegan/losses/__init__.py
@@ -0,0 +1,4 @@
+from .adversarial_loss import *  # NOQA
+from .feat_match_loss import *  # NOQA
+from .mel_loss import *  # NOQA
+from .stft_loss import *  # NOQA
diff --git a/hifigan/parallel_wavegan/losses/adversarial_loss.py b/hifigan/parallel_wavegan/losses/adversarial_loss.py
new file mode 100644
index 0000000..c7624fa
--- /dev/null
+++ b/hifigan/parallel_wavegan/losses/adversarial_loss.py
@@ -0,0 +1,123 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2021 Tomoki Hayashi
+#  MIT License (https://opensource.org/licenses/MIT)
+
+"""Adversarial loss modules."""
+
+import torch
+import torch.nn.functional as F
+
+
+class GeneratorAdversarialLoss(torch.nn.Module):
+    """Generator adversarial loss module."""
+
+    def __init__(
+        self,
+        average_by_discriminators=True,
+        loss_type="mse",
+    ):
+        """Initialize GeneratorAdversarialLoss module."""
+        super().__init__()
+        self.average_by_discriminators = average_by_discriminators
+        assert loss_type in ["mse", "hinge"], f"{loss_type} is not supported."
+        if loss_type == "mse":
+            self.criterion = self._mse_loss
+        else:
+            self.criterion = self._hinge_loss
+
+    def forward(self, outputs):
+        """Calculate generator adversarial loss.
+
+        Args:
+            outputs (Tensor or list): Discriminator outputs or list of
+                discriminator outputs.
+
+        Returns:
+            Tensor: Generator adversarial loss value.
+
+        """
+        if isinstance(outputs, (tuple, list)):
+            adv_loss = 0.0
+            for i, outputs_ in enumerate(outputs):
+                if isinstance(outputs_, (tuple, list)):
+                    # NOTE(kan-bayashi): case including feature maps
+                    outputs_ = outputs_[-1]
+                adv_loss += self.criterion(outputs_)
+            if self.average_by_discriminators:
+                adv_loss /= i + 1
+        else:
+            adv_loss = self.criterion(outputs)
+
+        return adv_loss
+
+    def _mse_loss(self, x):
+        return F.mse_loss(x, x.new_ones(x.size()))
+
+    def _hinge_loss(self, x):
+        return -x.mean()
+
+
+class DiscriminatorAdversarialLoss(torch.nn.Module):
+    """Discriminator adversarial loss module."""
+
+    def __init__(
+        self,
+        average_by_discriminators=True,
+        loss_type="mse",
+    ):
+        """Initialize DiscriminatorAdversarialLoss module."""
+        super().__init__()
+        self.average_by_discriminators = average_by_discriminators
+        assert loss_type in ["mse", "hinge"], f"{loss_type} is not supported."
+        if loss_type == "mse":
+            self.fake_criterion = self._mse_fake_loss
+            self.real_criterion = self._mse_real_loss
+        else:
+            self.fake_criterion = self._hinge_fake_loss
+            self.real_criterion = self._hinge_real_loss
+
+    def forward(self, outputs_hat, outputs):
+        """Calculate discriminator adversarial loss.
+
+        Args:
+            outputs_hat (Tensor or list): Discriminator outputs or list of
+                discriminator outputs calculated from generator outputs.
+            outputs (Tensor or list): Discriminator outputs or list of
+                discriminator outputs calculated from groundtruth.
+
+        Returns:
+            Tensor: Discriminator real loss value.
+            Tensor: Discriminator fake loss value.
+
+        """
+        if isinstance(outputs, (tuple, list)):
+            real_loss = 0.0
+            fake_loss = 0.0
+            for i, (outputs_hat_, outputs_) in enumerate(zip(outputs_hat, outputs)):
+                if isinstance(outputs_hat_, (tuple, list)):
+                    # NOTE(kan-bayashi): case including feature maps
+                    outputs_hat_ = outputs_hat_[-1]
+                    outputs_ = outputs_[-1]
+                real_loss += self.real_criterion(outputs_)
+                fake_loss += self.fake_criterion(outputs_hat_)
+            if self.average_by_discriminators:
+                fake_loss /= i + 1
+                real_loss /= i + 1
+        else:
+            real_loss = self.real_criterion(outputs)
+            fake_loss = self.fake_criterion(outputs_hat)
+
+        return real_loss, fake_loss
+
+    def _mse_real_loss(self, x):
+        return F.mse_loss(x, x.new_ones(x.size()))
+
+    def _mse_fake_loss(self, x):
+        return F.mse_loss(x, x.new_zeros(x.size()))
+
+    def _hinge_real_loss(self, x):
+        return -torch.mean(torch.min(x - 1, x.new_zeros(x.size())))
+
+    def _hinge_fake_loss(self, x):
+        return -torch.mean(torch.min(-x - 1, x.new_zeros(x.size())))
diff --git a/hifigan/parallel_wavegan/losses/feat_match_loss.py b/hifigan/parallel_wavegan/losses/feat_match_loss.py
new file mode 100644
index 0000000..9cee14d
--- /dev/null
+++ b/hifigan/parallel_wavegan/losses/feat_match_loss.py
@@ -0,0 +1,54 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2021 Tomoki Hayashi
+#  MIT License (https://opensource.org/licenses/MIT)
+
+"""Feature matching loss modules."""
+
+import torch
+import torch.nn.functional as F
+
+
+class FeatureMatchLoss(torch.nn.Module):
+    """Feature matching loss module."""
+
+    def __init__(
+        self,
+        average_by_layers=True,
+        average_by_discriminators=True,
+        include_final_outputs=False,
+    ):
+        """Initialize FeatureMatchLoss module."""
+        super().__init__()
+        self.average_by_layers = average_by_layers
+        self.average_by_discriminators = average_by_discriminators
+        self.include_final_outputs = include_final_outputs
+
+    def forward(self, feats_hat, feats):
+        """Calculate feature matching loss.
+
+        Args:
+            feats_hat (list): List of list of discriminator outputs
+                calculated from generator outputs.
+            feats (list): List of list of discriminator outputs
+                calculated from groundtruth.
+
+        Returns:
+            Tensor: Feature matching loss value.
+
+        """
+        feat_match_loss = 0.0
+        for i, (feats_hat_, feats_) in enumerate(zip(feats_hat, feats)):
+            feat_match_loss_ = 0.0
+            if not self.include_final_outputs:
+                feats_hat_ = feats_hat_[:-1]
+                feats_ = feats_[:-1]
+            for j, (feat_hat_, feat_) in enumerate(zip(feats_hat_, feats_)):
+                feat_match_loss_ += F.l1_loss(feat_hat_, feat_.detach())
+            if self.average_by_layers:
+                feat_match_loss_ /= j + 1
+            feat_match_loss += feat_match_loss_
+        if self.average_by_discriminators:
+            feat_match_loss /= i + 1
+
+        return feat_match_loss
diff --git a/hifigan/parallel_wavegan/losses/mel_loss.py b/hifigan/parallel_wavegan/losses/mel_loss.py
new file mode 100644
index 0000000..58b12bb
--- /dev/null
+++ b/hifigan/parallel_wavegan/losses/mel_loss.py
@@ -0,0 +1,166 @@
+# Copyright 2021 Tomoki Hayashi
+#  MIT License (https://opensource.org/licenses/MIT)
+
+"""Mel-spectrogram loss modules."""
+
+from distutils.version import LooseVersion
+
+import librosa
+import torch
+import torch.nn.functional as F
+
+
+is_pytorch_17plus = LooseVersion(torch.__version__) >= LooseVersion("1.7")
+
+
+class MelSpectrogram(torch.nn.Module):
+    """Calculate Mel-spectrogram."""
+
+    def __init__(
+        self,
+        fs=22050,
+        fft_size=1024,
+        hop_size=256,
+        win_length=None,
+        window="hann",
+        num_mels=80,
+        fmin=80,
+        fmax=7600,
+        center=True,
+        normalized=False,
+        onesided=True,
+        eps=1e-10,
+        log_base=10.0,
+    ):
+        """Initialize MelSpectrogram module."""
+        super().__init__()
+        self.fft_size = fft_size
+        if win_length is None:
+            self.win_length = fft_size
+        else:
+            self.win_length = win_length
+        self.hop_size = hop_size
+        self.center = center
+        self.normalized = normalized
+        self.onesided = onesided
+        if window is not None and not hasattr(torch, f"{window}_window"):
+            raise ValueError(f"{window} window is not implemented")
+        self.window = window
+        self.eps = eps
+
+        fmin = 0 if fmin is None else fmin
+        fmax = fs / 2 if fmax is None else fmax
+        melmat = librosa.filters.mel(
+            sr=fs,
+            n_fft=fft_size,
+            n_mels=num_mels,
+            fmin=fmin,
+            fmax=fmax,
+        )
+        self.register_buffer("melmat", torch.from_numpy(melmat.T).float())
+        self.stft_params = {
+            "n_fft": self.fft_size,
+            "win_length": self.win_length,
+            "hop_length": self.hop_size,
+            "center": self.center,
+            "normalized": self.normalized,
+            "onesided": self.onesided,
+        }
+        if is_pytorch_17plus:
+            self.stft_params["return_complex"] = False
+
+        self.log_base = log_base
+        if self.log_base is None:
+            self.log = torch.log
+        elif self.log_base == 2.0:
+            self.log = torch.log2
+        elif self.log_base == 10.0:
+            self.log = torch.log10
+        else:
+            raise ValueError(f"log_base: {log_base} is not supported.")
+
+    def forward(self, x):
+        """Calculate Mel-spectrogram.
+
+        Args:
+            x (Tensor): Input waveform tensor (B, T) or (B, 1, T).
+
+        Returns:
+            Tensor: Mel-spectrogram (B, #mels, #frames).
+
+        """
+        if x.dim() == 3:
+            # (B, C, T) -> (B*C, T)
+            x = x.reshape(-1, x.size(2))
+
+        if self.window is not None:
+            window_func = getattr(torch, f"{self.window}_window")
+            window = window_func(self.win_length, dtype=x.dtype, device=x.device)
+        else:
+            window = None
+
+        x_stft = torch.stft(x, window=window, **self.stft_params)
+        # (B, #freqs, #frames, 2) -> (B, #frames, #freqs, 2)
+        x_stft = x_stft.transpose(1, 2)
+        x_power = x_stft[..., 0] ** 2 + x_stft[..., 1] ** 2
+        x_amp = torch.sqrt(torch.clamp(x_power, min=self.eps))
+
+        x_mel = torch.matmul(x_amp, self.melmat)
+        x_mel = torch.clamp(x_mel, min=self.eps)
+
+        return self.log(x_mel).transpose(1, 2)
+
+
+class MelSpectrogramLoss(torch.nn.Module):
+    """Mel-spectrogram loss."""
+
+    def __init__(
+        self,
+        fs=22050,
+        fft_size=1024,
+        hop_size=256,
+        win_length=None,
+        window="hann",
+        num_mels=80,
+        fmin=80,
+        fmax=7600,
+        center=True,
+        normalized=False,
+        onesided=True,
+        eps=1e-10,
+        log_base=10.0,
+    ):
+        """Initialize Mel-spectrogram loss."""
+        super().__init__()
+        self.mel_spectrogram = MelSpectrogram(
+            fs=fs,
+            fft_size=fft_size,
+            hop_size=hop_size,
+            win_length=win_length,
+            window=window,
+            num_mels=num_mels,
+            fmin=fmin,
+            fmax=fmax,
+            center=center,
+            normalized=normalized,
+            onesided=onesided,
+            eps=eps,
+            log_base=log_base,
+        )
+
+    def forward(self, y_hat, y):
+        """Calculate Mel-spectrogram loss.
+
+        Args:
+            y_hat (Tensor): Generated signal tensor (B, 1, T).
+            y (Tensor): Groundtruth signal tensor (B, 1, T).
+
+        Returns:
+            Tensor: Mel-spectrogram loss value.
+
+        """
+        mel_hat = self.mel_spectrogram(y_hat)
+        mel = self.mel_spectrogram(y)
+        mel_loss = F.l1_loss(mel_hat, mel)
+
+        return mel_loss
diff --git a/hifigan/parallel_wavegan/losses/stft_loss.py b/hifigan/parallel_wavegan/losses/stft_loss.py
new file mode 100644
index 0000000..b592355
--- /dev/null
+++ b/hifigan/parallel_wavegan/losses/stft_loss.py
@@ -0,0 +1,170 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2019 Tomoki Hayashi
+#  MIT License (https://opensource.org/licenses/MIT)
+
+"""STFT-based Loss modules."""
+
+import torch
+import torch.nn.functional as F
+
+from distutils.version import LooseVersion
+
+is_pytorch_17plus = LooseVersion(torch.__version__) >= LooseVersion("1.7")
+
+
+def stft(x, fft_size, hop_size, win_length, window):
+    """Perform STFT and convert to magnitude spectrogram.
+
+    Args:
+        x (Tensor): Input signal tensor (B, T).
+        fft_size (int): FFT size.
+        hop_size (int): Hop size.
+        win_length (int): Window length.
+        window (Tensor): Window tensor passed to torch.stft.
+
+    Returns:
+        Tensor: Magnitude spectrogram (B, #frames, fft_size // 2 + 1).
+
+    """
+    if is_pytorch_17plus:
+        x_stft = torch.stft(
+            x, fft_size, hop_size, win_length, window, return_complex=False
+        )
+    else:
+        x_stft = torch.stft(x, fft_size, hop_size, win_length, window)
+    real = x_stft[..., 0]
+    imag = x_stft[..., 1]
+
+    # NOTE(kan-bayashi): clamp is needed to avoid nan or inf
+    return torch.sqrt(torch.clamp(real ** 2 + imag ** 2, min=1e-7)).transpose(2, 1)
+
+
+class SpectralConvergenceLoss(torch.nn.Module):
+    """Spectral convergence loss module."""
+
+    def __init__(self):
+        """Initialize spectral convergence loss module."""
+        super(SpectralConvergenceLoss, self).__init__()
+
+    def forward(self, x_mag, y_mag):
+        """Calculate forward propagation.
+
+        Args:
+            x_mag (Tensor): Magnitude spectrogram of predicted signal (B, #frames, #freq_bins).
+            y_mag (Tensor): Magnitude spectrogram of groundtruth signal (B, #frames, #freq_bins).
+
+        Returns:
+            Tensor: Spectral convergence loss value.
+
+        """
+        return torch.norm(y_mag - x_mag, p="fro") / torch.norm(y_mag, p="fro")
+
+
+class LogSTFTMagnitudeLoss(torch.nn.Module):
+    """Log STFT magnitude loss module."""
+
+    def __init__(self):
+        """Initialize log STFT magnitude loss module."""
+        super(LogSTFTMagnitudeLoss, self).__init__()
+
+    def forward(self, x_mag, y_mag):
+        """Calculate forward propagation.
+
+        Args:
+            x_mag (Tensor): Magnitude spectrogram of predicted signal (B, #frames, #freq_bins).
+            y_mag (Tensor): Magnitude spectrogram of groundtruth signal (B, #frames, #freq_bins).
+
+        Returns:
+            Tensor: Log STFT magnitude loss value.
+
+        """
+        return F.l1_loss(torch.log(y_mag), torch.log(x_mag))
+
+
+class STFTLoss(torch.nn.Module):
+    """STFT loss module."""
+
+    def __init__(
+        self, fft_size=1024, shift_size=120, win_length=600, window="hann_window"
+    ):
+        """Initialize STFT loss module."""
+        super(STFTLoss, self).__init__()
+        self.fft_size = fft_size
+        self.shift_size = shift_size
+        self.win_length = win_length
+        self.spectral_convergence_loss = SpectralConvergenceLoss()
+        self.log_stft_magnitude_loss = LogSTFTMagnitudeLoss()
+        # NOTE(kan-bayashi): Use register_buffer to fix #223
+        self.register_buffer("window", getattr(torch, window)(win_length))
+
+    def forward(self, x, y):
+        """Calculate forward propagation.
+
+        Args:
+            x (Tensor): Predicted signal (B, T).
+            y (Tensor): Groundtruth signal (B, T).
+
+        Returns:
+            Tensor: Spectral convergence loss value.
+            Tensor: Log STFT magnitude loss value.
+
+        """
+        x_mag = stft(x, self.fft_size, self.shift_size, self.win_length, self.window)
+        y_mag = stft(y, self.fft_size, self.shift_size, self.win_length, self.window)
+        sc_loss = self.spectral_convergence_loss(x_mag, y_mag)
+        mag_loss = self.log_stft_magnitude_loss(x_mag, y_mag)
+
+        return sc_loss, mag_loss
+
+
+class MultiResolutionSTFTLoss(torch.nn.Module):
+    """Multi resolution STFT loss module."""
+
+    def __init__(
+        self,
+        fft_sizes=[1024, 2048, 512],
+        hop_sizes=[120, 240, 50],
+        win_lengths=[600, 1200, 240],
+        window="hann_window",
+    ):
+        """Initialize Multi resolution STFT loss module.
+
+        Args:
+            fft_sizes (list): List of FFT sizes.
+            hop_sizes (list): List of hop sizes.
+            win_lengths (list): List of window lengths.
+            window (str): Window function type.
+
+        """
+        super(MultiResolutionSTFTLoss, self).__init__()
+        assert len(fft_sizes) == len(hop_sizes) == len(win_lengths)
+        self.stft_losses = torch.nn.ModuleList()
+        for fs, ss, wl in zip(fft_sizes, hop_sizes, win_lengths):
+            self.stft_losses += [STFTLoss(fs, ss, wl, window)]
+
+    def forward(self, x, y):
+        """Calculate forward propagation.
+
+        Args:
+            x (Tensor): Predicted signal (B, T) or (B, #subband, T).
+            y (Tensor): Groundtruth signal (B, T) or (B, #subband, T).
+
+        Returns:
+            Tensor: Multi resolution spectral convergence loss value.
+            Tensor: Multi resolution log STFT magnitude loss value.
+
+        """
+        if len(x.shape) == 3:
+            x = x.view(-1, x.size(2))  # (B, C, T) -> (B x C, T)
+            y = y.view(-1, y.size(2))  # (B, C, T) -> (B x C, T)
+        sc_loss = 0.0
+        mag_loss = 0.0
+        for f in self.stft_losses:
+            sc_l, mag_l = f(x, y)
+            sc_loss += sc_l
+            mag_loss += mag_l
+        sc_loss /= len(self.stft_losses)
+        mag_loss /= len(self.stft_losses)
+
+        return sc_loss, mag_loss
diff --git a/hifigan/parallel_wavegan/models/__init__.py b/hifigan/parallel_wavegan/models/__init__.py
new file mode 100644
index 0000000..4744280
--- /dev/null
+++ b/hifigan/parallel_wavegan/models/__init__.py
@@ -0,0 +1,4 @@
+from .hifigan import *  # NOQA
+from .melgan import *  # NOQA
+from .parallel_wavegan import *  # NOQA
+from .style_melgan import *  # NOQA
diff --git a/hifigan/parallel_wavegan/models/hifigan.py b/hifigan/parallel_wavegan/models/hifigan.py
new file mode 100644
index 0000000..b0b0287
--- /dev/null
+++ b/hifigan/parallel_wavegan/models/hifigan.py
@@ -0,0 +1,732 @@
+# -*- coding: utf-8 -*-
+
+"""HiFi-GAN Modules.
+
+This code is based on https://github.com/jik876/hifi-gan.
+
+"""
+
+import copy
+import logging
+
+import numpy as np
+import torch
+import torch.nn.functional as F
+
+from parallel_wavegan.layers import HiFiGANResidualBlock as ResidualBlock
+from parallel_wavegan.utils import read_hdf5
+
+
+class HiFiGANGenerator(torch.nn.Module):
+    """HiFiGAN generator: input conv -> (upsample + averaged residual blocks) x N -> output conv."""
+
+    def __init__(
+        self,
+        in_channels=80,
+        out_channels=1,
+        channels=512,
+        kernel_size=7,
+        upsample_scales=(8, 8, 2, 2),
+        upsample_kernel_sizes=(16, 16, 4, 4),
+        resblock_kernel_sizes=(3, 7, 11),
+        resblock_dilations=[(1, 3, 5), (1, 3, 5), (1, 3, 5)],
+        use_additional_convs=True,
+        bias=True,
+        nonlinear_activation="LeakyReLU",
+        nonlinear_activation_params={"negative_slope": 0.1},
+        use_weight_norm=True,
+    ):
+        """Initialize HiFiGANGenerator module.
+
+        Args:
+            in_channels (int): Number of input channels.
+            out_channels (int): Number of output channels.
+            channels (int): Number of hidden representation channels (halved at every upsample).
+            kernel_size (int): Kernel size of initial and final conv layer (must be odd).
+            upsample_scales (list): List of upsampling scales.
+            upsample_kernel_sizes (list): Upsampling kernel sizes; each must equal 2 * its scale.
+            resblock_kernel_sizes (list): List of kernel sizes for residual blocks.
+            resblock_dilations (list): List of dilation list for residual blocks.
+            use_additional_convs (bool): Whether to use additional conv layers in residual blocks.
+            bias (bool): Whether to add bias parameter in the residual blocks' conv layers.
+            nonlinear_activation (str): Activation function module name.
+            nonlinear_activation_params (dict): Hyperparameters for activation function.
+            use_weight_norm (bool): Whether to use weight norm.
+                If set to true, it will be applied to all of the conv layers.
+
+        """
+        super().__init__()
+
+        # check hyperparameters are valid
+        assert kernel_size % 2 == 1, "Kernel size must be odd number."
+        assert len(upsample_scales) == len(upsample_kernel_sizes)
+        assert len(resblock_dilations) == len(resblock_kernel_sizes)
+
+        # define modules
+        self.num_upsamples = len(upsample_kernel_sizes)
+        self.num_blocks = len(resblock_kernel_sizes)
+        self.input_conv = torch.nn.Conv1d(
+            in_channels,
+            channels,
+            kernel_size,
+            1,
+            padding=(kernel_size - 1) // 2,
+        )
+        self.upsamples = torch.nn.ModuleList()
+        self.blocks = torch.nn.ModuleList()
+        for i in range(len(upsample_kernel_sizes)):
+            assert upsample_kernel_sizes[i] == 2 * upsample_scales[i]
+            self.upsamples += [
+                torch.nn.Sequential(
+                    getattr(torch.nn, nonlinear_activation)(
+                        **nonlinear_activation_params
+                    ),
+                    torch.nn.ConvTranspose1d(
+                        channels // (2 ** i),
+                        channels // (2 ** (i + 1)),
+                        upsample_kernel_sizes[i],
+                        upsample_scales[i],
+                        padding=upsample_scales[i] // 2 + upsample_scales[i] % 2,
+                        output_padding=upsample_scales[i] % 2,
+                    ),
+                )
+            ]
+            for j in range(len(resblock_kernel_sizes)):
+                self.blocks += [
+                    ResidualBlock(
+                        kernel_size=resblock_kernel_sizes[j],
+                        channels=channels // (2 ** (i + 1)),
+                        dilations=resblock_dilations[j],
+                        bias=bias,
+                        use_additional_convs=use_additional_convs,
+                        nonlinear_activation=nonlinear_activation,
+                        nonlinear_activation_params=nonlinear_activation_params,
+                    )
+                ]
+        self.output_conv = torch.nn.Sequential(
+            # NOTE(kan-bayashi): follow official implementation but why
+            #   using different slope parameter here? (0.1 vs. 0.01)
+            torch.nn.LeakyReLU(),
+            torch.nn.Conv1d(
+                channels // (2 ** self.num_upsamples),  # width after the last upsample
+                out_channels,
+                kernel_size,
+                1,
+                padding=(kernel_size - 1) // 2,
+            ),
+            torch.nn.Tanh(),
+        )
+
+        # apply weight norm
+        if use_weight_norm:
+            self.apply_weight_norm()
+
+        # reset parameters
+        self.reset_parameters()
+
+    def forward(self, c):
+        """Calculate forward propagation.
+
+        Args:
+            c (Tensor): Input tensor (B, in_channels, T).
+
+        Returns:
+            Tensor: Output tensor (B, out_channels, T'), where T' is the upsampled length.
+
+        """
+        c = self.input_conv(c)
+        for i in range(self.num_upsamples):
+            c = self.upsamples[i](c)
+            cs = 0.0  # initialize
+            for j in range(self.num_blocks):
+                cs += self.blocks[i * self.num_blocks + j](c)
+            c = cs / self.num_blocks  # average the parallel residual blocks of this stage
+        c = self.output_conv(c)
+
+        return c
+
+    def reset_parameters(self):
+        """Reset parameters.
+
+        This initialization follows the official implementation manner.
+        https://github.com/jik876/hifi-gan/blob/master/models.py
+
+        """
+
+        def _reset_parameters(m):
+            if isinstance(m, (torch.nn.Conv1d, torch.nn.ConvTranspose1d)):
+                m.weight.data.normal_(0.0, 0.01)
+                logging.debug(f"Reset parameters in {m}.")
+
+        self.apply(_reset_parameters)
+
+    def remove_weight_norm(self):
+        """Remove weight normalization module from all of the layers."""
+
+        def _remove_weight_norm(m):
+            try:
+                torch.nn.utils.remove_weight_norm(m)
+                logging.debug(f"Weight norm is removed from {m}.")  # only after success
+            except ValueError:  # this module didn't have weight norm
+                return
+
+        self.apply(_remove_weight_norm)
+
+    def apply_weight_norm(self):
+        """Apply weight normalization module from all of the layers."""
+
+        def _apply_weight_norm(m):
+            if isinstance(m, torch.nn.Conv1d) or isinstance(
+                m, torch.nn.ConvTranspose1d
+            ):
+                torch.nn.utils.weight_norm(m)
+                logging.debug(f"Weight norm is applied to {m}.")
+
+        self.apply(_apply_weight_norm)
+
+    def register_stats(self, stats):
+        """Register stats for de-normalization as buffer.
+
+        Args:
+            stats (str): Path of statistics file (".npy" or ".h5").
+
+        """
+        assert stats.endswith(".h5") or stats.endswith(".npy")
+        if stats.endswith(".h5"):
+            mean = read_hdf5(stats, "mean").reshape(-1)
+            scale = read_hdf5(stats, "scale").reshape(-1)
+        else:
+            loaded = np.load(stats)  # read the file once; row 0 is mean, row 1 is scale
+            mean, scale = loaded[0].reshape(-1), loaded[1].reshape(-1)
+        self.register_buffer("mean", torch.from_numpy(mean).float())
+        self.register_buffer("scale", torch.from_numpy(scale).float())
+        logging.info("Successfully registered stats as buffer.")
+
+    def inference(self, c, normalize_before=False):
+        """Perform inference.
+
+        Args:
+            c (Union[Tensor, ndarray]): Input tensor (T, in_channels).
+            normalize_before (bool): Whether to normalize with the registered mean / scale.
+
+        Returns:
+            Tensor: Output tensor (T * prod(upsample_scales), out_channels).
+
+        """
+        if not isinstance(c, torch.Tensor):
+            c = torch.tensor(c, dtype=torch.float).to(next(self.parameters()).device)
+        if normalize_before:
+            c = (c - self.mean) / self.scale
+        c = self.forward(c.transpose(1, 0).unsqueeze(0))
+        return c.squeeze(0).transpose(1, 0)
+
+
+class HiFiGANPeriodDiscriminator(torch.nn.Module):
+    """HiFiGAN period discriminator: folds the waveform by `period` and applies 2-D convs."""
+
+    def __init__(
+        self,
+        in_channels=1,
+        out_channels=1,
+        period=3,
+        kernel_sizes=[5, 3],
+        channels=32,
+        downsample_scales=[3, 3, 3, 3, 1],
+        max_downsample_channels=1024,
+        bias=True,
+        nonlinear_activation="LeakyReLU",
+        nonlinear_activation_params={"negative_slope": 0.1},
+        use_weight_norm=True,
+        use_spectral_norm=False,
+    ):
+        """Initialize HiFiGANPeriodDiscriminator module.
+
+        Args:
+            in_channels (int): Number of input channels.
+            out_channels (int): Number of output channels.
+            period (int): Period.
+            kernel_sizes (list): Kernel sizes of initial conv layers and the final conv layer.
+            channels (int): Number of initial channels.
+            downsample_scales (list): List of downsampling scales.
+            max_downsample_channels (int): Number of maximum downsampling channels.
+            bias (bool): Accepted for interface compatibility. NOTE(review): it is not passed
+                to the Conv2d layers built here, so they keep PyTorch's default bias setting.
+            nonlinear_activation (str): Activation function module name.
+            nonlinear_activation_params (dict): Hyperparameters for activation function.
+            use_weight_norm (bool): Whether to use weight norm.
+                If set to true, it will be applied to all of the conv layers.
+            use_spectral_norm (bool): Whether to use spectral norm.
+                If set to true, it will be applied to all of the conv layers.
+
+        """
+        super().__init__()
+        assert len(kernel_sizes) == 2
+        assert kernel_sizes[0] % 2 == 1, "Kernel size must be odd number."
+        assert kernel_sizes[1] % 2 == 1, "Kernel size must be odd number."
+
+        self.period = period
+        self.convs = torch.nn.ModuleList()
+        in_chs = in_channels
+        out_chs = channels
+        for downsample_scale in downsample_scales:
+            self.convs += [
+                torch.nn.Sequential(
+                    torch.nn.Conv2d(
+                        in_chs,
+                        out_chs,
+                        (kernel_sizes[0], 1),
+                        (downsample_scale, 1),
+                        padding=((kernel_sizes[0] - 1) // 2, 0),
+                    ),
+                    getattr(torch.nn, nonlinear_activation)(
+                        **nonlinear_activation_params
+                    ),
+                )
+            ]
+            in_chs = out_chs
+            # NOTE(kan-bayashi): Use downsample_scale + 1?
+            out_chs = min(out_chs * 4, max_downsample_channels)
+        self.output_conv = torch.nn.Conv2d(
+            in_chs,  # width produced by the last conv above (out_chs is one step ahead)
+            out_channels,
+            (kernel_sizes[1] - 1, 1),
+            1,
+            padding=((kernel_sizes[1] - 1) // 2, 0),
+        )
+
+        if use_weight_norm and use_spectral_norm:
+            raise ValueError("Either use use_weight_norm or use_spectral_norm.")
+
+        # apply weight norm
+        if use_weight_norm:
+            self.apply_weight_norm()
+
+        # apply spectral norm
+        if use_spectral_norm:
+            self.apply_spectral_norm()
+
+    def forward(self, x):
+        """Calculate forward propagation.
+
+        Args:
+            x (Tensor): Input tensor (B, in_channels, T).
+
+        Returns:
+            list: Each conv layer's output, followed by the flattened final output.
+
+        """
+        # transform 1d to 2d -> (B, C, T/P, P)
+        b, c, t = x.shape
+        if t % self.period != 0:
+            n_pad = self.period - (t % self.period)
+            x = F.pad(x, (0, n_pad), "reflect")
+            t += n_pad
+        x = x.view(b, c, t // self.period, self.period)
+
+        # forward conv
+        outs = []
+        for layer in self.convs:
+            x = layer(x)
+            outs += [x]
+        x = self.output_conv(x)
+        x = torch.flatten(x, 1, -1)
+        outs += [x]
+
+        return outs
+
+    def apply_weight_norm(self):
+        """Apply weight normalization to every Conv2d layer of this module."""
+
+        def _apply_weight_norm(m):
+            if isinstance(m, torch.nn.Conv2d):
+                torch.nn.utils.weight_norm(m)
+                logging.debug(f"Weight norm is applied to {m}.")
+
+        self.apply(_apply_weight_norm)
+
+    def apply_spectral_norm(self):
+        """Apply spectral normalization to every Conv2d layer of this module."""
+
+        def _apply_spectral_norm(m):
+            if isinstance(m, torch.nn.Conv2d):
+                torch.nn.utils.spectral_norm(m)
+                logging.debug(f"Spectral norm is applied to {m}.")
+
+        self.apply(_apply_spectral_norm)
+
+
+class HiFiGANMultiPeriodDiscriminator(torch.nn.Module):
+    """HiFiGAN multi-period discriminator: one HiFiGANPeriodDiscriminator per period."""
+
+    def __init__(
+        self,
+        periods=[2, 3, 5, 7, 11],
+        discriminator_params={
+            "in_channels": 1,
+            "out_channels": 1,
+            "kernel_sizes": [5, 3],
+            "channels": 32,
+            "downsample_scales": [3, 3, 3, 3, 1],
+            "max_downsample_channels": 1024,
+            "bias": True,
+            "nonlinear_activation": "LeakyReLU",
+            "nonlinear_activation_params": {"negative_slope": 0.1},
+            "use_weight_norm": True,
+            "use_spectral_norm": False,
+        },
+    ):
+        """Initialize HiFiGANMultiPeriodDiscriminator module.
+
+        Args:
+            periods (list): List of periods; one sub-discriminator is built for each.
+            discriminator_params (dict): Keyword arguments for HiFiGANPeriodDiscriminator.
+                Its "period" entry is overwritten with each value from `periods`.
+
+        """
+        super().__init__()
+        self.discriminators = torch.nn.ModuleList()
+        for period in periods:
+            params = copy.deepcopy(discriminator_params)  # never mutate the caller's dict
+            params["period"] = period
+            self.discriminators += [HiFiGANPeriodDiscriminator(**params)]
+
+    def forward(self, x):
+        """Calculate forward propagation.
+
+        Args:
+            x (Tensor): Input signal; every sub-discriminator receives this same tensor.
+
+        Returns:
+            List: One entry per period, each being that discriminator's list of layer outputs.
+
+        """
+        outs = []
+        for f in self.discriminators:
+            outs += [f(x)]
+
+        return outs
+
+
+class HiFiGANScaleDiscriminator(torch.nn.Module):
+    """HiFi-GAN scale discriminator module."""
+
+    def __init__(
+        self,
+        in_channels=1,
+        out_channels=1,
+        kernel_sizes=[15, 41, 5, 3],
+        channels=128,
+        max_downsample_channels=1024,
+        max_groups=16,
+        bias=True,
+        downsample_scales=[2, 2, 4, 4, 1],
+        nonlinear_activation="LeakyReLU",
+        nonlinear_activation_params={"negative_slope": 0.1},
+        use_weight_norm=True,
+        use_spectral_norm=False,
+    ):
+        """Initialize HiFiGAN scale discriminator module.
+
+        Args:
+            in_channels (int): Number of input channels.
+            out_channels (int): Number of output channels.
+            kernel_sizes (list): List of four kernel sizes. The first will be used for the first conv layer,
+                and the second is for downsampling part, and the remaining two are for output layers.
+            channels (int): Initial number of channels for conv layer.
+            max_downsample_channels (int): Maximum number of channels for downsampling layers.
+            max_groups (int): Upper bound on conv groups in the downsampling part (starts at 4, x4 per layer).
+            bias (bool): Whether to add bias parameter in convolution layers.
+            downsample_scales (list): List of downsampling scales.
+            nonlinear_activation (str): Activation function module name.
+            nonlinear_activation_params (dict): Hyperparameters for activation function.
+            use_weight_norm (bool): Whether to call apply_weight_norm().
+            use_spectral_norm (bool): Whether to call apply_spectral_norm().
+                NOTE(review): both helpers test for Conv2d but only Conv1d layers are built here -- confirm.
+
+        """
+        super().__init__()
+        self.layers = torch.nn.ModuleList()
+
+        # check kernel size is valid
+        assert len(kernel_sizes) == 4
+        for ks in kernel_sizes:
+            assert ks % 2 == 1
+
+        # add first layer
+        self.layers += [
+            torch.nn.Sequential(
+                torch.nn.Conv1d(
+                    in_channels,
+                    channels,
+                    # NOTE(kan-bayashi): Use always the same kernel size
+                    kernel_sizes[0],
+                    bias=bias,
+                    padding=(kernel_sizes[0] - 1) // 2,
+                ),
+                getattr(torch.nn, nonlinear_activation)(**nonlinear_activation_params),
+            )
+        ]
+
+        # add downsample layers
+        in_chs = channels
+        out_chs = channels
+        # NOTE(kan-bayashi): Remove hard coding?
+        groups = 4
+        for downsample_scale in downsample_scales:
+            self.layers += [
+                torch.nn.Sequential(
+                    torch.nn.Conv1d(
+                        in_chs,
+                        out_chs,
+                        kernel_size=kernel_sizes[1],
+                        stride=downsample_scale,
+                        padding=(kernel_sizes[1] - 1) // 2,
+                        groups=groups,
+                        bias=bias,
+                    ),
+                    getattr(torch.nn, nonlinear_activation)(
+                        **nonlinear_activation_params
+                    ),
+                )
+            ]
+            in_chs = out_chs
+            # NOTE(kan-bayashi): Remove hard coding?
+            out_chs = min(in_chs * 2, max_downsample_channels)
+            # NOTE(kan-bayashi): Remove hard coding?
+            groups = min(groups * 4, max_groups)
+
+        # add final layers
+        out_chs = min(in_chs * 2, max_downsample_channels)
+        self.layers += [
+            torch.nn.Sequential(
+                torch.nn.Conv1d(
+                    in_chs,
+                    out_chs,
+                    kernel_size=kernel_sizes[2],
+                    stride=1,
+                    padding=(kernel_sizes[2] - 1) // 2,
+                    bias=bias,
+                ),
+                getattr(torch.nn, nonlinear_activation)(**nonlinear_activation_params),
+            )
+        ]
+        self.layers += [
+            torch.nn.Conv1d(
+                out_chs,
+                out_channels,
+                kernel_size=kernel_sizes[3],
+                stride=1,
+                padding=(kernel_sizes[3] - 1) // 2,
+                bias=bias,
+            ),
+        ]
+
+        if use_weight_norm and use_spectral_norm:
+            raise ValueError("Either use use_weight_norm or use_spectral_norm.")
+
+        # apply weight norm
+        if use_weight_norm:
+            self.apply_weight_norm()
+
+        # apply spectral norm
+        if use_spectral_norm:
+            self.apply_spectral_norm()
+
+    def forward(self, x):
+        """Calculate forward propagation.
+
+        Args:
+            x (Tensor): Input noise signal (B, 1, T).
+
+        Returns:
+            List: Output tensor of every layer, in order; the last entry is the final output.
+
+        """
+        outs = []
+        for f in self.layers:
+            x = f(x)
+            outs += [x]
+
+        return outs
+
+    def apply_weight_norm(self):
+        """Apply weight norm to Conv2d submodules (NOTE(review): the layers built here are Conv1d)."""
+
+        def _apply_weight_norm(m):
+            if isinstance(m, torch.nn.Conv2d):
+                torch.nn.utils.weight_norm(m)
+                logging.debug(f"Weight norm is applied to {m}.")
+
+        self.apply(_apply_weight_norm)
+
+    def apply_spectral_norm(self):
+        """Apply spectral norm to Conv2d submodules (NOTE(review): the layers built here are Conv1d)."""
+
+        def _apply_spectral_norm(m):
+            if isinstance(m, torch.nn.Conv2d):
+                torch.nn.utils.spectral_norm(m)
+                logging.debug(f"Spectral norm is applied to {m}.")
+
+        self.apply(_apply_spectral_norm)
+
+
+class HiFiGANMultiScaleDiscriminator(torch.nn.Module):
+    """HiFi-GAN multi-scale discriminator: scale discriminators fed progressively pooled input."""
+
+    def __init__(
+        self,
+        scales=3,
+        downsample_pooling="AvgPool1d",
+        # follow the official implementation setting
+        downsample_pooling_params={
+            "kernel_size": 4,
+            "stride": 2,
+            "padding": 2,
+        },
+        discriminator_params={
+            "in_channels": 1,
+            "out_channels": 1,
+            "kernel_sizes": [15, 41, 5, 3],
+            "channels": 128,
+            "max_downsample_channels": 1024,
+            "max_groups": 16,
+            "bias": True,
+            "downsample_scales": [2, 2, 4, 4, 1],
+            "nonlinear_activation": "LeakyReLU",
+            "nonlinear_activation_params": {"negative_slope": 0.1},
+        },
+        follow_official_norm=False,
+    ):
+        """Initialize HiFiGAN multi-scale discriminator module.
+
+        Args:
+            scales (int): Number of multi-scales.
+            downsample_pooling (str): Pooling module name for downsampling of the inputs.
+            downsample_pooling_params (dict): Parameters for the above pooling module.
+            discriminator_params (dict): Parameters for hifi-gan scale discriminator module.
+            follow_official_norm (bool): Whether to follow the norm setting of the official
+                implementation: the spectral-norm flag is set for the first discriminator and
+                the weight-norm flag for the others. If False, the params are used unchanged.
+
+        """
+        super().__init__()
+        self.discriminators = torch.nn.ModuleList()
+
+        # add discriminators
+        for i in range(scales):
+            params = copy.deepcopy(discriminator_params)  # per-scale copy; caller's dict untouched
+            if follow_official_norm:
+                if i == 0:
+                    params["use_weight_norm"] = False
+                    params["use_spectral_norm"] = True
+                else:
+                    params["use_weight_norm"] = True
+                    params["use_spectral_norm"] = False
+            self.discriminators += [HiFiGANScaleDiscriminator(**params)]
+        self.pooling = getattr(torch.nn, downsample_pooling)(
+            **downsample_pooling_params
+        )
+
+    def forward(self, x):
+        """Calculate forward propagation.
+
+        Args:
+            x (Tensor): Input noise signal (B, 1, T).
+
+        Returns:
+            List: One entry per scale, each being that discriminator's list of layer outputs.
+
+        """
+        outs = []
+        for f in self.discriminators:
+            outs += [f(x)]
+            x = self.pooling(x)  # pooled signal becomes the input of the next scale
+
+        return outs
+
+
+class HiFiGANMultiScaleMultiPeriodDiscriminator(torch.nn.Module):
+    """HiFi-GAN multi-scale + multi-period discriminator module."""
+
+    def __init__(
+        self,
+        # Multi-scale discriminator related
+        scales=3,
+        scale_downsample_pooling="AvgPool1d",
+        scale_downsample_pooling_params={
+            "kernel_size": 4,
+            "stride": 2,
+            "padding": 2,
+        },
+        scale_discriminator_params={
+            "in_channels": 1,
+            "out_channels": 1,
+            "kernel_sizes": [15, 41, 5, 3],
+            "channels": 128,
+            "max_downsample_channels": 1024,
+            "max_groups": 16,
+            "bias": True,
+            "downsample_scales": [2, 2, 4, 4, 1],
+            "nonlinear_activation": "LeakyReLU",
+            "nonlinear_activation_params": {"negative_slope": 0.1},
+        },
+        follow_official_norm=True,
+        # Multi-period discriminator related
+        periods=[2, 3, 5, 7, 11],
+        period_discriminator_params={
+            "in_channels": 1,
+            "out_channels": 1,
+            "kernel_sizes": [5, 3],
+            "channels": 32,
+            "downsample_scales": [3, 3, 3, 3, 1],
+            "max_downsample_channels": 1024,
+            "bias": True,
+            "nonlinear_activation": "LeakyReLU",
+            "nonlinear_activation_params": {"negative_slope": 0.1},
+            "use_weight_norm": True,
+            "use_spectral_norm": False,
+        },
+    ):
+        """Initialize HiFiGAN multi-scale + multi-period discriminator module.
+
+        Args:
+            scales (int): Number of multi-scales.
+            scale_downsample_pooling (str): Pooling module name for downsampling of the inputs.
+            scale_downsample_pooling_params (dict): Parameters for the above pooling module.
+            scale_discriminator_params (dict): Parameters for hifi-gan scale discriminator module.
+            follow_official_norm (bool): Whether to follow the norm setting of the official
+                implementation. Forwarded unchanged to HiFiGANMultiScaleDiscriminator
+                (note the default here is True).
+            periods (list): List of periods.
+            period_discriminator_params (dict): Parameters for hifi-gan period discriminator module.
+                The period parameter will be overwritten.
+
+        """
+        super().__init__()
+        self.msd = HiFiGANMultiScaleDiscriminator(
+            scales=scales,
+            downsample_pooling=scale_downsample_pooling,
+            downsample_pooling_params=scale_downsample_pooling_params,
+            discriminator_params=scale_discriminator_params,
+            follow_official_norm=follow_official_norm,
+        )
+        self.mpd = HiFiGANMultiPeriodDiscriminator(
+            periods=periods,
+            discriminator_params=period_discriminator_params,
+        )
+
+    def forward(self, x):
+        """Calculate forward propagation.
+
+        Args:
+            x (Tensor): Input noise signal (B, 1, T).
+
+        Returns:
+            List: List of list of each discriminator outputs,
+                which consists of each layer output tensors.
+                Multi-scale outputs come first, followed by the multi-period outputs.
+
+        """
+        msd_outs = self.msd(x)
+        mpd_outs = self.mpd(x)
+        return msd_outs + mpd_outs  # list concatenation: scale results, then period results
diff --git a/hifigan/parallel_wavegan/models/melgan.py b/hifigan/parallel_wavegan/models/melgan.py
new file mode 100644
index 0000000..cbc9c4e
--- /dev/null
+++ b/hifigan/parallel_wavegan/models/melgan.py
@@ -0,0 +1,516 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2020 Tomoki Hayashi
+#  MIT License (https://opensource.org/licenses/MIT)
+
+"""MelGAN Modules."""
+
+import logging
+
+import numpy as np
+import torch
+
+from parallel_wavegan.layers import CausalConv1d
+from parallel_wavegan.layers import CausalConvTranspose1d
+from parallel_wavegan.layers import ResidualStack
+from parallel_wavegan.utils import read_hdf5
+
+
+class MelGANGenerator(torch.nn.Module):
+    """MelGAN generator module."""
+
+    def __init__(
+        self,
+        in_channels=80,
+        out_channels=1,
+        kernel_size=7,
+        channels=512,
+        bias=True,
+        upsample_scales=[8, 8, 2, 2],
+        stack_kernel_size=3,
+        stacks=3,
+        nonlinear_activation="LeakyReLU",
+        nonlinear_activation_params={"negative_slope": 0.2},
+        pad="ReflectionPad1d",
+        pad_params={},
+        use_final_nonlinear_activation=True,
+        use_weight_norm=True,
+        use_causal_conv=False,
+    ):
+        """Initialize MelGANGenerator module.
+
+        Args:
+            in_channels (int): Number of input channels.
+            out_channels (int): Number of output channels.
+            kernel_size (int): Kernel size of initial and final conv layer.
+            channels (int): Initial number of channels for conv layer.
+            bias (bool): Whether to add bias parameter in convolution layers.
+            upsample_scales (list): List of upsampling scales.
+            stack_kernel_size (int): Kernel size of dilated conv layers in residual stack.
+            stacks (int): Number of stacks in a single residual stack.
+            nonlinear_activation (str): Activation function module name.
+            nonlinear_activation_params (dict): Hyperparameters for activation function.
+            pad (str): Padding function module name before dilated convolution layer.
+            pad_params (dict): Hyperparameters for padding function.
+            use_final_nonlinear_activation (bool): Whether to apply Tanh activation after the final layer.
+            use_weight_norm (bool): Whether to use weight norm.
+                If set to true, it will be applied to all of the conv layers.
+            use_causal_conv (bool): Whether to use causal convolution.
+
+        """
+        super(MelGANGenerator, self).__init__()
+
+        # check hyper parameters is valid
+        assert channels >= np.prod(upsample_scales)
+        assert channels % (2 ** len(upsample_scales)) == 0
+        if not use_causal_conv:
+            assert (kernel_size - 1) % 2 == 0, "Not support even number kernel size."
+
+        # add initial layer
+        layers = []
+        if not use_causal_conv:
+            layers += [
+                getattr(torch.nn, pad)((kernel_size - 1) // 2, **pad_params),
+                torch.nn.Conv1d(in_channels, channels, kernel_size, bias=bias),
+            ]
+        else:
+            layers += [
+                CausalConv1d(
+                    in_channels,
+                    channels,
+                    kernel_size,
+                    bias=bias,
+                    pad=pad,
+                    pad_params=pad_params,
+                ),
+            ]
+
+        for i, upsample_scale in enumerate(upsample_scales):
+            # add upsampling layer
+            layers += [
+                getattr(torch.nn, nonlinear_activation)(**nonlinear_activation_params)
+            ]
+            if not use_causal_conv:
+                layers += [
+                    torch.nn.ConvTranspose1d(
+                        channels // (2 ** i),
+                        channels // (2 ** (i + 1)),
+                        upsample_scale * 2,
+                        stride=upsample_scale,
+                        padding=upsample_scale // 2 + upsample_scale % 2,
+                        output_padding=upsample_scale % 2,
+                        bias=bias,
+                    )
+                ]
+            else:
+                layers += [
+                    CausalConvTranspose1d(
+                        channels // (2 ** i),
+                        channels // (2 ** (i + 1)),
+                        upsample_scale * 2,
+                        stride=upsample_scale,
+                        bias=bias,
+                    )
+                ]
+
+            # add residual stack
+            for j in range(stacks):
+                layers += [
+                    ResidualStack(
+                        kernel_size=stack_kernel_size,
+                        channels=channels // (2 ** (i + 1)),
+                        dilation=stack_kernel_size ** j,
+                        bias=bias,
+                        nonlinear_activation=nonlinear_activation,
+                        nonlinear_activation_params=nonlinear_activation_params,
+                        pad=pad,
+                        pad_params=pad_params,
+                        use_causal_conv=use_causal_conv,
+                    )
+                ]
+
+        # add final layer
+        layers += [
+            getattr(torch.nn, nonlinear_activation)(**nonlinear_activation_params)
+        ]
+        if not use_causal_conv:
+            layers += [
+                getattr(torch.nn, pad)((kernel_size - 1) // 2, **pad_params),
+                torch.nn.Conv1d(
+                    channels // (2 ** (i + 1)), out_channels, kernel_size, bias=bias
+                ),
+            ]
+        else:
+            layers += [
+                CausalConv1d(
+                    channels // (2 ** (i + 1)),
+                    out_channels,
+                    kernel_size,
+                    bias=bias,
+                    pad=pad,
+                    pad_params=pad_params,
+                ),
+            ]
+        if use_final_nonlinear_activation:
+            layers += [torch.nn.Tanh()]
+
+        # define the model as a single function
+        self.melgan = torch.nn.Sequential(*layers)
+
+        # apply weight norm
+        if use_weight_norm:
+            self.apply_weight_norm()
+
+        # reset parameters
+        self.reset_parameters()
+
+        # initialize pqmf for inference
+        self.pqmf = None
+
+    def forward(self, c):
+        """Calculate forward propagation.
+
+        Args:
+            c (Tensor): Input tensor (B, in_channels, T).
+
+        Returns:
+            Tensor: Output tensor (B, out_channels, T * prod(upsample_scales)).
+
+        """
+        return self.melgan(c)
+
+    def remove_weight_norm(self):
+        """Remove weight normalization module from all of the layers."""
+
+        def _remove_weight_norm(m):
+            try:
+                logging.debug(f"Weight norm is removed from {m}.")
+                torch.nn.utils.remove_weight_norm(m)
+            except ValueError:  # this module didn't have weight norm
+                return
+
+        self.apply(_remove_weight_norm)
+
+    def apply_weight_norm(self):
+        """Apply weight normalization module to all of the conv layers."""
+
+        def _apply_weight_norm(m):
+            if isinstance(m, torch.nn.Conv1d) or isinstance(
+                m, torch.nn.ConvTranspose1d
+            ):
+                torch.nn.utils.weight_norm(m)
+                logging.debug(f"Weight norm is applied to {m}.")
+
+        self.apply(_apply_weight_norm)
+
+    def reset_parameters(self):
+        """Reset parameters.
+
+        This initialization follows official implementation manner.
+        https://github.com/descriptinc/melgan-neurips/blob/master/mel2wav/modules.py
+
+        """
+
+        def _reset_parameters(m):
+            if isinstance(m, torch.nn.Conv1d) or isinstance(
+                m, torch.nn.ConvTranspose1d
+            ):
+                m.weight.data.normal_(0.0, 0.02)
+                logging.debug(f"Reset parameters in {m}.")
+
+        self.apply(_reset_parameters)
+
+    def register_stats(self, stats):
+        """Register stats for feature normalization as buffers.
+
+        Args:
+            stats (str): Path of statistics file (".npy" or ".h5").
+
+        """
+        assert stats.endswith(".h5") or stats.endswith(".npy")
+        if stats.endswith(".h5"):
+            mean = read_hdf5(stats, "mean").reshape(-1)
+            scale = read_hdf5(stats, "scale").reshape(-1)
+        else:
+            mean = np.load(stats)[0].reshape(-1)
+            scale = np.load(stats)[1].reshape(-1)
+        self.register_buffer("mean", torch.from_numpy(mean).float())
+        self.register_buffer("scale", torch.from_numpy(scale).float())
+        logging.info("Successfully registered stats as buffer.")
+
+    def inference(self, c, normalize_before=False):
+        """Perform inference.
+
+        Args:
+            c (Union[Tensor, ndarray]): Input tensor (T, in_channels).
+            normalize_before (bool): Whether to perform normalization.
+
+        Returns:
+            Tensor: Output tensor (T * prod(upsample_scales), out_channels).
+
+        """
+        if not isinstance(c, torch.Tensor):
+            c = torch.tensor(c, dtype=torch.float).to(next(self.parameters()).device)
+        if normalize_before:
+            c = (c - self.mean) / self.scale
+        c = self.melgan(c.transpose(1, 0).unsqueeze(0))
+        if self.pqmf is not None:
+            c = self.pqmf.synthesis(c)
+        return c.squeeze(0).transpose(1, 0)
+
+
+class MelGANDiscriminator(torch.nn.Module):
+    """MelGAN discriminator module."""
+
+    def __init__(
+        self,
+        in_channels=1,
+        out_channels=1,
+        kernel_sizes=[5, 3],
+        channels=16,
+        max_downsample_channels=1024,
+        bias=True,
+        downsample_scales=[4, 4, 4, 4],
+        nonlinear_activation="LeakyReLU",
+        nonlinear_activation_params={"negative_slope": 0.2},
+        pad="ReflectionPad1d",
+        pad_params={},
+    ):
+        """Initialize MelGAN discriminator module.
+
+        Args:
+            in_channels (int): Number of input channels.
+            out_channels (int): Number of output channels.
+            kernel_sizes (list): List of two kernel sizes. The prod will be used for the first conv layer,
+                and the first and the second kernel sizes will be used for the last two layers.
+                For example if kernel_sizes = [5, 3], the first layer kernel size will be 5 * 3 = 15,
+                the last two layers' kernel size will be 5 and 3, respectively.
+            channels (int): Initial number of channels for conv layer.
+            max_downsample_channels (int): Maximum number of channels for downsampling layers.
+            bias (bool): Whether to add bias parameter in convolution layers.
+            downsample_scales (list): List of downsampling scales.
+            nonlinear_activation (str): Activation function module name.
+            nonlinear_activation_params (dict): Hyperparameters for activation function.
+            pad (str): Padding function module name before dilated convolution layer.
+            pad_params (dict): Hyperparameters for padding function.
+
+        """
+        super(MelGANDiscriminator, self).__init__()
+        self.layers = torch.nn.ModuleList()
+
+        # check kernel size is valid
+        assert len(kernel_sizes) == 2
+        assert kernel_sizes[0] % 2 == 1
+        assert kernel_sizes[1] % 2 == 1
+
+        # add first layer
+        self.layers += [
+            torch.nn.Sequential(
+                getattr(torch.nn, pad)((np.prod(kernel_sizes) - 1) // 2, **pad_params),
+                torch.nn.Conv1d(
+                    in_channels, channels, np.prod(kernel_sizes), bias=bias
+                ),
+                getattr(torch.nn, nonlinear_activation)(**nonlinear_activation_params),
+            )
+        ]
+
+        # add downsample layers
+        in_chs = channels
+        for downsample_scale in downsample_scales:
+            out_chs = min(in_chs * downsample_scale, max_downsample_channels)
+            self.layers += [
+                torch.nn.Sequential(
+                    torch.nn.Conv1d(
+                        in_chs,
+                        out_chs,
+                        kernel_size=downsample_scale * 10 + 1,
+                        stride=downsample_scale,
+                        padding=downsample_scale * 5,
+                        groups=in_chs // 4,
+                        bias=bias,
+                    ),
+                    getattr(torch.nn, nonlinear_activation)(
+                        **nonlinear_activation_params
+                    ),
+                )
+            ]
+            in_chs = out_chs
+
+        # add final layers
+        out_chs = min(in_chs * 2, max_downsample_channels)
+        self.layers += [
+            torch.nn.Sequential(
+                torch.nn.Conv1d(
+                    in_chs,
+                    out_chs,
+                    kernel_sizes[0],
+                    padding=(kernel_sizes[0] - 1) // 2,
+                    bias=bias,
+                ),
+                getattr(torch.nn, nonlinear_activation)(**nonlinear_activation_params),
+            )
+        ]
+        self.layers += [
+            torch.nn.Conv1d(
+                out_chs,
+                out_channels,
+                kernel_sizes[1],
+                padding=(kernel_sizes[1] - 1) // 2,
+                bias=bias,
+            ),
+        ]
+
+    def forward(self, x):
+        """Calculate forward propagation.
+
+        Args:
+            x (Tensor): Input signal (B, 1, T).
+
+        Returns:
+            List: List of output tensors of each layer.
+
+        """
+        outs = []
+        for f in self.layers:
+            x = f(x)
+            outs += [x]
+
+        return outs
+
+
+class MelGANMultiScaleDiscriminator(torch.nn.Module):
+    """MelGAN multi-scale discriminator module."""
+
+    def __init__(
+        self,
+        in_channels=1,
+        out_channels=1,
+        scales=3,
+        downsample_pooling="AvgPool1d",
+        # follow the official implementation setting
+        downsample_pooling_params={
+            "kernel_size": 4,
+            "stride": 2,
+            "padding": 1,
+            "count_include_pad": False,
+        },
+        kernel_sizes=[5, 3],
+        channels=16,
+        max_downsample_channels=1024,
+        bias=True,
+        downsample_scales=[4, 4, 4, 4],
+        nonlinear_activation="LeakyReLU",
+        nonlinear_activation_params={"negative_slope": 0.2},
+        pad="ReflectionPad1d",
+        pad_params={},
+        use_weight_norm=True,
+    ):
+        """Initialize MelGAN multi-scale discriminator module.
+
+        Args:
+            in_channels (int): Number of input channels.
+            out_channels (int): Number of output channels.
+            scales (int): Number of multi-scales.
+            downsample_pooling (str): Pooling module name for downsampling of the inputs.
+            downsample_pooling_params (dict): Parameters for the above pooling module.
+            kernel_sizes (list): List of two kernel sizes. The product will be used for the first conv layer,
+                and the first and the second kernel sizes will be used for the last two layers.
+            channels (int): Initial number of channels for conv layer.
+            max_downsample_channels (int): Maximum number of channels for downsampling layers.
+            bias (bool): Whether to add bias parameter in convolution layers.
+            downsample_scales (list): List of downsampling scales.
+            nonlinear_activation (str): Activation function module name.
+            nonlinear_activation_params (dict): Hyperparameters for activation function.
+            pad (str): Padding function module name before dilated convolution layer.
+            pad_params (dict): Hyperparameters for padding function.
+            use_weight_norm (bool): Whether to apply weight norm to all of the conv layers.
+
+        """
+        super(MelGANMultiScaleDiscriminator, self).__init__()
+        self.discriminators = torch.nn.ModuleList()
+
+        # add discriminators
+        for _ in range(scales):
+            self.discriminators += [
+                MelGANDiscriminator(
+                    in_channels=in_channels,
+                    out_channels=out_channels,
+                    kernel_sizes=kernel_sizes,
+                    channels=channels,
+                    max_downsample_channels=max_downsample_channels,
+                    bias=bias,
+                    downsample_scales=downsample_scales,
+                    nonlinear_activation=nonlinear_activation,
+                    nonlinear_activation_params=nonlinear_activation_params,
+                    pad=pad,
+                    pad_params=pad_params,
+                )
+            ]
+        self.pooling = getattr(torch.nn, downsample_pooling)(
+            **downsample_pooling_params
+        )
+
+        # apply weight norm
+        if use_weight_norm:
+            self.apply_weight_norm()
+
+        # reset parameters
+        self.reset_parameters()
+
+    def forward(self, x):
+        """Calculate forward propagation.
+
+        Args:
+            x (Tensor): Input signal (B, 1, T).
+
+        Returns:
+            List: List of list of each discriminator outputs, which consists of each layer output tensors.
+
+        """
+        outs = []
+        for f in self.discriminators:
+            outs += [f(x)]
+            x = self.pooling(x)
+
+        return outs
+
+    def remove_weight_norm(self):
+        """Remove weight normalization module from all of the layers."""
+
+        def _remove_weight_norm(m):
+            try:
+                logging.debug(f"Weight norm is removed from {m}.")
+                torch.nn.utils.remove_weight_norm(m)
+            except ValueError:  # this module didn't have weight norm
+                return
+
+        self.apply(_remove_weight_norm)
+
+    def apply_weight_norm(self):
+        """Apply weight normalization module to all of the conv layers."""
+
+        def _apply_weight_norm(m):
+            if isinstance(m, torch.nn.Conv1d) or isinstance(
+                m, torch.nn.ConvTranspose1d
+            ):
+                torch.nn.utils.weight_norm(m)
+                logging.debug(f"Weight norm is applied to {m}.")
+
+        self.apply(_apply_weight_norm)
+
+    def reset_parameters(self):
+        """Reset parameters.
+
+        This initialization follows official implementation manner.
+        https://github.com/descriptinc/melgan-neurips/blob/master/mel2wav/modules.py
+
+        """
+
+        def _reset_parameters(m):
+            if isinstance(m, torch.nn.Conv1d) or isinstance(
+                m, torch.nn.ConvTranspose1d
+            ):
+                m.weight.data.normal_(0.0, 0.02)
+                logging.debug(f"Reset parameters in {m}.")
+
+        self.apply(_reset_parameters)
diff --git a/hifigan/parallel_wavegan/models/parallel_wavegan.py b/hifigan/parallel_wavegan/models/parallel_wavegan.py
new file mode 100644
index 0000000..8dcfabf
--- /dev/null
+++ b/hifigan/parallel_wavegan/models/parallel_wavegan.py
@@ -0,0 +1,516 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2019 Tomoki Hayashi
+#  MIT License (https://opensource.org/licenses/MIT)
+
+"""Parallel WaveGAN Modules."""
+
+import logging
+import math
+
+import numpy as np
+import torch
+
+from parallel_wavegan.layers import Conv1d
+from parallel_wavegan.layers import Conv1d1x1
+from parallel_wavegan.layers import upsample
+from parallel_wavegan.layers import WaveNetResidualBlock as ResidualBlock
+from parallel_wavegan import models
+from parallel_wavegan.utils import read_hdf5
+
+
+class ParallelWaveGANGenerator(torch.nn.Module):
+    """Parallel WaveGAN Generator module."""
+
+    def __init__(
+        self,
+        in_channels=1,
+        out_channels=1,
+        kernel_size=3,
+        layers=30,
+        stacks=3,
+        residual_channels=64,
+        gate_channels=128,
+        skip_channels=64,
+        aux_channels=80,
+        aux_context_window=2,
+        dropout=0.0,
+        bias=True,
+        use_weight_norm=True,
+        use_causal_conv=False,
+        upsample_conditional_features=True,
+        upsample_net="ConvInUpsampleNetwork",
+        upsample_params={"upsample_scales": [4, 4, 4, 4]},
+    ):
+        """Initialize Parallel WaveGAN Generator module.
+
+        Args:
+            in_channels (int): Number of input channels.
+            out_channels (int): Number of output channels.
+            kernel_size (int): Kernel size of dilated convolution.
+            layers (int): Number of residual block layers.
+            stacks (int): Number of stacks i.e., dilation cycles.
+            residual_channels (int): Number of channels in residual conv.
+            gate_channels (int):  Number of channels in gated conv.
+            skip_channels (int): Number of channels in skip conv.
+            aux_channels (int): Number of channels for auxiliary feature conv.
+            aux_context_window (int): Context window size for auxiliary feature.
+            dropout (float): Dropout rate. 0.0 means no dropout applied.
+            bias (bool): Whether to use bias parameter in conv layer.
+            use_weight_norm (bool): Whether to use weight norm.
+                If set to true, it will be applied to all of the conv layers.
+            use_causal_conv (bool): Whether to use causal structure.
+            upsample_conditional_features (bool): Whether to use upsampling network.
+            upsample_net (str): Upsampling network architecture.
+            upsample_params (dict): Upsampling network parameters.
+
+        """
+        super(ParallelWaveGANGenerator, self).__init__()
+        self.in_channels = in_channels
+        self.out_channels = out_channels
+        self.aux_channels = aux_channels
+        self.aux_context_window = aux_context_window
+        self.layers = layers
+        self.stacks = stacks
+        self.kernel_size = kernel_size
+
+        # check the number of layers and stacks
+        assert layers % stacks == 0
+        layers_per_stack = layers // stacks
+
+        # define first convolution
+        self.first_conv = Conv1d1x1(in_channels, residual_channels, bias=True)
+
+        # define conv + upsampling network
+        if upsample_conditional_features:
+            upsample_params.update(
+                {
+                    "use_causal_conv": use_causal_conv,
+                }
+            )
+            if upsample_net == "MelGANGenerator":
+                assert aux_context_window == 0
+                upsample_params.update(
+                    {
+                        "use_weight_norm": False,  # not to apply twice
+                        "use_final_nonlinear_activation": False,
+                    }
+                )
+                self.upsample_net = getattr(models, upsample_net)(**upsample_params)
+            else:
+                if upsample_net == "ConvInUpsampleNetwork":
+                    upsample_params.update(
+                        {
+                            "aux_channels": aux_channels,
+                            "aux_context_window": aux_context_window,
+                        }
+                    )
+                self.upsample_net = getattr(upsample, upsample_net)(**upsample_params)
+            self.upsample_factor = np.prod(upsample_params["upsample_scales"])
+        else:
+            self.upsample_net = None
+            self.upsample_factor = 1
+
+        # define residual blocks
+        self.conv_layers = torch.nn.ModuleList()
+        for layer in range(layers):
+            dilation = 2 ** (layer % layers_per_stack)
+            conv = ResidualBlock(
+                kernel_size=kernel_size,
+                residual_channels=residual_channels,
+                gate_channels=gate_channels,
+                skip_channels=skip_channels,
+                aux_channels=aux_channels,
+                dilation=dilation,
+                dropout=dropout,
+                bias=bias,
+                use_causal_conv=use_causal_conv,
+            )
+            self.conv_layers += [conv]
+
+        # define output layers
+        self.last_conv_layers = torch.nn.ModuleList(
+            [
+                torch.nn.ReLU(inplace=True),
+                Conv1d1x1(skip_channels, skip_channels, bias=True),
+                torch.nn.ReLU(inplace=True),
+                Conv1d1x1(skip_channels, out_channels, bias=True),
+            ]
+        )
+
+        # apply weight norm
+        if use_weight_norm:
+            self.apply_weight_norm()
+
+    def forward(self, x, c):
+        """Calculate forward propagation.
+
+        Args:
+            x (Tensor): Input noise signal (B, 1, T).
+            c (Tensor): Local conditioning auxiliary features (B, C, T').
+
+        Returns:
+            Tensor: Output tensor (B, out_channels, T)
+
+        """
+        # perform upsampling
+        if c is not None and self.upsample_net is not None:
+            c = self.upsample_net(c)
+            assert c.size(-1) == x.size(-1)
+
+        # encode to hidden representation
+        x = self.first_conv(x)
+        skips = 0
+        for f in self.conv_layers:
+            x, h = f(x, c)
+            skips += h
+        skips *= math.sqrt(1.0 / len(self.conv_layers))
+
+        # apply final layers
+        x = skips
+        for f in self.last_conv_layers:
+            x = f(x)
+
+        return x
+
+    def remove_weight_norm(self):
+        """Remove weight normalization module from all of the layers."""
+
+        def _remove_weight_norm(m):
+            try:
+                logging.debug(f"Weight norm is removed from {m}.")
+                torch.nn.utils.remove_weight_norm(m)
+            except ValueError:  # this module didn't have weight norm
+                return
+
+        self.apply(_remove_weight_norm)
+
+    def apply_weight_norm(self):
+        """Apply weight normalization module to all of the conv layers."""
+
+        def _apply_weight_norm(m):
+            if isinstance(m, torch.nn.Conv1d) or isinstance(m, torch.nn.Conv2d):
+                torch.nn.utils.weight_norm(m)
+                logging.debug(f"Weight norm is applied to {m}.")
+
+        self.apply(_apply_weight_norm)
+
+    @staticmethod
+    def _get_receptive_field_size(
+        layers, stacks, kernel_size, dilation=lambda x: 2 ** x
+    ):
+        assert layers % stacks == 0
+        layers_per_cycle = layers // stacks
+        dilations = [dilation(i % layers_per_cycle) for i in range(layers)]
+        return (kernel_size - 1) * sum(dilations) + 1
+
+    @property
+    def receptive_field_size(self):
+        """Return receptive field size."""
+        return self._get_receptive_field_size(
+            self.layers, self.stacks, self.kernel_size
+        )
+
+    def register_stats(self, stats):
+        """Register stats for feature normalization as buffers.
+
+        Args:
+            stats (str): Path of statistics file (".npy" or ".h5").
+
+        """
+        assert stats.endswith(".h5") or stats.endswith(".npy")
+        if stats.endswith(".h5"):
+            mean = read_hdf5(stats, "mean").reshape(-1)
+            scale = read_hdf5(stats, "scale").reshape(-1)
+        else:
+            mean = np.load(stats)[0].reshape(-1)
+            scale = np.load(stats)[1].reshape(-1)
+        self.register_buffer("mean", torch.from_numpy(mean).float())
+        self.register_buffer("scale", torch.from_numpy(scale).float())
+        logging.info("Successfully registered stats as buffer.")
+
+    def inference(self, c=None, x=None, normalize_before=False):
+        """Perform inference.
+
+        Args:
+            c (Union[Tensor, ndarray]): Local conditioning auxiliary features (T', C).
+            x (Union[Tensor, ndarray]): Input noise signal (T, 1).
+            normalize_before (bool): Whether to perform normalization.
+
+        Returns:
+            Tensor: Output tensor (T, out_channels)
+
+        """
+        if x is not None:
+            if not isinstance(x, torch.Tensor):
+                x = torch.tensor(x, dtype=torch.float).to(
+                    next(self.parameters()).device
+                )
+            x = x.transpose(1, 0).unsqueeze(0)
+        else:
+            assert c is not None
+            x = torch.randn(1, 1, len(c) * self.upsample_factor).to(
+                next(self.parameters()).device
+            )
+        if c is not None:
+            if not isinstance(c, torch.Tensor):
+                c = torch.tensor(c, dtype=torch.float).to(
+                    next(self.parameters()).device
+                )
+            if normalize_before:
+                c = (c - self.mean) / self.scale
+            c = c.transpose(1, 0).unsqueeze(0)
+            c = torch.nn.ReplicationPad1d(self.aux_context_window)(c)
+        return self.forward(x, c).squeeze(0).transpose(1, 0)
+
+
+class ParallelWaveGANDiscriminator(torch.nn.Module):
+    """Parallel WaveGAN Discriminator module."""
+
+    def __init__(
+        self,
+        in_channels=1,
+        out_channels=1,
+        kernel_size=3,
+        layers=10,
+        conv_channels=64,
+        dilation_factor=1,
+        nonlinear_activation="LeakyReLU",
+        nonlinear_activation_params={"negative_slope": 0.2},
+        bias=True,
+        use_weight_norm=True,
+    ):
+        """Initialize Parallel WaveGAN Discriminator module.
+
+        Args:
+            in_channels (int): Number of input channels.
+            out_channels (int): Number of output channels.
+            kernel_size (int): Kernel size of conv layers.
+            layers (int): Number of conv layers.
+            conv_channels (int): Number of channels in conv layers.
+            dilation_factor (int): Dilation factor. For example, if dilation_factor = 2,
+                the dilation will be 2, 4, 8, ..., and so on.
+            nonlinear_activation (str): Nonlinear function after each conv.
+            nonlinear_activation_params (dict): Nonlinear function parameters.
+            bias (bool): Whether to use bias parameter in conv.
+            use_weight_norm (bool): Whether to use weight norm.
+                If set to true, it will be applied to all of the conv layers.
+
+        """
+        super(ParallelWaveGANDiscriminator, self).__init__()
+        assert (kernel_size - 1) % 2 == 0, "Not support even number kernel size."
+        assert dilation_factor > 0, "Dilation factor must be > 0."
+        self.conv_layers = torch.nn.ModuleList()
+        conv_in_channels = in_channels
+        for i in range(layers - 1):
+            if i == 0:
+                dilation = 1
+            else:
+                dilation = i if dilation_factor == 1 else dilation_factor ** i
+                conv_in_channels = conv_channels
+            padding = (kernel_size - 1) // 2 * dilation
+            conv_layer = [
+                Conv1d(
+                    conv_in_channels,
+                    conv_channels,
+                    kernel_size=kernel_size,
+                    padding=padding,
+                    dilation=dilation,
+                    bias=bias,
+                ),
+                getattr(torch.nn, nonlinear_activation)(
+                    inplace=True, **nonlinear_activation_params
+                ),
+            ]
+            self.conv_layers += conv_layer
+        padding = (kernel_size - 1) // 2
+        last_conv_layer = Conv1d(
+            conv_in_channels,
+            out_channels,
+            kernel_size=kernel_size,
+            padding=padding,
+            bias=bias,
+        )
+        self.conv_layers += [last_conv_layer]
+
+        # apply weight norm
+        if use_weight_norm:
+            self.apply_weight_norm()
+
+    def forward(self, x):
+        """Calculate forward propagation.
+
+        Args:
+            x (Tensor): Input signal (B, 1, T).
+
+        Returns:
+            Tensor: Output tensor (B, 1, T)
+
+        """
+        for f in self.conv_layers:
+            x = f(x)
+        return x
+
+    def apply_weight_norm(self):
+        """Apply weight normalization to every Conv1d/Conv2d layer of this module."""
+
+        def _apply_weight_norm(m):
+            if isinstance(m, torch.nn.Conv1d) or isinstance(m, torch.nn.Conv2d):
+                torch.nn.utils.weight_norm(m)
+                logging.debug(f"Weight norm is applied to {m}.")
+
+        self.apply(_apply_weight_norm)
+
+    def remove_weight_norm(self):
+        """Remove weight normalization from every layer that has it."""
+
+        def _remove_weight_norm(m):
+            try:
+                torch.nn.utils.remove_weight_norm(m)
+            except ValueError:  # this module didn't have weight norm
+                return
+            logging.debug(f"Weight norm is removed from {m}.")  # only after success
+
+        self.apply(_remove_weight_norm)
+
+
+class ResidualParallelWaveGANDiscriminator(torch.nn.Module):
+    """Residual Parallel WaveGAN Discriminator module."""
+
+    def __init__(
+        self,
+        in_channels=1,
+        out_channels=1,
+        kernel_size=3,
+        layers=30,
+        stacks=3,
+        residual_channels=64,
+        gate_channels=128,
+        skip_channels=64,
+        dropout=0.0,
+        bias=True,
+        use_weight_norm=True,
+        use_causal_conv=False,
+        nonlinear_activation="LeakyReLU",
+        nonlinear_activation_params={"negative_slope": 0.2},
+    ):
+        """Initialize Residual Parallel WaveGAN Discriminator module.
+
+        Args:
+            in_channels (int): Number of input channels.
+            out_channels (int): Number of output channels.
+            kernel_size (int): Kernel size of dilated convolution.
+            layers (int): Number of residual block layers.
+            stacks (int): Number of stacks i.e., dilation cycles.
+            residual_channels (int): Number of channels in residual conv.
+            gate_channels (int):  Number of channels in gated conv.
+            skip_channels (int): Number of channels in skip conv.
+            dropout (float): Dropout rate. 0.0 means no dropout applied.
+            bias (bool): Whether to use bias parameter in the residual blocks.
+            use_weight_norm (bool): Whether to apply weight norm to all conv layers.
+            use_causal_conv (bool): Whether to use causal structure.
+            nonlinear_activation (str): Nonlinear function name in torch.nn.
+            nonlinear_activation_params (dict): Nonlinear function parameters
+
+        """
+        super(ResidualParallelWaveGANDiscriminator, self).__init__()
+        assert (kernel_size - 1) % 2 == 0, "Not support even number kernel size."
+
+        self.in_channels = in_channels
+        self.out_channels = out_channels
+        self.layers = layers
+        self.stacks = stacks
+        self.kernel_size = kernel_size
+
+        # layers must split evenly into stacks (dilation restarts per stack)
+        assert layers % stacks == 0
+        layers_per_stack = layers // stacks
+
+        # define first convolution
+        self.first_conv = torch.nn.Sequential(
+            Conv1d1x1(in_channels, residual_channels, bias=True),
+            getattr(torch.nn, nonlinear_activation)(
+                inplace=True, **nonlinear_activation_params
+            ),
+        )
+
+        # define residual blocks
+        self.conv_layers = torch.nn.ModuleList()
+        for layer in range(layers):
+            dilation = 2 ** (layer % layers_per_stack)
+            conv = ResidualBlock(
+                kernel_size=kernel_size,
+                residual_channels=residual_channels,
+                gate_channels=gate_channels,
+                skip_channels=skip_channels,
+                aux_channels=-1,
+                dilation=dilation,
+                dropout=dropout,
+                bias=bias,
+                use_causal_conv=use_causal_conv,
+            )
+            self.conv_layers += [conv]
+
+        # define output layers
+        self.last_conv_layers = torch.nn.ModuleList(
+            [
+                getattr(torch.nn, nonlinear_activation)(
+                    inplace=True, **nonlinear_activation_params
+                ),
+                Conv1d1x1(skip_channels, skip_channels, bias=True),
+                getattr(torch.nn, nonlinear_activation)(
+                    inplace=True, **nonlinear_activation_params
+                ),
+                Conv1d1x1(skip_channels, out_channels, bias=True),
+            ]
+        )
+
+        # apply weight norm
+        if use_weight_norm:
+            self.apply_weight_norm()
+
+    def forward(self, x):
+        """Calculate forward propagation.
+
+        Args:
+            x (Tensor): Input signal (B, in_channels, T).
+
+        Returns:
+            Tensor: Output tensor (B, out_channels, T)
+
+        """
+        x = self.first_conv(x)
+
+        skips = 0
+        for f in self.conv_layers:
+            x, h = f(x, None)
+            skips += h
+        skips *= math.sqrt(1.0 / len(self.conv_layers))  # scale summed skips by 1/sqrt(#blocks)
+
+        # apply final layers
+        x = skips
+        for f in self.last_conv_layers:
+            x = f(x)
+        return x
+
+    def apply_weight_norm(self):
+        """Apply weight normalization to every Conv1d/Conv2d layer of this module."""
+
+        def _apply_weight_norm(m):
+            if isinstance(m, torch.nn.Conv1d) or isinstance(m, torch.nn.Conv2d):
+                torch.nn.utils.weight_norm(m)
+                logging.debug(f"Weight norm is applied to {m}.")
+
+        self.apply(_apply_weight_norm)
+
+    def remove_weight_norm(self):
+        """Remove weight normalization from every layer that has it."""
+
+        def _remove_weight_norm(m):
+            try:
+                torch.nn.utils.remove_weight_norm(m)
+            except ValueError:  # this module didn't have weight norm
+                return
+            logging.debug(f"Weight norm is removed from {m}.")  # only after success
+
+        self.apply(_remove_weight_norm)
diff --git a/hifigan/parallel_wavegan/models/style_melgan.py b/hifigan/parallel_wavegan/models/style_melgan.py
new file mode 100644
index 0000000..819ac34
--- /dev/null
+++ b/hifigan/parallel_wavegan/models/style_melgan.py
@@ -0,0 +1,362 @@
+# Copyright 2021 Tomoki Hayashi
+#  MIT License (https://opensource.org/licenses/MIT)
+
+"""StyleMelGAN Modules."""
+
+import copy
+import logging
+
+import numpy as np
+import torch
+import torch.nn.functional as F
+
+from parallel_wavegan.layers import PQMF
+from parallel_wavegan.layers import TADEResBlock
+from parallel_wavegan.models import MelGANDiscriminator as BaseDiscriminator
+from parallel_wavegan.utils import read_hdf5
+
+
+class StyleMelGANGenerator(torch.nn.Module):
+    """Style MelGAN generator module."""
+
+    def __init__(
+        self,
+        in_channels=128,
+        aux_channels=80,
+        channels=64,
+        out_channels=1,
+        kernel_size=9,
+        dilation=2,
+        bias=True,
+        noise_upsample_scales=[11, 2, 2, 2],
+        noise_upsample_activation="LeakyReLU",
+        noise_upsample_activation_params={"negative_slope": 0.2},
+        upsample_scales=[2, 2, 2, 2, 2, 2, 2, 2, 1],
+        upsample_mode="nearest",
+        gated_function="softmax",
+        use_weight_norm=True,
+    ):
+        """Initialize Style MelGAN generator.
+
+        Args:
+            in_channels (int): Number of input noise channels.
+            aux_channels (int): Number of auxiliary input channels.
+            channels (int): Number of channels for conv layer.
+            out_channels (int): Number of output channels.
+            kernel_size (int): Kernel size of conv layers.
+            dilation (int): Dilation factor for conv layers.
+            bias (bool): Whether to add bias parameter in convolution layers.
+            noise_upsample_scales (list): List of noise upsampling scales.
+            noise_upsample_activation (str): Activation function module name for noise upsampling.
+            noise_upsample_activation_params (dict): Hyperparameters for the above activation function.
+            upsample_scales (list): List of upsampling scales.
+            upsample_mode (str): Upsampling mode in TADE layer.
+            gated_function (str): Gated function in TADEResBlock ("softmax" or "sigmoid").
+            use_weight_norm (bool): Whether to use weight norm.
+                If set to true, it will be applied to all of the conv layers.
+
+        """
+        super().__init__()
+
+        self.in_channels = in_channels
+
+        noise_upsample = []
+        in_chs = in_channels
+        for noise_upsample_scale in noise_upsample_scales:
+            # NOTE(kan-bayashi): How should we design noise upsampling part?
+            noise_upsample += [
+                torch.nn.ConvTranspose1d(
+                    in_chs,
+                    channels,
+                    noise_upsample_scale * 2,
+                    stride=noise_upsample_scale,
+                    padding=noise_upsample_scale // 2 + noise_upsample_scale % 2,
+                    output_padding=noise_upsample_scale % 2,  # output length = input length * scale
+                    bias=bias,
+                )
+            ]
+            noise_upsample += [
+                getattr(torch.nn, noise_upsample_activation)(
+                    **noise_upsample_activation_params
+                )
+            ]
+            in_chs = channels
+        self.noise_upsample = torch.nn.Sequential(*noise_upsample)
+        self.noise_upsample_factor = np.prod(noise_upsample_scales)
+
+        self.blocks = torch.nn.ModuleList()
+        aux_chs = aux_channels
+        for upsample_scale in upsample_scales:
+            self.blocks += [
+                TADEResBlock(
+                    in_channels=channels,
+                    aux_channels=aux_chs,
+                    kernel_size=kernel_size,
+                    dilation=dilation,
+                    bias=bias,
+                    upsample_factor=upsample_scale,
+                    upsample_mode=upsample_mode,
+                    gated_function=gated_function,
+                ),
+            ]
+            aux_chs = channels  # only the first block gets aux_channels as aux width
+        self.upsample_factor = np.prod(upsample_scales)
+
+        self.output_conv = torch.nn.Sequential(
+            torch.nn.Conv1d(
+                channels,
+                out_channels,
+                kernel_size,
+                1,
+                bias=bias,
+                padding=(kernel_size - 1) // 2,
+            ),
+            torch.nn.Tanh(),
+        )
+
+        # apply weight norm
+        if use_weight_norm:
+            self.apply_weight_norm()
+
+        # reset parameters
+        self.reset_parameters()
+
+    def forward(self, c, z=None):
+        """Calculate forward propagation.
+
+        Args:
+            c (Tensor): Auxiliary input tensor (B, aux_channels, T).
+            z (Tensor): Input noise tensor (B, in_channels, 1); drawn with randn if None.
+
+        Returns:
+            Tensor: Output tensor (B, out_channels, T * prod(upsample_scales)).
+
+        """
+        if z is None:
+            z = torch.randn(c.size(0), self.in_channels, 1).to(
+                device=c.device,
+                dtype=c.dtype,
+            )
+        x = self.noise_upsample(z)
+        for block in self.blocks:
+            x, c = block(x, c)
+        x = self.output_conv(x)
+        return x
+
+    def remove_weight_norm(self):
+        """Remove weight normalization from every layer that has it."""
+
+        def _remove_weight_norm(m):
+            try:
+                torch.nn.utils.remove_weight_norm(m)
+            except ValueError:  # this module didn't have weight norm
+                return
+            logging.debug(f"Weight norm is removed from {m}.")  # only after success
+
+        self.apply(_remove_weight_norm)
+
+    def apply_weight_norm(self):
+        """Apply weight normalization to every Conv1d/ConvTranspose1d layer."""
+
+        def _apply_weight_norm(m):
+            if isinstance(m, torch.nn.Conv1d) or isinstance(
+                m, torch.nn.ConvTranspose1d
+            ):
+                torch.nn.utils.weight_norm(m)
+                logging.debug(f"Weight norm is applied to {m}.")
+
+        self.apply(_apply_weight_norm)
+
+    def reset_parameters(self):
+        """Re-draw Conv1d/ConvTranspose1d weights from a normal distribution (mean 0, std 0.02)."""
+
+        def _reset_parameters(m):
+            if isinstance(m, torch.nn.Conv1d) or isinstance(
+                m, torch.nn.ConvTranspose1d
+            ):
+                m.weight.data.normal_(0.0, 0.02)
+                logging.debug(f"Reset parameters in {m}.")
+
+        self.apply(_reset_parameters)
+
+    def register_stats(self, stats):
+        """Register feature statistics as the buffers "mean" and "scale".
+
+        Args:
+            stats (str): Path of statistics file (".npy" or ".h5"). A ".npy" file
+                is indexed as [0] for the mean and [1] for the scale.
+        """
+        assert stats.endswith((".h5", ".npy"))
+        if stats.endswith(".h5"):
+            mean = read_hdf5(stats, "mean").reshape(-1)
+            scale = read_hdf5(stats, "scale").reshape(-1)
+        else:
+            loaded = np.load(stats)  # read the file once instead of once per row
+            mean, scale = loaded[0].reshape(-1), loaded[1].reshape(-1)
+        self.register_buffer("mean", torch.from_numpy(mean).float())
+        self.register_buffer("scale", torch.from_numpy(scale).float())
+        logging.info("Successfully registered stats as buffer.")
+
+    def inference(self, c, normalize_before=False):
+        """Perform inference.
+
+        Args:
+            c (Union[Tensor, ndarray]): Input tensor (T, aux_channels).
+            normalize_before (bool): Whether to normalize c with the mean/scale buffers.
+
+        Returns:
+            Tensor: Output tensor (T * prod(upsample_scales), out_channels).
+
+        """
+        if not isinstance(c, torch.Tensor):
+            c = torch.tensor(c, dtype=torch.float).to(next(self.parameters()).device)
+        if normalize_before:
+            c = (c - self.mean) / self.scale
+        c = c.transpose(1, 0).unsqueeze(0)
+
+        # prepare noise input
+        noise_size = (
+            1,
+            self.in_channels,
+            (c.size(2) - 1) // self.noise_upsample_factor + 1,
+        )
+        noise = torch.randn(*noise_size, dtype=torch.float).to(
+            next(self.parameters()).device
+        )
+        x = self.noise_upsample(noise)
+
+        # NOTE(kan-bayashi): To remove pop noise at the end of audio, perform padding
+        #    for feature sequence and after generation cut the generated audio. This
+        #    requires additional computation but it can prevent pop noise.
+        total_length = c.size(2) * self.upsample_factor
+        c = F.pad(c, (0, x.size(2) - c.size(2)), "replicate")
+
+        # This version causes pop noise.
+        # x = x[:, :, :c.size(2)]
+
+        for block in self.blocks:
+            x, c = block(x, c)
+        x = self.output_conv(x)[..., :total_length]
+
+        return x.squeeze(0).transpose(1, 0)
+
+
+class StyleMelGANDiscriminator(torch.nn.Module):
+    """Style MelGAN discriminator module."""
+
+    def __init__(
+        self,
+        repeats=2,
+        window_sizes=[512, 1024, 2048, 4096],
+        pqmf_params=[
+            [1, None, None, None],
+            [2, 62, 0.26700, 9.0],
+            [4, 62, 0.14200, 9.0],
+            [8, 62, 0.07949, 9.0],
+        ],
+        discriminator_params={
+            "out_channels": 1,
+            "kernel_sizes": [5, 3],
+            "channels": 16,
+            "max_downsample_channels": 512,
+            "bias": True,
+            "downsample_scales": [4, 4, 4, 1],
+            "nonlinear_activation": "LeakyReLU",
+            "nonlinear_activation_params": {"negative_slope": 0.2},
+            "pad": "ReflectionPad1d",
+            "pad_params": {},
+        },
+        use_weight_norm=True,
+    ):
+        """Initialize Style MelGAN discriminator.
+
+        Args:
+            repeats (int): Number of repetitions to apply RWD.
+            window_sizes (list): List of random window sizes.
+            pqmf_params (list): List of list of Parameters for PQMF modules
+            discriminator_params (dict): Parameters for base discriminator module.
+            use_weight_norm (bool): Whether to apply weight normalization.
+
+        """
+        super().__init__()
+
+        # every window_size // pqmf_param[0] must give the same length
+        assert len(window_sizes) == len(pqmf_params)
+        sizes = [ws // p[0] for ws, p in zip(window_sizes, pqmf_params)]
+        assert len(window_sizes) == sum([sizes[0] == size for size in sizes])
+
+        self.repeats = repeats
+        self.window_sizes = window_sizes
+        self.pqmfs = torch.nn.ModuleList()
+        self.discriminators = torch.nn.ModuleList()
+        for pqmf_param in pqmf_params:
+            d_params = copy.deepcopy(discriminator_params)
+            d_params["in_channels"] = pqmf_param[0]
+            if pqmf_param[0] == 1:
+                self.pqmfs += [torch.nn.Identity()]
+            else:
+                self.pqmfs += [PQMF(*pqmf_param)]
+            self.discriminators += [BaseDiscriminator(**d_params)]
+
+        # apply weight norm
+        if use_weight_norm:
+            self.apply_weight_norm()
+
+        # reset parameters
+        self.reset_parameters()
+
+    def forward(self, x):
+        """Calculate forward propagation.
+
+        Args:
+            x (Tensor): Input tensor (B, 1, T).
+
+        Returns:
+            List: List of discriminator outputs, #items in the list will be
+                equal to repeats * #discriminators.
+
+        """
+        outs = []
+        for _ in range(self.repeats):
+            outs += self._forward(x)
+
+        return outs
+
+    def _forward(self, x):
+        """Run every discriminator once on its own random crop of x (last axis = time)."""
+        outs = []
+        bands = zip(self.window_sizes, self.pqmfs, self.discriminators)
+        for idx, (ws, pqmf, disc) in enumerate(bands):
+            # NOTE(kan-bayashi): Is it ok to apply different window for real and fake samples?
+            # +1: randint's upper bound is exclusive, so this also allows the last
+            # valid start index and inputs whose length is exactly ws.
+            start_idx = np.random.randint(x.size(-1) - ws + 1)
+            x_ = x[:, :, start_idx : start_idx + ws]
+            # the first entry is called directly; the others go through .analysis()
+            x_ = pqmf(x_) if idx == 0 else pqmf.analysis(x_)
+            outs += [disc(x_)]
+        return outs
+
+    def apply_weight_norm(self):
+        """Apply weight normalization to every Conv1d/ConvTranspose1d layer."""
+
+        def _apply_weight_norm(m):
+            if isinstance(m, torch.nn.Conv1d) or isinstance(
+                m, torch.nn.ConvTranspose1d
+            ):
+                torch.nn.utils.weight_norm(m)
+                logging.debug(f"Weight norm is applied to {m}.")
+
+        self.apply(_apply_weight_norm)
+
+    def reset_parameters(self):
+        """Re-draw Conv1d/ConvTranspose1d weights from a normal distribution (mean 0, std 0.02)."""
+
+        def _reset_parameters(m):
+            if isinstance(m, torch.nn.Conv1d) or isinstance(
+                m, torch.nn.ConvTranspose1d
+            ):
+                m.weight.data.normal_(0.0, 0.02)
+                logging.debug(f"Reset parameters in {m}.")
+
+        self.apply(_reset_parameters)
diff --git a/hifigan/parallel_wavegan/models/tf_models.py b/hifigan/parallel_wavegan/models/tf_models.py
new file mode 100644
index 0000000..286da21
--- /dev/null
+++ b/hifigan/parallel_wavegan/models/tf_models.py
@@ -0,0 +1,137 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2020 MINH ANH (@dathudeptrai)
+#  MIT License (https://opensource.org/licenses/MIT)
+
+"""Tensorflow MelGAN modules compatible with pytorch."""
+
+import tensorflow as tf
+
+import numpy as np
+
+from parallel_wavegan.layers.tf_layers import TFConvTranspose1d
+from parallel_wavegan.layers.tf_layers import TFReflectionPad1d
+from parallel_wavegan.layers.tf_layers import TFResidualStack
+
+
+class TFMelGANGenerator(tf.keras.layers.Layer):
+    """Tensorflow MelGAN generator module."""
+
+    def __init__(
+        self,
+        in_channels=80,
+        out_channels=1,
+        kernel_size=7,
+        channels=512,
+        bias=True,
+        upsample_scales=[8, 8, 2, 2],
+        stack_kernel_size=3,
+        stacks=3,
+        nonlinear_activation="LeakyReLU",
+        nonlinear_activation_params={"alpha": 0.2},
+        pad="ReflectionPad1d",
+        pad_params={},
+        use_final_nonlinear_activation=True,
+        use_weight_norm=True,
+        use_causal_conv=False,
+    ):
+        """Initialize TFMelGANGenerator module.
+
+        Args:
+            in_channels (int): Number of input channels.
+            out_channels (int): Number of output channels.
+            kernel_size (int): Kernel size of initial and final conv layer.
+            channels (int): Initial number of channels for conv layer.
+            bias (bool): Whether to add bias parameter in convolution layers.
+            upsample_scales (list): List of upsampling scales.
+            stack_kernel_size (int): Kernel size of dilated conv layers in residual stack.
+            stacks (int): Number of stacks in a single residual stack.
+            nonlinear_activation (str): Activation function module name.
+            nonlinear_activation_params (dict): Hyperparameters for activation function.
+            pad (str): Padding function module name before dilated convolution layer.
+            pad_params (dict): Hyperparameters for padding function.
+            use_final_nonlinear_activation (bool): Whether to append a tanh activation after the final layer.
+            use_weight_norm (bool): No effect but keep it as is to be the same as pytorch version.
+            use_causal_conv (bool): Whether to use causal convolution.
+
+        """
+        super(TFMelGANGenerator, self).__init__()
+
+        # check that the hyperparameters are valid
+        assert not use_causal_conv, "Not supported yet."
+        assert channels >= np.prod(upsample_scales)
+        assert channels % (2 ** len(upsample_scales)) == 0
+        assert pad == "ReflectionPad1d", f"Not supported (pad={pad})."
+
+        # add initial layer
+        layers = []
+        layers += [
+            TFReflectionPad1d((kernel_size - 1) // 2),
+            tf.keras.layers.Conv2D(
+                filters=channels,
+                kernel_size=(kernel_size, 1),
+                padding="valid",
+                use_bias=bias,
+            ),
+        ]
+
+        for i, upsample_scale in enumerate(upsample_scales):
+            # add upsampling layer
+            layers += [
+                getattr(tf.keras.layers, nonlinear_activation)(
+                    **nonlinear_activation_params
+                ),
+                TFConvTranspose1d(
+                    channels=channels // (2 ** (i + 1)),
+                    kernel_size=upsample_scale * 2,
+                    stride=upsample_scale,
+                    padding="same",
+                ),
+            ]
+
+            # add residual stack
+            for j in range(stacks):
+                layers += [
+                    TFResidualStack(
+                        kernel_size=stack_kernel_size,
+                        channels=channels // (2 ** (i + 1)),
+                        dilation=stack_kernel_size ** j,
+                        bias=bias,
+                        nonlinear_activation=nonlinear_activation,
+                        nonlinear_activation_params=nonlinear_activation_params,
+                        padding="same",
+                    )
+                ]
+
+        # add final layer
+        layers += [
+            getattr(tf.keras.layers, nonlinear_activation)(
+                **nonlinear_activation_params
+            ),
+            TFReflectionPad1d((kernel_size - 1) // 2),
+            tf.keras.layers.Conv2D(
+                filters=out_channels, kernel_size=(kernel_size, 1), use_bias=bias
+            ),
+        ]
+        if use_final_nonlinear_activation:
+            layers += [tf.keras.layers.Activation("tanh")]
+
+        self.melgan = tf.keras.models.Sequential(layers)
+
+    # TODO(kan-bayashi): Fix hard coded dimension
+    @tf.function(
+        input_signature=[tf.TensorSpec(shape=[None, None, 80], dtype=tf.float32)]
+    )
+    def call(self, c):
+        """Calculate forward propagation.
+
+        Args:
+            c (Tensor): Input tensor (B, T, in_channels).
+
+        Returns:
+            Tensor: Output tensor (B, T * prod(upsample_scales), out_channels).
+
+        """
+        c = tf.expand_dims(c, 2)  # add a size-1 axis for the (kernel_size, 1) Conv2D layers
+        c = self.melgan(c)
+        return c[:, :, 0, :]  # drop the size-1 axis again
diff --git a/hifigan/parallel_wavegan/optimizers/__init__.py b/hifigan/parallel_wavegan/optimizers/__init__.py
new file mode 100644
index 0000000..db777e8
--- /dev/null
+++ b/hifigan/parallel_wavegan/optimizers/__init__.py
@@ -0,0 +1,3 @@
+from torch.optim import *  # NOQA
+
+from .radam import *  # NOQA
diff --git a/hifigan/parallel_wavegan/optimizers/radam.py b/hifigan/parallel_wavegan/optimizers/radam.py
new file mode 100644
index 0000000..f9d35dc
--- /dev/null
+++ b/hifigan/parallel_wavegan/optimizers/radam.py
@@ -0,0 +1,100 @@
+# -*- coding: utf-8 -*-
+
+"""RAdam optimizer.
+
+This code is derived from https://github.com/LiyuanLucasLiu/RAdam.
+"""
+
+import math
+import torch
+
+from torch.optim.optimizer import Optimizer
+
+
+class RAdam(Optimizer):
+    """Rectified Adam optimizer."""
+
+    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0):
+        """Initialize RAdam optimizer."""
+        defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay)
+        self.buffer = [[None, None, None] for ind in range(10)]
+        super(RAdam, self).__init__(params, defaults)
+
+    def __setstate__(self, state):
+        """Set state."""
+        super(RAdam, self).__setstate__(state)
+
+    def step(self, closure=None):
+        """Perform one optimization step and return closure()'s result (None without closure)."""
+        loss = None
+        if closure is not None:
+            loss = closure()
+
+        for group in self.param_groups:
+
+            for p in group["params"]:
+                if p.grad is None:
+                    continue
+                grad = p.grad.data.float()
+                if grad.is_sparse:
+                    raise RuntimeError("RAdam does not support sparse gradients")
+
+                p_data_fp32 = p.data.float()
+
+                state = self.state[p]
+
+                if len(state) == 0:
+                    state["step"] = 0
+                    state["exp_avg"] = torch.zeros_like(p_data_fp32)
+                    state["exp_avg_sq"] = torch.zeros_like(p_data_fp32)
+                else:
+                    state["exp_avg"] = state["exp_avg"].type_as(p_data_fp32)
+                    state["exp_avg_sq"] = state["exp_avg_sq"].type_as(p_data_fp32)
+
+                exp_avg, exp_avg_sq = state["exp_avg"], state["exp_avg_sq"]
+                beta1, beta2 = group["betas"]
+
+                # keyword alpha=/value= forms replace the deprecated scalar-first overloads
+                exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)
+                exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)
+
+                state["step"] += 1
+                buffered = self.buffer[int(state["step"] % 10)]
+                if state["step"] == buffered[0]:
+                    N_sma, step_size = buffered[1], buffered[2]
+                else:
+                    buffered[0] = state["step"]
+                    beta2_t = beta2 ** state["step"]
+                    N_sma_max = 2 / (1 - beta2) - 1
+                    N_sma = N_sma_max - 2 * state["step"] * beta2_t / (1 - beta2_t)
+                    buffered[1] = N_sma
+
+                    # more conservative since it's an approximated value
+                    if N_sma >= 5:
+                        step_size = math.sqrt(
+                            (1 - beta2_t)
+                            * (N_sma - 4)
+                            / (N_sma_max - 4)
+                            * (N_sma - 2)
+                            / N_sma
+                            * N_sma_max
+                            / (N_sma_max - 2)
+                        ) / (
+                            1 - beta1 ** state["step"]
+                        )  # NOQA
+                    else:
+                        step_size = 1.0 / (1 - beta1 ** state["step"])
+                    buffered[2] = step_size
+
+                if group["weight_decay"] != 0:
+                    p_data_fp32.add_(p_data_fp32, alpha=-group["weight_decay"] * group["lr"])
+
+                # more conservative since it's an approximated value
+                if N_sma >= 5:
+                    denom = exp_avg_sq.sqrt().add_(group["eps"])
+                    p_data_fp32.addcdiv_(exp_avg, denom, value=-step_size * group["lr"])
+                else:
+                    p_data_fp32.add_(exp_avg, alpha=-step_size * group["lr"])
+
+                p.data.copy_(p_data_fp32)
+
+        return loss
diff --git a/hifigan/parallel_wavegan/utils/__init__.py b/hifigan/parallel_wavegan/utils/__init__.py
new file mode 100644
index 0000000..e8fa95a
--- /dev/null
+++ b/hifigan/parallel_wavegan/utils/__init__.py
@@ -0,0 +1 @@
+from .utils import *  # NOQA
diff --git a/hifigan/parallel_wavegan/utils/utils.py b/hifigan/parallel_wavegan/utils/utils.py
new file mode 100644
index 0000000..86dc30b
--- /dev/null
+++ b/hifigan/parallel_wavegan/utils/utils.py
@@ -0,0 +1,394 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2019 Tomoki Hayashi
+#  MIT License (https://opensource.org/licenses/MIT)
+
+"""Utility functions."""
+
+import fnmatch
+import logging
+import os
+import sys
+import tarfile
+
+from distutils.version import LooseVersion
+from filelock import FileLock
+
+import h5py
+import numpy as np
+import torch
+import yaml
+
+PRETRAINED_MODEL_LIST = {
+    "ljspeech_parallel_wavegan.v1": "1PdZv37JhAQH6AwNh31QlqruqrvjTBq7U",
+    "ljspeech_parallel_wavegan.v1.long": "1A9TsrD9fHxFviJVFjCk5W6lkzWXwhftv",
+    "ljspeech_parallel_wavegan.v1.no_limit": "1CdWKSiKoFNPZyF1lo7Dsj6cPKmfLJe72",
+    "ljspeech_parallel_wavegan.v3": "1-oZpwpWZMMolDYsCqeL12dFkXSBD9VBq",
+    "ljspeech_melgan.v1": "1i7-FPf9LPsYLHM6yNPoJdw5Q9d28C-ip",
+    "ljspeech_melgan.v1.long": "1x1b_R7d2561nqweK3FPb2muTdcFIYTu6",
+    "ljspeech_melgan.v3": "1J5gJ_FUZhOAKiRFWiAK6FcO5Z6oYJbmQ",
+    "ljspeech_melgan.v3.long": "124JnaLcRe7TsuAGh3XIClS3C7Wom9AU2",
+    "ljspeech_full_band_melgan.v2": "1Kb7q5zBeQ30Wsnma0X23G08zvgDG5oen",
+    "ljspeech_multi_band_melgan.v2": "1b70pJefKI8DhGYz4SxbEHpxm92tj1_qC",
+    "ljspeech_hifigan.v1": "1i6-hR_ksEssCYNlNII86v3AoeA1JcuWD",
+    "ljspeech_style_melgan.v1": "10aJSZfmCAobQJgRGio6cNyw6Xlgmme9-",
+    "jsut_parallel_wavegan.v1": "1qok91A6wuubuz4be-P9R2zKhNmQXG0VQ",
+    "jsut_multi_band_melgan.v2": "1chTt-76q2p69WPpZ1t1tt8szcM96IKad",
+    "jsut_hifigan.v1": "1vdgqTu9YKyGMCn-G7H2fI6UBC_4_55XB",
+    "jsut_style_melgan.v1": "1VIkjSxYxAGUVEvJxNLaOaJ7Twe48SH-s",
+    "csmsc_parallel_wavegan.v1": "1QTOAokhD5dtRnqlMPTXTW91-CG7jf74e",
+    "csmsc_multi_band_melgan.v2": "1G6trTmt0Szq-jWv2QDhqglMdWqQxiXQT",
+    "csmsc_hifigan.v1": "1fVKGEUrdhGjIilc21Sf0jODulAq6D1qY",
+    "csmsc_style_melgan.v1": "1kGUC_b9oVSv24vZRi66AAbSNUKJmbSCX",
+    "arctic_slt_parallel_wavegan.v1": "1_MXePg40-7DTjD0CDVzyduwQuW_O9aA1",
+    "jnas_parallel_wavegan.v1": "1D2TgvO206ixdLI90IqG787V6ySoXLsV_",
+    "vctk_parallel_wavegan.v1": "1bqEFLgAroDcgUy5ZFP4g2O2MwcwWLEca",
+    "vctk_parallel_wavegan.v1.long": "1tO4-mFrZ3aVYotgg7M519oobYkD4O_0-",
+    "vctk_multi_band_melgan.v2": "10PRQpHMFPE7RjF-MHYqvupK9S0xwBlJ_",
+    "vctk_hifigan.v1": "1oVOC4Vf0DYLdDp4r7GChfgj7Xh5xd0ex",
+    "vctk_style_melgan.v1": "14ThSEgjvl_iuFMdEGuNp7d3DulJHS9Mk",
+    "libritts_parallel_wavegan.v1": "1zHQl8kUYEuZ_i1qEFU6g2MEu99k3sHmR",
+    "libritts_parallel_wavegan.v1.long": "1b9zyBYGCCaJu0TIus5GXoMF8M3YEbqOw",
+    "libritts_multi_band_melgan.v2": "1kIDSBjrQvAsRewHPiFwBZ3FDelTWMp64",
+    "libritts_hifigan.v1": "1_TVFIvVtMn-Z4NiQrtrS20uSJOvBsnu1",
+    "libritts_style_melgan.v1": "1yuQakiMP0ECdB55IoxEGCbXDnNkWCoBg",
+    "kss_parallel_wavegan.v1": "1mLtQAzZHLiGSWguKCGG0EZa4C_xUO5gX",
+    "hui_acg_hokuspokus_parallel_wavegan.v1": "1irKf3okMLau56WNeOnhr2ZfSVESyQCGS",
+    "ruslan_parallel_wavegan.v1": "1M3UM6HN6wrfSe5jdgXwBnAIl_lJzLzuI",
+}
+
+
+def find_files(root_dir, query="*.wav", include_root_dir=True):
+    """Find files recursively.
+
+    Args:
+        root_dir (str): Root directory to search (symlinks are followed).
+        query (str): fnmatch-style pattern matched against file names.
+        include_root_dir (bool): If False, paths are relative to root_dir.
+
+    Returns:
+        list: List of found filenames.
+
+    """
+    files = []
+    for root, dirnames, filenames in os.walk(root_dir, followlinks=True):
+        for filename in fnmatch.filter(filenames, query):
+            files.append(os.path.join(root, filename))
+    if not include_root_dir:
+        files = [os.path.relpath(file_, root_dir) for file_ in files]
+
+    return files
+
+
+def read_hdf5(hdf5_name, hdf5_path):
+    """Read hdf5 dataset (logs an error and exits with status 1 if missing).
+
+    Args:
+        hdf5_name (str): Filename of hdf5 file.
+        hdf5_path (str): Dataset name in hdf5 file.
+
+    Return:
+        any: Dataset values.
+
+    """
+    if not os.path.exists(hdf5_name):
+        logging.error(f"There is no such a hdf5 file ({hdf5_name}).")
+        sys.exit(1)
+
+    # the context manager closes the file on every path, including sys.exit
+    with h5py.File(hdf5_name, "r") as hdf5_file:
+        if hdf5_path not in hdf5_file:
+            logging.error(f"There is no such a data in hdf5 file. ({hdf5_path})")
+            sys.exit(1)
+
+        # [()] reads the whole dataset into memory before the file closes
+        hdf5_data = hdf5_file[hdf5_path][()]
+
+    return hdf5_data
+
+
+def write_hdf5(hdf5_name, hdf5_path, write_data, is_overwrite=True):
+    """Write dataset to hdf5.
+
+    The file is created if missing; an existing file is opened in place, so
+    other datasets stored in it are kept.
+
+    Args:
+        hdf5_name (str): Hdf5 dataset filename.
+        hdf5_path (str): Dataset path in hdf5.
+        write_data (ndarray): Data to write.
+        is_overwrite (bool): Whether to overwrite dataset. If False and the
+            dataset already exists, an error is logged and the process exits
+            with status 1.
+
+    """
+    # convert to numpy array
+    write_data = np.array(write_data)
+
+    # check folder existence
+    folder_name, _ = os.path.split(hdf5_name)
+    if len(folder_name) != 0:
+        # exist_ok tolerates a folder created concurrently by another process
+        os.makedirs(folder_name, exist_ok=True)
+
+    # open with r+ mode if the file already exists, otherwise with w mode
+    mode = "r+" if os.path.exists(hdf5_name) else "w"
+    # the context manager closes the file even if writing fails or we exit
+    with h5py.File(hdf5_name, mode) as hdf5_file:
+        # check dataset existence
+        if hdf5_path in hdf5_file:
+            if not is_overwrite:
+                logging.error(
+                    "Dataset in hdf5 file already exists. "
+                    "if you want to overwrite, please set is_overwrite = True."
+                )
+                sys.exit(1)
+            logging.warning(
+                "Dataset in hdf5 file already exists. " "recreate dataset in hdf5."
+            )
+            del hdf5_file[hdf5_path]
+
+        # write data to hdf5
+        hdf5_file.create_dataset(hdf5_path, data=write_data)
+        hdf5_file.flush()
+
+
+class HDF5ScpLoader(object):
+    """Loader class for a feats.scp file of hdf5 file.
+
+    Examples:
+        key1 /some/path/a.h5:feats
+        key2 /some/path/b.h5:feats
+        key3 /some/path/c.h5:feats
+        key4 /some/path/d.h5:feats
+        ...
+        >>> loader = HDF5ScpLoader("hdf5.scp")
+        >>> array = loader["key1"]
+
+        key1 /some/path/a.h5
+        key2 /some/path/b.h5
+        key3 /some/path/c.h5
+        key4 /some/path/d.h5
+        ...
+        >>> loader = HDF5ScpLoader("hdf5.scp", "feats")
+        >>> array = loader["key1"]
+
+        key1 /some/path/a.h5:feats_1,feats_2
+        key2 /some/path/b.h5:feats_1,feats_2
+        key3 /some/path/c.h5:feats_1,feats_2
+        key4 /some/path/d.h5:feats_1,feats_2
+        ...
+        >>> loader = HDF5ScpLoader("hdf5.scp")
+        # feats_1 and feats_2 will be concatenated
+        >>> array = loader["key1"]
+
+    """
+
+    def __init__(self, feats_scp, default_hdf5_path="feats"):
+        """Initialize HDF5 scp loader.
+
+        Args:
+            feats_scp (str): Kaldi-style feats.scp file with hdf5 format.
+            default_hdf5_path (str): Path in hdf5 file. Not used if the scp entry has one.
+
+        """
+        self.default_hdf5_path = default_hdf5_path
+        with open(feats_scp) as f:
+            lines = [line.replace("\n", "") for line in f.readlines()]
+        self.data = {}
+        for line in lines:
+            key, value = line.split()  # exactly two whitespace-separated fields
+            self.data[key] = value
+
+    def get_path(self, key):
+        """Get the scp entry (hdf5 path plus optional ":dataset" part) for a key."""
+        return self.data[key]
+
+    def __getitem__(self, key):
+        """Get ndarray for a given key."""
+        p = self.data[key]
+        if ":" in p:  # the entry names its dataset(s) explicitly
+            if len(p.split(",")) == 1:  # single dataset
+                return read_hdf5(*p.split(":"))
+            else:  # several comma-separated datasets, joined along axis 1
+                p1, p2 = p.split(":")
+                feats = [read_hdf5(p1, p) for p in p2.split(",")]
+                return np.concatenate(
+                    [f if len(f.shape) != 1 else f.reshape(-1, 1) for f in feats], 1
+                )
+        else:
+            return read_hdf5(p, self.default_hdf5_path)
+
+    def __len__(self):
+        """Return the length of the scp file."""
+        return len(self.data)
+
+    def __iter__(self):
+        """Return the iterator of the scp file."""
+        return iter(self.data)
+
+    def keys(self):
+        """Return the keys of the scp file."""
+        return self.data.keys()
+
+    def values(self):
+        """Yield the loaded ndarray of every key in the scp file."""
+        for key in self.keys():
+            yield self[key]
+
+
+class NpyScpLoader(object):
+    """Loader class for a feats.scp file of npy file.
+
+    Examples:
+        key1 /some/path/a.npy
+        key2 /some/path/b.npy
+        key3 /some/path/c.npy
+        key4 /some/path/d.npy
+        ...
+        >>> loader = NpyScpLoader("feats.scp")
+        >>> array = loader["key1"]
+
+    """
+
+    def __init__(self, feats_scp):
+        """Initialize npy scp loader.
+
+        Args:
+            feats_scp (str): Kaldi-style feats.scp file with npy format.
+
+        """
+        with open(feats_scp) as f:
+            lines = [line.replace("\n", "") for line in f.readlines()]
+        self.data = {}
+        for line in lines:
+            key, value = line.split()  # exactly two whitespace-separated fields
+            self.data[key] = value
+
+    def get_path(self, key):
+        """Get npy file path for a given key."""
+        return self.data[key]
+
+    def __getitem__(self, key):
+        """Get ndarray for a given key."""
+        return np.load(self.data[key])
+
+    def __len__(self):
+        """Return the length of the scp file."""
+        return len(self.data)
+
+    def __iter__(self):
+        """Return the iterator of the scp file."""
+        return iter(self.data)
+
+    def keys(self):
+        """Return the keys of the scp file."""
+        return self.data.keys()
+
+    def values(self):
+        """Yield the loaded ndarray of every key in the scp file."""
+        for key in self.keys():
+            yield self[key]
+
+
+def load_model(checkpoint, config=None, stats=None):
+    """Load trained model.
+
+    Args:
+        checkpoint (str): Checkpoint path.
+        config (dict): Configuration dict. If None, config.yml beside checkpoint is loaded.
+        stats (str): Statistics file path. If None, one beside checkpoint is used when found.
+
+    Return:
+        torch.nn.Module: Model instance.
+
+    """
+    # load config if not provided (NOTE: yaml.Loader is unsafe for untrusted files)
+    if config is None:
+        dirname = os.path.dirname(checkpoint)
+        config = os.path.join(dirname, "config.yml")
+        with open(config) as f:
+            config = yaml.load(f, Loader=yaml.Loader)
+
+    # lazy load for circular error
+    import parallel_wavegan.models
+
+    # get model and load parameters
+    model_class = getattr(
+        parallel_wavegan.models,
+        config.get("generator_type", "ParallelWaveGANGenerator"),
+    )
+    # workaround for typo #295
+    generator_params = {
+        k.replace("upsample_kernal_sizes", "upsample_kernel_sizes"): v
+        for k, v in config["generator_params"].items()
+    }
+    model = model_class(**generator_params)
+    model.load_state_dict(
+        torch.load(checkpoint, map_location="cpu")["model"]["generator"]
+    )
+
+    # if config names a format, look for stats.{h5,npy} beside the checkpoint
+    if stats is None and config.get("format"):
+        dirname = os.path.dirname(checkpoint)
+        if config["format"] == "hdf5":
+            ext = "h5"
+        else:
+            ext = "npy"
+        if os.path.exists(os.path.join(dirname, f"stats.{ext}")):
+            stats = os.path.join(dirname, f"stats.{ext}")
+
+    # load stats
+    if stats is not None:
+        model.register_stats(stats)
+
+    # add pqmf if needed (generator has more than one output channel)
+    if config["generator_params"]["out_channels"] > 1:
+        # lazy load for circular error
+        from parallel_wavegan.layers import PQMF
+
+        pqmf_params = {}
+        if LooseVersion(config.get("version", "0.1.0")) <= LooseVersion("0.4.2"):
+            # For compatibility, here we set default values in version <= 0.4.2
+            pqmf_params.update(taps=62, cutoff_ratio=0.15, beta=9.0)
+        model.pqmf = PQMF(
+            subbands=config["generator_params"]["out_channels"],
+            **config.get("pqmf_params", pqmf_params),
+        )
+
+    return model
+
+
+def download_pretrained_model(tag, download_dir=None):
+    """Download pretrained model from Google Drive.
+
+    Args:
+        tag (str): Pretrained model tag (a key of PRETRAINED_MODEL_LIST).
+        download_dir (str): Directory to save downloaded files.
+
+    Returns:
+        str: Path of downloaded model checkpoint.
+
+    """
+    assert tag in PRETRAINED_MODEL_LIST, f"{tag} does not exists."
+    id_ = PRETRAINED_MODEL_LIST[tag]
+    if download_dir is None:
+        download_dir = os.path.expanduser("~/.cache/parallel_wavegan")
+    output_path = f"{download_dir}/{tag}.tar.gz"
+    os.makedirs(f"{download_dir}", exist_ok=True)
+    with FileLock(output_path + ".lock"):
+        if not os.path.exists(output_path):
+            import gdown  # lazy load for compatibility
+
+            url = f"https://drive.google.com/uc?id={id_}"
+            gdown.download(url, output_path, quiet=False)
+        # extract whenever no checkpoint is present (e.g. after an interrupted run)
+        if not find_files(f"{download_dir}/{tag}", "checkpoint*.pkl"):
+            with tarfile.open(output_path, "r:*") as tar:
+                for member in tar.getmembers():
+                    if member.isreg():
+                        # flatten paths: every file lands directly in the tag dir
+                        member.name = os.path.basename(member.name)
+                        tar.extract(member, f"{download_dir}/{tag}")
+    checkpoint_path = find_files(f"{download_dir}/{tag}", "checkpoint*.pkl")
+    return checkpoint_path[0]
diff --git a/hifigan/utils b/hifigan/utils
new file mode 120000
index 0000000..468ba70
--- /dev/null
+++ b/hifigan/utils
@@ -0,0 +1 @@
+../utils
\ No newline at end of file
diff --git a/path.sh b/path.sh
index 52bd140..c978741 100644
--- a/path.sh
+++ b/path.sh
@@ -1,3 +1,5 @@
 conda activate vflow
 export PATH=$PWD/utils:$PATH
+export PYTHONPATH=$PWD/hifigan:$PYTHONPATH
 chmod +x utils/*
+chmod +x hifigan/parallel_wavegan/bin/decode.py