diff --git a/.gitignore b/.gitignore
index 3eb56cb..029eac4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -150,3 +150,6 @@ cython_debug/
 #  and can be added to the global gitignore or merged into this file.  For a more nuclear
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 .idea/
+synthetic_wav/
+exp/
+**/*.wav
\ No newline at end of file
diff --git a/README.md b/README.md
index 3530de2..e59f2e7 100644
--- a/README.md
+++ b/README.md
@@ -21,11 +21,60 @@ python setup.py build_ext --inplace
 Note that to avoid the trouble of installing [torchdyn](https://github.com/DiffEqML/torchdyn), we directly copy the torchdyn 1.0.6 version here locally at `torchdyn/`.
 
 The following process may also need `bash` and `perl` commands in your environment.
+
 ## Data Preparation
+This repo relies on Kaldi-style data organization.
+All data description files should be put in subdirectories in `data/`.
+See `data/ljspeech/example` for a basic example. 
+In this example, the following plain text files are necessary:
+1. `wav.scp`: organized as `utt /path/to/wav`.
+2. `utts.list`: every line specifies an utterance. This can be obtained by `cut -d ' ' -f 1 wav.scp > utts.list`.
+3. `utt2spk`: organized as `utt spk_name`.
+4. `text` and `phn_duration`: specifies the phoneme sequence and the corresponding integer durations (in frames).
+Also, there is a `data/ljspeech/phones.txt` file that specifies all the phones together with their indexes in the dictionary.
+
+For LJSpeech, we provide the processed file [online](https://huggingface.co/datasets/cantabile-kwok/ljspeech-1024-256-dur/resolve/main/ljspeech-1024-256.zip).
+You can download it and unzip to `data/ljspeech`.
+If you want to train on your own dataset, you might have to create these files yourself (or change the data loading strategy).
+
+After having these manifest files, please do the following to extract mel-spectrogram for training:
+```shell
+bash extract_fbank.sh --stage 0 --stop_stage 2 --nj 16
+# nj: number of parallel jobs. 
+# Have a look into the script if you need to change something
+# Bash variables before "parse_options.sh" can be passed by CLI, e.g. "--key value".
+```
+Note that we use **16kHz** data by default here.
+This will create `feats/fbank` and `feats/normed_fbank`, where Kaldi-style scp and ark files store the mel-spectrogram data. 
+The normed features will be used for training.
+
+If you want to use speaker-IDs (like LJSpeech, instead of using pretrained speaker embeddings such as xvectors) for training, please run:
+```shell
+make_utt2spk_id.py data/ljspeech/train/utt2spk data/ljspeech/val/utt2spk
+# You can add more files in CLI. Will write utt2spk_id.json in the same directory as each of these files.
+```
 
 ## Training
+Configurations for training are stored as yaml files in `configs/`.
+Data manifests and features for training and validation set will be specified in those yaml files.
+You will need to change double-quoted file paths there if you need to train on your own data.
+
+Then, training is performed by 
+```shell
+python train.py -c configs/${your_yaml} -m ${model_name}
+# e.g. python train.py -c configs/lj_16k_gt_dur.yaml -m lj_16k_gt_dur
+```
+It will create `logs/${model_name}` for logging and checkpointing.
+
+Several notes:
+* By default, the program performs EMA to average weights. Weights with or without EMA will both be saved. 
+* By default, the program will try to find the latest checkpoint for resuming. EMA checkpoints take priority over non-EMA checkpoints.
+* You can set `use_gt_dur` to `false` to turn on the MAS algorithm. In this setting, it is better to set `add_blank` to `true`.
 
+## Generate Data for ReFlow and Perform Reflow
+TO BE DONE
 ## Inference
+TO BE DONE
 
 ## Acknowledgement
 During the development, the following repositories were referred to:
diff --git a/cmd.sh b/cmd.sh
new file mode 100644
index 0000000..19f3421
--- /dev/null
+++ b/cmd.sh
@@ -0,0 +1,91 @@
+# ====== About run.pl, queue.pl, slurm.pl, and ssh.pl ======
+# Usage: <cmd>.pl [options] JOB=1:<nj> <log> <command...>
+# e.g.
+#   run.pl --mem 4G JOB=1:10 echo.JOB.log echo JOB
+#
+# Options:
+#   --time <time>: Limit the maximum time to execute.
+#   --mem <mem>: Limit the maximum memory usage.
+#   --max-jobs-run <njob>: Limit the number of parallel jobs. This is ignored for non-array jobs.
+#   --num-threads <nthreads>: Specify the number of CPU cores.
+#   --gpu <ngpu>: Specify the number of GPU devices.
+#   --config: Change the configuration file from default.
+#
+# "JOB=1:10" is used for "array jobs" and it can control the number of parallel jobs.
+# The left string of "=", i.e. "JOB", is replaced by <N>(Nth job) in the command and the log file name,
+# e.g. "echo JOB" is changed to "echo 3" for the 3rd job and "echo 8" for 8th job respectively.
+# Note that the number must start with a positive number, so you can't use "JOB=0:10" for example.
+#
+# run.pl, queue.pl, slurm.pl, and ssh.pl have a unified interface that does not depend on the backend.
+# These options are mapped to backend-specific options, and
+# the mapping is configured by "conf/queue.conf" and "conf/slurm.conf" by default.
+# If jobs fail, your configuration might be wrong for your environment.
+#
+#
+# The official documentation for run.pl, queue.pl, slurm.pl, and ssh.pl:
+#   "Parallelization in Kaldi": http://kaldi-asr.org/doc/queue.html
+# =========================================================~
+
+
+# Select the backend used by run.sh from "local", "stdout", "sge", "slurm", or "ssh"
+cmd_backend="local"
+
+# Local machine, without any Job scheduling system
+if [ "${cmd_backend}" = local ]; then
+
+    # The other usage
+    export train_cmd="utils/run.pl"
+    # Used for "*_train.py": "--gpu" is appended optionally by run.sh
+    export cuda_cmd="utils/run.pl"
+    # Used for "*_recog.py"
+    export decode_cmd="utils/run.pl"
+
+# Local machine, without any Job scheduling system
+elif [ "${cmd_backend}" = stdout ]; then
+
+    # The other usage
+    export train_cmd="utils/stdout.pl"
+    # Used for "*_train.py": "--gpu" is appended optionally by run.sh
+    export cuda_cmd="utils/stdout.pl"
+    # Used for "*_recog.py"
+    export decode_cmd="utils/stdout.pl"
+
+# "qsub" (SGE, Torque, PBS, etc.)
+elif [ "${cmd_backend}" = sge ]; then
+    # The default setting is written in conf/queue.conf.
+    # You must change "-q g.q" for the "queue" for your environment.
+    # To know the "queue" names, type "qhost -q"
+    # Note that to use "--gpu *", you have to setup "complex_value" for the system scheduler.
+
+    export train_cmd="utils/queue.pl"
+    export cuda_cmd="utils/queue.pl"
+    export decode_cmd="utils/queue.pl"
+
+# "sbatch" (Slurm)
+elif [ "${cmd_backend}" = slurm ]; then
+    # The default setting is written in conf/slurm.conf.
+    # You must change "-p cpu" and "-p gpu" for the "partition" for your environment.
+    # To know the "partition" names, type "sinfo".
+    # You can use "--gpu * " by default for slurm and it is interpreted as "--gres gpu:*"
+    # The devices are allocated exclusively using "${CUDA_VISIBLE_DEVICES}".
+
+    export train_cmd="utils/slurm.pl"
+    export cuda_cmd="utils/slurm.pl"
+    export decode_cmd="utils/slurm.pl"
+
+elif [ "${cmd_backend}" = ssh ]; then
+    # You have to create ".queue/machines" to specify the host to execute jobs.
+    # e.g. .queue/machines
+    #   host1
+    #   host2
+    #   host3
+    # Assuming you can log in to them without a password, i.e. you have to set up ssh keys.
+
+    export train_cmd="utils/ssh.pl"
+    export cuda_cmd="utils/ssh.pl"
+    export decode_cmd="utils/ssh.pl"
+
+else
+    echo "$0: Error: Unknown cmd_backend=${cmd_backend}" 1>&2
+    return 1
+fi
diff --git a/configs/lj_16k_gt_dur.yaml b/configs/lj_16k_gt_dur.yaml
new file mode 100644
index 0000000..d00ad38
--- /dev/null
+++ b/configs/lj_16k_gt_dur.yaml
@@ -0,0 +1,55 @@
+xvector: false  # whether to use xvector for speaker modeling.
+
+perform_reflow: false  # if true, noise_scp must be specified
+
+train:
+    test_size: 4
+    n_epochs: 10000
+    batch_size: 24
+    learning_rate: !!float 5e-5
+    seed: 37
+    save_every: 10
+    use_gt_dur: true  # whether to supervise duration modeling
+
+data:
+    sampling_rate: 16000
+    n_mel_channels: 80
+    add_blank: false  # whether to add blank tokens between input phones
+    hop_length: 200  # in sampling points
+
+    phn2id: "data/ljspeech/phones.txt"
+
+    train:
+        utts: "data/ljspeech/train/utts.list"
+        utt2phns: "data/ljspeech/train/text"
+        utt2phn_duration: "data/ljspeech/train/phn_duration"
+        feats_scp: "feats/normed_fbank/ljspeech/train/feats.scp"
+        utt2num_frames: "feats/normed_fbank/ljspeech/train/utt2num_frames"
+        utt2spk: "data/ljspeech/train/utt2spk_id.json"
+
+    val:
+        utts: "data/ljspeech/val/utts.list"
+        utt2phns: "data/ljspeech/val/text"
+        utt2phn_duration: "data/ljspeech/val/phn_duration"
+        feats_scp: "feats/normed_fbank/ljspeech/val/feats.scp"
+        utt2num_frames: "feats/normed_fbank/ljspeech/val/utt2num_frames"
+        utt2spk: "data/ljspeech/val/utt2spk_id.json"
+
+model:
+    n_vocab: 148
+    n_spks: 1
+    spk_emb_dim: 64
+    n_enc_channels: 192
+    filter_channels: 768
+    filter_channels_dp: 256
+    n_enc_layers: 6
+    enc_kernel: 3
+    enc_dropout: 0.1
+    n_heads: 2
+    window_size: 4
+    dec_dim: 128
+    pe_scale: 1000
+    fm_type: "CFM"  # FM, CFM
+    fm_net_type: "unet"  # unet or diffsinger
+    shift_by_mu: false  # whether to shift the prior distribution by mu. True means GradTTS-style.
+    condition_by_mu: true  # whether to condition the flow matching decoder by mu. False supports text-agnostic voice conversion like GlowTTS.
diff --git a/data/ljspeech/example/phn_duration b/data/ljspeech/example/phn_duration
new file mode 100644
index 0000000..da28e80
--- /dev/null
+++ b/data/ljspeech/example/phn_duration
@@ -0,0 +1,10 @@
+LJ001-0110 9 4 3 4 3 4 9 9 7 5 8 4 8 13 10 14 8 3 4 4 7 5 11 6 9 9 11 15 9 5 7 5 5 3 11 3 4 4 6 5 5 4 7 5 4 4 4 4 8 4 5 4 6 6 9 11
+LJ002-0018 3 3 3 3 7 3 3 5 3 3 9 4 4 3 3 4 10 7 8 4 3 5 5 6 4 7 4 6 3 6 4 3 3 4 6 4 3 4 3 3 5 6 11 10 11 11 9 10 9 6 4 3 4 9 10 5 5 3 4 4 3 5 8 4 4 5 9 4 7 5 3 3 3 5 6 4 5 4 4 4 7 7 4 4 4 7 3
+LJ002-0043 4 15 7 6 5 7 10 5 13 9 24 3 12 7 5 9 6 4 6 8 6 4 7 4 13 13 11 17 7 5 10 4 7 6 4 3 4 6 7 7 5 8 14 13 9 7 3 4 3 7 13 13 14 17 9 15 13
+LJ003-0111 4 3 4 4 4 5 5 6 4 5 4 5 4 3 6 5 9 7 6 8 4 10 5 4 3 3 4 4 5 6 5 5 6 4 3 4 3 4 8 5 8 6 8 7 4 3 5 9 24 19 12 4 4 6 5 10 12 27 5 9 5 8 12 3 3 4 3 6 8 3 3 6 3 4 3 3 3 7 6 4 5 5 3 4 7 6 7 4 6 16
+LJ003-0345 10 12 4 3 4 4 4 3 4 4 6 3 5 3 6 12 3 3 3 3 8 4 5 5 6 10 6 8 17 8 3 4 4 4 7 5 6 3 4 3 6 4 4 5 6 4 4 3 4 4 5 5 8 8 4 3 10 5 10 21
+LJ004-0045 3 3 3 3 4 12 5 8 5 5 7 10 9 9 11 24 15 5 11 8 5 6 6 6 5 3 3 8 12 25 3 14 6 9 6 5 4 13 4 4 6 7 5 14 21 10 4 5 3 4 3 6 5 6 5 7 5 3 3 5 8 5 7 18
+LJ004-0096 15 4 5 8 4 4 3 4 6 5 4 3 5 4 3 3 3 5 4 4 5 3 6 9 4 3 6 3 3 4 6 3 3 5 5 6 4 4 5 3 4 5 4 3 8 3 5 3 4 6 5 4 5 3 3 4 7 7 5 5 4 7 6 5 8 10 6 8 10 6 6 13
+LJ004-0152 5 4 6 6 3 3 3 3 4 4 5 5 4 4 4 4 3 3 3 3 4 7 4 8 5 4 8 7 8 6 8 3 3 4 5 4 5 3 5 7 5 4 5 3 3 8 4 5 7 4 11 7 6 5 5 6 6 7 6 6 6 10 5 6 4 4 6 3 3 9 9 7 7 5 3 3 3 7 9 6 3 3 3 3 4 4 5 7 8 8 6 10 22 10 6 6 4 4 3 4 6 6 7 6 5 4 3 4 4 12 8 8 3
+LJ005-0014 8 4 7 5 4 6 3 5 4 3 4 5 12 7 4 7 4 5 3 5 4 4 3 4 9 5 8 23 12 9 3 3 5 6 4 7 5 3 3 6 12
+LJ005-0079 4 3 3 3 3 6 4 5 4 3 4 4 5 4 4 3 6 4 3 3 4 5 4 4 4 6 9 14 18 6 3 4 7 4 6 3 4 6 6 4 4 3 5 4 4 6 4 7 5 7 5 4 5 5 5 3 4 5 5 4 3 7 3 5 6 3 4 5 5 5 5 4 4 5 3 6 9 4 4 5 8 22
diff --git a/data/ljspeech/example/text b/data/ljspeech/example/text
new file mode 100644
index 0000000..2d9ecf7
--- /dev/null
+++ b/data/ljspeech/example/text
@@ -0,0 +1,10 @@
+LJ001-0110 IY1 V IH0 N DH AH1 K AE1 S L AH0 N T AY1 P sil W EH1 N IH0 N L AA1 R JH D sil SH OW1 Z G R EY1 T SH AO1 R T K AH2 M IH0 NG Z IH1 N DH IH1 S R IY0 S P EH1 K T
+LJ002-0018 DH IY0 IH0 N AE1 D IH0 K W AH0 S IY0 AH0 V DH AH0 JH EY1 L W AH0 Z N OW1 T AH0 S T sil AE1 N D R IH0 P AO1 R T AH0 D AH0 P AA1 N sil AH0 G EH1 N AE1 N D AH0 G EH1 N B AY1 DH AH1 G R AE1 N D JH UH1 R IY0 Z AH0 V DH AH0 S IH1 T IY0 AH0 V L AH1 N D AH0 N sil
+LJ002-0043 L AO1 NG N EH1 R OW0 R UW1 M Z sil W AH1 N TH ER1 T IY2 S IH1 K S F IY1 T sil S IH1 K S sil T W EH1 N T IY0 TH R IY1 F IY1 T sil AE1 N D DH IY0 EY1 TH sil EY0 T IY1 N
+LJ003-0111 HH IY1 W AH0 Z IH0 N K AA1 N S AH0 K W AH0 N S P UH1 T sil AW1 T AH0 V DH AH1 P ER0 T EH1 K SH AH0 N AH0 V DH EH1 R IH0 N T ER1 N AH0 L L AO1 sil EH1 N D K W OW1 T sil DH EH1 R K OW1 D W AH0 Z AH0 S AH1 B JH IH0 K T AH0 V S AH1 M K Y UH2 R IY0 AA1 S AH0 T IY0
+LJ003-0345 sil AO1 L DH AH1 K AH0 M IH1 T IY0 K UH1 D D UW1 IH0 N DH IH0 S R IY0 S P EH1 K T sil W AH0 Z T AH0 TH R OW1 DH AH0 R IY0 S P AA2 N S AH0 B IH1 L AH0 T IY0 AA1 N sil AH1 DH ER0 Z
+LJ004-0045 M IH1 S T ER0 S T ER1 JH IH0 Z B AO1 R N sil S ER1 JH EY1 M Z M AE1 K AH0 N T AA2 SH sil S ER1 JH EY1 M Z S K AA1 R L IH0 T sil AE1 N D W IH1 L Y AH0 M sil W IH1 L B ER0 F AO2 R S
+LJ004-0096 sil DH AH1 F EY1 T AH0 L K AA1 N S AH0 K W EH2 N S AH0 Z W EH2 R AH1 V M AY1 T B IY0 P R IH0 V EH1 N T IH0 D sil IH0 F DH AH0 JH AH1 S T IH0 S IH0 Z AH0 V DH AH1 P IY1 S W ER0 D UW1 L IY0 AO1 TH ER0 AY2 Z D sil
+LJ004-0152 AO2 L DH OW1 AE1 T M IH1 S T ER0 B AH1 K S T AH0 N EH1 S V IH1 Z IH0 T AH0 N UW1 JH EY1 L W AH0 Z IH0 N P R AA1 S EH2 S AH0 V IH0 R EH1 K SH AH0 N sil DH AH0 F ER1 S T S T EH1 P T AO1 R D Z R AH0 F AO1 R M S IH1 N S HH AW1 ER0 D EH1 S V IH2 Z IH0 T EY1 SH AH0 N sil IH1 N S EH1 V AH0 N T IY1 N S EH1 V AH0 N IY0 F AO1 R sil
+LJ005-0014 S P IY1 K IH0 NG AA1 N AH0 D AH0 B EY1 T sil AO1 N P R IH1 Z AH0 N M AE1 T ER0 Z sil HH IY1 D IH0 K L EH1 R D DH AE1 T
+LJ005-0079 AE1 N D IH0 M P R UW1 V DH AH0 M AO1 R AH0 L Z AH0 V DH AH1 P R IH1 Z N ER0 Z sil AE1 N D SH AE1 L IH0 N SH UH1 R DH AH0 P R AA1 P ER0 M EH1 ZH ER0 AH0 V P AH1 N IH0 SH M AH0 N T sil T AH0 K AH0 N V IH1 K T AH0 D sil AH0 F EH1 N D ER0 Z
diff --git a/data/ljspeech/example/utt2spk b/data/ljspeech/example/utt2spk
new file mode 100644
index 0000000..03fa0f5
--- /dev/null
+++ b/data/ljspeech/example/utt2spk
@@ -0,0 +1,10 @@
+LJ001-0110 LJ
+LJ002-0018 LJ
+LJ002-0043 LJ
+LJ003-0111 LJ
+LJ003-0345 LJ
+LJ004-0045 LJ
+LJ004-0096 LJ
+LJ004-0152 LJ
+LJ005-0014 LJ
+LJ005-0079 LJ
diff --git a/data/ljspeech/example/utts.list b/data/ljspeech/example/utts.list
new file mode 100644
index 0000000..eaebcef
--- /dev/null
+++ b/data/ljspeech/example/utts.list
@@ -0,0 +1,10 @@
+LJ001-0110
+LJ002-0018
+LJ002-0043
+LJ003-0111
+LJ003-0345
+LJ004-0045
+LJ004-0096
+LJ004-0152
+LJ005-0014
+LJ005-0079
diff --git a/data/ljspeech/example/wav.scp b/data/ljspeech/example/wav.scp
new file mode 100644
index 0000000..db1fa3f
--- /dev/null
+++ b/data/ljspeech/example/wav.scp
@@ -0,0 +1,10 @@
+LJ001-0110 /path/to/dataset/LJ001-0110.wav
+LJ002-0018 /path/to/dataset/LJ002-0018.wav
+LJ002-0043 /path/to/dataset/LJ002-0043.wav
+LJ003-0111 /path/to/dataset/LJ003-0111.wav
+LJ003-0345 /path/to/dataset/LJ003-0345.wav
+LJ004-0045 /path/to/dataset/LJ004-0045.wav
+LJ004-0096 /path/to/dataset/LJ004-0096.wav
+LJ004-0152 /path/to/dataset/LJ004-0152.wav
+LJ005-0014 /path/to/dataset/LJ005-0014.wav
+LJ005-0079 /path/to/dataset/LJ005-0079.wav
diff --git a/extract_fbank.sh b/extract_fbank.sh
index 23c2735..b70cca2 100644
--- a/extract_fbank.sh
+++ b/extract_fbank.sh
@@ -3,16 +3,16 @@
 
 nj=16     # number of parallel jobs in feature extraction
 sampling_rate=16000        # sampling frequency
-fmax=       # maximum frequency
-fmin=         # minimum frequency
+fmax=       # maximum frequency. If left blank, default to half the sampling rate
+fmin=         # minimum frequency. If left blank, default to 0.
 num_mels=80     # number of mel basis
 fft_size=1024   # number of fft points
 hop_size=256    # number of shift points
-win_length=  # window length
+win_length=  # window length. If left blank, default to minimum integer value that is greater than hop_size and is a power of 2.
 
-train_set="train" # name of training data directory
-dev_set="val"           # name of development data directory
-eval_set="val"         # name of evaluation data directory
+train_set="ljspeech/train" # name of training data directory
+dev_set="ljspeech/val"           # name of development data directory
+eval_set="ljspeech/val"         # name of evaluation data directory
 
 stage=0
 stop_stage=100
@@ -45,7 +45,9 @@ fi
 if [ "${stage}" -le 1 ] && [ "${stop_stage}" -ge 1 ]; then
     echo "Cepstral Mean Variance Normalization"
     feat_name=fbank
-    compute-cmvn-stats.py scp:${featdir}/${feat_name}/${train_set}/feats.scp ${featdir}/${feat_name}/${train_set}/cmvn.ark
+
+    # if you want to compute the CMVN stats instead of using the provided one, un-comment the line below.
+    # compute-cmvn-stats.py scp:${featdir}/${feat_name}/${train_set}/feats.scp ${featdir}/${feat_name}/${train_set}/cmvn.ark
     for x in ${train_set} ${dev_set} ${eval_set} ; do
         echo "Applying normalization for dataset ${x}"
         mkdir -p ${featdir}/normed_${feat_name}/${x} ;
@@ -55,3 +57,12 @@ if [ "${stage}" -le 1 ] && [ "${stop_stage}" -ge 1 ]; then
                     ark,scp:${featdir}/normed_${feat_name}/${x}/feats.ark,${featdir}/normed_${feat_name}/${x}/feats.scp
     done
 fi
+
+if [ "${stage}" -le 2 ] && [ "${stop_stage}" -ge 2 ]; then
+    echo "Write utt2num_frames"
+    feat_name=fbank
+    for x in ${train_set} ${dev_set} ${eval_set} ; do
+        feat-to-len.py scp:${featdir}/normed_${feat_name}/${x}/feats.scp > ${featdir}/normed_${feat_name}/${x}/utt2num_frames
+    done
+fi
+
diff --git a/feats/fbank/ljspeech/train/cmvn.ark b/feats/fbank/ljspeech/train/cmvn.ark
new file mode 100644
index 0000000..950acea
Binary files /dev/null and b/feats/fbank/ljspeech/train/cmvn.ark differ
diff --git a/generate_for_reflow.py b/generate_for_reflow.py
index 5c62c46..1f59e35 100644
--- a/generate_for_reflow.py
+++ b/generate_for_reflow.py
@@ -9,18 +9,18 @@
 import torch.multiprocessing as mp
 import torch.distributed as dist
 from torch.nn.parallel import DistributedDataParallel as DDP
-import utils
+import tools
 
 
 def run(rank, n_gpus, hps, args, ckpt, feats_dir, temp_dir):
-    logger = utils.get_logger(hps.model_dir, f"inference.{rank}.log")  # NOTE: cannot delete this line.
+    logger = tools.get_logger(hps.model_dir, f"inference.{rank}.log")  # NOTE: cannot delete this line.
     device = torch.device('cpu' if not torch.cuda.is_available() else f"cuda:{rank}")
     torch.manual_seed(hps.train.seed)  # NOTE: control seed
 
     setattr(hps.data, "train_utts" if args.dataset == "train" else "val_utts", f"{temp_dir}/{rank}.txt")
 
-    train_dataset, collate_fn, model = utils.get_correct_class(hps)
-    val_dataset, _, _ = utils.get_correct_class(hps, train=False)
+    train_dataset, collate_fn, model = tools.get_correct_class(hps)
+    val_dataset, _, _ = tools.get_correct_class(hps, train=False)
 
     batch_collate = collate_fn
     train_loader = DataLoader(
@@ -40,7 +40,7 @@ def run(rank, n_gpus, hps, args, ckpt, feats_dir, temp_dir):
         shuffle=False,
     )
     model = model(**hps.model).to(device)
-    utils.load_checkpoint(ckpt, model, None)
+    tools.load_checkpoint(ckpt, model, None)
     print(f"Loaded checkpoint from {ckpt}")
     model.to(device).eval()
     print(f"Number of parameters: {model.nparams}")
@@ -147,8 +147,8 @@ def run(rank, n_gpus, hps, args, ckpt, feats_dir, temp_dir):
     os.environ["MASTER_ADDR"] = "localhost"
     os.environ["MASTER_PORT"] = "80000"
 
-    hps, args = utils.get_hparams_decode()
-    ckpt = utils.latest_checkpoint_path(hps.model_dir, "grad_*.pt" if not args.EMA else "EMA_grad_*.pt")
+    hps, args = tools.get_hparams_decode()
+    ckpt = tools.latest_checkpoint_path(hps.model_dir, "grad_*.pt" if not args.EMA else "EMA_grad_*.pt")
 
     if args.use_control_spk:
         feats_dir = f"synthetic_wav/{args.model}/tts_other_spk"
diff --git a/inference_dataset.py b/inference_dataset.py
index 42ef5f9..6079eda 100644
--- a/inference_dataset.py
+++ b/inference_dataset.py
@@ -5,16 +5,16 @@
 from torch.utils.data import DataLoader
 from tqdm import tqdm
 
-import utils
+import tools
 
 
 # @profile
 def evaluate(hps, args, ckpt, feats_dir):
-    logger = utils.get_logger(hps.model_dir, "inference.log")
+    logger = tools.get_logger(hps.model_dir, "inference.log")
     device = torch.device('cpu' if not torch.cuda.is_available() else "cuda")
     torch.manual_seed(hps.train.seed)  # NOTE: control seed
-    train_dataset, collate_fn, model = utils.get_correct_class(hps)
-    val_dataset, _, _ = utils.get_correct_class(hps, train=False)
+    train_dataset, collate_fn, model = tools.get_correct_class(hps)
+    val_dataset, _, _ = tools.get_correct_class(hps, train=False)
     batch_collate = collate_fn
     train_loader = DataLoader(dataset=train_dataset, batch_size=1,
                               collate_fn=batch_collate, drop_last=True,
@@ -23,7 +23,7 @@ def evaluate(hps, args, ckpt, feats_dir):
                             collate_fn=batch_collate, drop_last=True,
                             num_workers=4, shuffle=False)
     model = model(**hps.model).to(device)
-    utils.load_checkpoint(ckpt, model, None)
+    tools.load_checkpoint(ckpt, model, None)
     print(f"Loaded checkpoint from {ckpt}")
     _ = model.cuda().eval()
     print(f'Number of parameters: {model.nparams}')
@@ -95,8 +95,8 @@ def evaluate(hps, args, ckpt, feats_dir):
 
 
 if __name__ == '__main__':
-    hps, args = utils.get_hparams_decode()
-    ckpt = utils.latest_checkpoint_path(hps.model_dir, "grad_*.pt" if not args.EMA else "EMA_grad_*.pt")
+    hps, args = tools.get_hparams_decode()
+    ckpt = tools.latest_checkpoint_path(hps.model_dir, "grad_*.pt" if not args.EMA else "EMA_grad_*.pt")
 
     if args.use_control_spk:
         feats_dir = f"synthetic_wav/{args.model}/tts_other_spk"
diff --git a/path.sh b/path.sh
index 83e583a..52bd140 100644
--- a/path.sh
+++ b/path.sh
@@ -1,3 +1,3 @@
 conda activate vflow
-export PATH=$PWD/tools:$PATH
-chmod +x tools/*
+export PATH=$PWD/utils:$PATH
+chmod +x utils/*
diff --git a/requirements.txt b/requirements.txt
index 26969bd..63ffb50 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -25,3 +25,4 @@ attrs==22.1.0
 torchsde>=0.2.5
 torchcde>=0.2.3
 pytorch-lightning>=0.8.4
+filelock==3.12.4
diff --git a/utils.py b/tools.py
similarity index 100%
rename from utils.py
rename to tools.py
diff --git a/train.py b/train.py
index 8047526..e1d396e 100644
--- a/train.py
+++ b/train.py
@@ -17,9 +17,9 @@
 from torch.utils.data import DataLoader
 from torch.utils.tensorboard import SummaryWriter
 from data_collate import DistributedBucketSampler
-from utils import plot_tensor, save_plot
+from tools import plot_tensor, save_plot
 from model.utils import fix_len_compatibility
-import utils
+import tools
 
 
 class ModelEmaV2(torch.nn.Module):
@@ -52,7 +52,7 @@ def state_dict(self, destination=None, prefix='', keep_vars=False):
 
 
 def run(rank, n_gpus, hps):
-    logger_text = utils.get_logger(hps.model_dir)
+    logger_text = tools.get_logger(hps.model_dir)
     logger_text.info(hps)
     out_size = fix_len_compatibility(getattr(hps.data, "cut_segment_seconds", 2) * hps.data.sampling_rate // hps.data.hop_length)
     # NOTE: cut_segment_seconds sec of mel-spec
@@ -68,8 +68,8 @@ def run(rank, n_gpus, hps):
     if rank == 0:
         print('Initializing logger...')
         logger = SummaryWriter(log_dir=log_dir)
-    train_dataset, collate, model = utils.get_correct_class(hps)
-    test_dataset, _, _ = utils.get_correct_class(hps, train=False)
+    train_dataset, collate, model = tools.get_correct_class(hps)
+    test_dataset, _, _ = tools.get_correct_class(hps, train=False)
 
     print('Initializing data loaders...')
     train_sampler = DistributedBucketSampler(
@@ -108,11 +108,11 @@ def run(rank, n_gpus, hps):
 
     try:
         try:
-            ckpt = utils.latest_checkpoint_path(hps.model_dir, "EMA_grad_*.pt")
+            ckpt = tools.latest_checkpoint_path(hps.model_dir, "EMA_grad_*.pt")
         except IndexError:
             print(f"Cannot find EMA checkpoint. Trying to find normal checkpoint.")
-            ckpt = utils.latest_checkpoint_path(hps.model_dir, "grad_*.pt")
-        model, optimizer, learning_rate, epoch_logged = utils.load_checkpoint(ckpt, model, optimizer)
+            ckpt = tools.latest_checkpoint_path(hps.model_dir, "grad_*.pt")
+        model, optimizer, learning_rate, epoch_logged = tools.load_checkpoint(ckpt, model, optimizer)
         epoch_start = epoch_logged + 1
         print(f"Loaded checkpoint from {epoch_logged} epoch, resuming training.")
         # optimizer.step_num = (epoch_str - 1) * len(train_dataset)
@@ -232,8 +232,8 @@ def run(rank, n_gpus, hps):
                     save_plot(attn.squeeze().cpu(),
                               f'{log_dir}/alignment_{i}.png')
 
-            utils.save_checkpoint(ema_model, optimizer, learning_rate, epoch, checkpoint_path=f"{log_dir}/EMA_grad_{epoch}.pt")
-            utils.save_checkpoint(model, optimizer, learning_rate, epoch, checkpoint_path=f"{log_dir}/grad_{epoch}.pt")
+            tools.save_checkpoint(ema_model, optimizer, learning_rate, epoch, checkpoint_path=f"{log_dir}/EMA_grad_{epoch}.pt")
+            tools.save_checkpoint(model, optimizer, learning_rate, epoch, checkpoint_path=f"{log_dir}/grad_{epoch}.pt")
 
 
 if __name__ == "__main__":
@@ -245,5 +245,5 @@ def run(rank, n_gpus, hps):
     os.environ['MASTER_ADDR'] = 'localhost'
     os.environ['MASTER_PORT'] = '8001'
 
-    hps = utils.get_hparams()
+    hps = tools.get_hparams()
     mp.spawn(run, nprocs=n_gpus, args=(n_gpus, hps,))
diff --git a/tools/apply-cmvn.py b/utils/apply-cmvn.py
similarity index 100%
rename from tools/apply-cmvn.py
rename to utils/apply-cmvn.py
diff --git a/tools/compute-cmvn-stats.py b/utils/compute-cmvn-stats.py
similarity index 100%
rename from tools/compute-cmvn-stats.py
rename to utils/compute-cmvn-stats.py
diff --git a/tools/compute-fbank-feats.py b/utils/compute-fbank-feats.py
similarity index 100%
rename from tools/compute-fbank-feats.py
rename to utils/compute-fbank-feats.py
diff --git a/tools/copy-feats.py b/utils/copy-feats.py
similarity index 100%
rename from tools/copy-feats.py
rename to utils/copy-feats.py
diff --git a/tools/custom_librosa/__init__.py b/utils/custom_librosa/__init__.py
similarity index 100%
rename from tools/custom_librosa/__init__.py
rename to utils/custom_librosa/__init__.py
diff --git a/tools/custom_librosa/_cache.py b/utils/custom_librosa/_cache.py
similarity index 100%
rename from tools/custom_librosa/_cache.py
rename to utils/custom_librosa/_cache.py
diff --git a/tools/custom_librosa/beat.py b/utils/custom_librosa/beat.py
similarity index 100%
rename from tools/custom_librosa/beat.py
rename to utils/custom_librosa/beat.py
diff --git a/tools/custom_librosa/core/__init__.py b/utils/custom_librosa/core/__init__.py
similarity index 100%
rename from tools/custom_librosa/core/__init__.py
rename to utils/custom_librosa/core/__init__.py
diff --git a/tools/custom_librosa/core/audio.py b/utils/custom_librosa/core/audio.py
similarity index 100%
rename from tools/custom_librosa/core/audio.py
rename to utils/custom_librosa/core/audio.py
diff --git a/tools/custom_librosa/core/constantq.py b/utils/custom_librosa/core/constantq.py
similarity index 100%
rename from tools/custom_librosa/core/constantq.py
rename to utils/custom_librosa/core/constantq.py
diff --git a/tools/custom_librosa/core/convert.py b/utils/custom_librosa/core/convert.py
similarity index 100%
rename from tools/custom_librosa/core/convert.py
rename to utils/custom_librosa/core/convert.py
diff --git a/tools/custom_librosa/core/fft.py b/utils/custom_librosa/core/fft.py
similarity index 100%
rename from tools/custom_librosa/core/fft.py
rename to utils/custom_librosa/core/fft.py
diff --git a/tools/custom_librosa/core/harmonic.py b/utils/custom_librosa/core/harmonic.py
similarity index 100%
rename from tools/custom_librosa/core/harmonic.py
rename to utils/custom_librosa/core/harmonic.py
diff --git a/tools/custom_librosa/core/notation.py b/utils/custom_librosa/core/notation.py
similarity index 100%
rename from tools/custom_librosa/core/notation.py
rename to utils/custom_librosa/core/notation.py
diff --git a/tools/custom_librosa/core/pitch.py b/utils/custom_librosa/core/pitch.py
similarity index 100%
rename from tools/custom_librosa/core/pitch.py
rename to utils/custom_librosa/core/pitch.py
diff --git a/tools/custom_librosa/core/spectrum.py b/utils/custom_librosa/core/spectrum.py
similarity index 100%
rename from tools/custom_librosa/core/spectrum.py
rename to utils/custom_librosa/core/spectrum.py
diff --git a/tools/custom_librosa/decompose.py b/utils/custom_librosa/decompose.py
similarity index 100%
rename from tools/custom_librosa/decompose.py
rename to utils/custom_librosa/decompose.py
diff --git a/tools/custom_librosa/display.py b/utils/custom_librosa/display.py
similarity index 100%
rename from tools/custom_librosa/display.py
rename to utils/custom_librosa/display.py
diff --git a/tools/custom_librosa/effects.py b/utils/custom_librosa/effects.py
similarity index 100%
rename from tools/custom_librosa/effects.py
rename to utils/custom_librosa/effects.py
diff --git a/tools/custom_librosa/feature/__init__.py b/utils/custom_librosa/feature/__init__.py
similarity index 100%
rename from tools/custom_librosa/feature/__init__.py
rename to utils/custom_librosa/feature/__init__.py
diff --git a/tools/custom_librosa/feature/inverse.py b/utils/custom_librosa/feature/inverse.py
similarity index 100%
rename from tools/custom_librosa/feature/inverse.py
rename to utils/custom_librosa/feature/inverse.py
diff --git a/tools/custom_librosa/feature/rhythm.py b/utils/custom_librosa/feature/rhythm.py
similarity index 100%
rename from tools/custom_librosa/feature/rhythm.py
rename to utils/custom_librosa/feature/rhythm.py
diff --git a/tools/custom_librosa/feature/spectral.py b/utils/custom_librosa/feature/spectral.py
similarity index 100%
rename from tools/custom_librosa/feature/spectral.py
rename to utils/custom_librosa/feature/spectral.py
diff --git a/tools/custom_librosa/feature/utils.py b/utils/custom_librosa/feature/utils.py
similarity index 100%
rename from tools/custom_librosa/feature/utils.py
rename to utils/custom_librosa/feature/utils.py
diff --git a/tools/custom_librosa/filters.py b/utils/custom_librosa/filters.py
similarity index 100%
rename from tools/custom_librosa/filters.py
rename to utils/custom_librosa/filters.py
diff --git a/tools/custom_librosa/onset.py b/utils/custom_librosa/onset.py
similarity index 100%
rename from tools/custom_librosa/onset.py
rename to utils/custom_librosa/onset.py
diff --git a/tools/custom_librosa/segment.py b/utils/custom_librosa/segment.py
similarity index 100%
rename from tools/custom_librosa/segment.py
rename to utils/custom_librosa/segment.py
diff --git a/tools/custom_librosa/sequence.py b/utils/custom_librosa/sequence.py
similarity index 100%
rename from tools/custom_librosa/sequence.py
rename to utils/custom_librosa/sequence.py
diff --git a/tools/custom_librosa/util/__init__.py b/utils/custom_librosa/util/__init__.py
similarity index 100%
rename from tools/custom_librosa/util/__init__.py
rename to utils/custom_librosa/util/__init__.py
diff --git a/tools/custom_librosa/util/_nnls.py b/utils/custom_librosa/util/_nnls.py
similarity index 100%
rename from tools/custom_librosa/util/_nnls.py
rename to utils/custom_librosa/util/_nnls.py
diff --git a/tools/custom_librosa/util/decorators.py b/utils/custom_librosa/util/decorators.py
similarity index 100%
rename from tools/custom_librosa/util/decorators.py
rename to utils/custom_librosa/util/decorators.py
diff --git a/tools/custom_librosa/util/deprecation.py b/utils/custom_librosa/util/deprecation.py
similarity index 100%
rename from tools/custom_librosa/util/deprecation.py
rename to utils/custom_librosa/util/deprecation.py
diff --git a/tools/custom_librosa/util/example_data/index.json b/utils/custom_librosa/util/example_data/index.json
similarity index 100%
rename from tools/custom_librosa/util/example_data/index.json
rename to utils/custom_librosa/util/example_data/index.json
diff --git a/tools/custom_librosa/util/example_data/registry.txt b/utils/custom_librosa/util/example_data/registry.txt
similarity index 100%
rename from tools/custom_librosa/util/example_data/registry.txt
rename to utils/custom_librosa/util/example_data/registry.txt
diff --git a/tools/custom_librosa/util/exceptions.py b/utils/custom_librosa/util/exceptions.py
similarity index 100%
rename from tools/custom_librosa/util/exceptions.py
rename to utils/custom_librosa/util/exceptions.py
diff --git a/tools/custom_librosa/util/files.py b/utils/custom_librosa/util/files.py
similarity index 100%
rename from tools/custom_librosa/util/files.py
rename to utils/custom_librosa/util/files.py
diff --git a/tools/custom_librosa/util/matching.py b/utils/custom_librosa/util/matching.py
similarity index 100%
rename from tools/custom_librosa/util/matching.py
rename to utils/custom_librosa/util/matching.py
diff --git a/tools/custom_librosa/util/utils.py b/utils/custom_librosa/util/utils.py
similarity index 100%
rename from tools/custom_librosa/util/utils.py
rename to utils/custom_librosa/util/utils.py
diff --git a/tools/custom_librosa/version.py b/utils/custom_librosa/version.py
similarity index 100%
rename from tools/custom_librosa/version.py
rename to utils/custom_librosa/version.py
diff --git a/tools/espnet_transform/add_deltas.py b/utils/espnet_transform/add_deltas.py
similarity index 100%
rename from tools/espnet_transform/add_deltas.py
rename to utils/espnet_transform/add_deltas.py
diff --git a/tools/espnet_transform/channel_selector.py b/utils/espnet_transform/channel_selector.py
similarity index 100%
rename from tools/espnet_transform/channel_selector.py
rename to utils/espnet_transform/channel_selector.py
diff --git a/tools/espnet_transform/cmvn.py b/utils/espnet_transform/cmvn.py
similarity index 100%
rename from tools/espnet_transform/cmvn.py
rename to utils/espnet_transform/cmvn.py
diff --git a/tools/espnet_transform/functional.py b/utils/espnet_transform/functional.py
similarity index 100%
rename from tools/espnet_transform/functional.py
rename to utils/espnet_transform/functional.py
diff --git a/tools/espnet_transform/perturb.py b/utils/espnet_transform/perturb.py
similarity index 100%
rename from tools/espnet_transform/perturb.py
rename to utils/espnet_transform/perturb.py
diff --git a/tools/espnet_transform/spec_augment.py b/utils/espnet_transform/spec_augment.py
similarity index 100%
rename from tools/espnet_transform/spec_augment.py
rename to utils/espnet_transform/spec_augment.py
diff --git a/tools/espnet_transform/spectrogram.py b/utils/espnet_transform/spectrogram.py
similarity index 100%
rename from tools/espnet_transform/spectrogram.py
rename to utils/espnet_transform/spectrogram.py
diff --git a/tools/espnet_transform/transform_interface.py b/utils/espnet_transform/transform_interface.py
similarity index 100%
rename from tools/espnet_transform/transform_interface.py
rename to utils/espnet_transform/transform_interface.py
diff --git a/tools/espnet_transform/transformation.py b/utils/espnet_transform/transformation.py
similarity index 100%
rename from tools/espnet_transform/transformation.py
rename to utils/espnet_transform/transformation.py
diff --git a/tools/espnet_transform/wpe.py b/utils/espnet_transform/wpe.py
similarity index 100%
rename from tools/espnet_transform/wpe.py
rename to utils/espnet_transform/wpe.py
diff --git a/tools/espnet_utils/cli_readers.py b/utils/espnet_utils/cli_readers.py
similarity index 100%
rename from tools/espnet_utils/cli_readers.py
rename to utils/espnet_utils/cli_readers.py
diff --git a/tools/espnet_utils/cli_utils.py b/utils/espnet_utils/cli_utils.py
similarity index 100%
rename from tools/espnet_utils/cli_utils.py
rename to utils/espnet_utils/cli_utils.py
diff --git a/tools/espnet_utils/cli_writers.py b/utils/espnet_utils/cli_writers.py
similarity index 100%
rename from tools/espnet_utils/cli_writers.py
rename to utils/espnet_utils/cli_writers.py
diff --git a/tools/espnet_utils/dynamic_import.py b/utils/espnet_utils/dynamic_import.py
similarity index 100%
rename from tools/espnet_utils/dynamic_import.py
rename to utils/espnet_utils/dynamic_import.py
diff --git a/tools/espnet_utils/io_utils.py b/utils/espnet_utils/io_utils.py
similarity index 100%
rename from tools/espnet_utils/io_utils.py
rename to utils/espnet_utils/io_utils.py
diff --git a/tools/feat-to-len.py b/utils/feat-to-len.py
similarity index 100%
rename from tools/feat-to-len.py
rename to utils/feat-to-len.py
diff --git a/tools/feat-to-shape.py b/utils/feat-to-shape.py
similarity index 100%
rename from tools/feat-to-shape.py
rename to utils/feat-to-shape.py
diff --git a/tools/filter_scp.pl b/utils/filter_scp.pl
similarity index 100%
rename from tools/filter_scp.pl
rename to utils/filter_scp.pl
diff --git a/tools/fix_data_dir.sh b/utils/fix_data_dir.sh
similarity index 100%
rename from tools/fix_data_dir.sh
rename to utils/fix_data_dir.sh
diff --git a/tools/make_fbank.sh b/utils/make_fbank.sh
similarity index 100%
rename from tools/make_fbank.sh
rename to utils/make_fbank.sh
diff --git a/utils/make_utt2spk_id.py b/utils/make_utt2spk_id.py
new file mode 100644
index 0000000..82f2aa6
--- /dev/null
+++ b/utils/make_utt2spk_id.py
@@ -0,0 +1,46 @@
+#!/usr/bin/env python3
+"""Assign an integer ID to every speaker found in Kaldi-style utt2spk files.
+
+Usage: make_utt2spk_id.py <utt2spk> [<utt2spk> ...]
+
+All given files share one speaker-to-ID table, so the same speaker gets the
+same ID in every file.  For each input file, a `utt2spk_id.json` mapping
+utterance -> speaker ID is written to the directory containing that file.
+"""
+
+import sys
+import json
+import os
+
+
+def read_utt2spk(path):
+    """Return the (utt, spk) pairs of one utt2spk file, skipping blank lines."""
+    pairs = []
+    with open(path, 'r', encoding='utf-8') as fr:
+        for lineno, line in enumerate(fr, 1):
+            fields = line.split()
+            if not fields:
+                continue
+            if len(fields) != 2:
+                raise ValueError(
+                    f"{path}:{lineno}: expected 'utt spk', got {line.rstrip()!r}")
+            pairs.append(tuple(fields))
+    return pairs
+
+
+def main(utt2spk_list):
+    """Write utt2spk_id.json next to each file in `utt2spk_list`."""
+    utt2spk = {path: read_utt2spk(path) for path in utt2spk_list}
+    spks = {spk for pairs in utt2spk.values() for _, spk in pairs}
+    # Sort before numbering: set iteration order of strings varies between
+    # interpreter runs (hash randomization), which would make IDs irreproducible.
+    spk2id = {spk: i for i, spk in enumerate(sorted(spks))}
+    for path, pairs in utt2spk.items():
+        utt2id = {utt: spk2id[spk] for utt, spk in pairs}
+        out_path = os.path.join(os.path.dirname(path), "utt2spk_id.json")
+        with open(out_path, 'w', encoding='utf-8') as fw:
+            json.dump(utt2id, fw, indent=4, ensure_ascii=False)
+
+
+if __name__ == "__main__":
+    main(sys.argv[1:])
\ No newline at end of file
diff --git a/tools/parse_options.sh b/utils/parse_options.sh
similarity index 100%
rename from tools/parse_options.sh
rename to utils/parse_options.sh
diff --git a/tools/paste-feats.py b/utils/paste-feats.py
similarity index 100%
rename from tools/paste-feats.py
rename to utils/paste-feats.py
diff --git a/tools/run.pl b/utils/run.pl
similarity index 100%
rename from tools/run.pl
rename to utils/run.pl
diff --git a/tools/spk2utt_to_utt2spk.pl b/utils/spk2utt_to_utt2spk.pl
similarity index 100%
rename from tools/spk2utt_to_utt2spk.pl
rename to utils/spk2utt_to_utt2spk.pl
diff --git a/tools/split_scp.pl b/utils/split_scp.pl
similarity index 100%
rename from tools/split_scp.pl
rename to utils/split_scp.pl
diff --git a/tools/subset_data_dir.sh b/utils/subset_data_dir.sh
similarity index 100%
rename from tools/subset_data_dir.sh
rename to utils/subset_data_dir.sh
diff --git a/tools/subset_scp.pl b/utils/subset_scp.pl
similarity index 100%
rename from tools/subset_scp.pl
rename to utils/subset_scp.pl
diff --git a/tools/utt2spk_to_spk2utt.pl b/utils/utt2spk_to_spk2utt.pl
similarity index 100%
rename from tools/utt2spk_to_spk2utt.pl
rename to utils/utt2spk_to_spk2utt.pl
diff --git a/tools/validate_data_dir.sh b/utils/validate_data_dir.sh
similarity index 100%
rename from tools/validate_data_dir.sh
rename to utils/validate_data_dir.sh