Commit: complete env setup

cantabile-kwok committed Oct 8, 2023
1 parent d018a7b · commit ab58090
Showing 39 changed files with 3,832 additions and 31 deletions.
README.md — 2 changes: 1 addition & 1 deletion

@@ -13,7 +13,7 @@

## Acknowledgement
During the development, the following repositories were referred to:
-* [Kaldi](https://github.com/kaldi-asr/kaldi), for most utility scripts in `utils/`.
+* [Kaldi](https://github.com/kaldi-asr/kaldi) and [UniCATS-CTX-vec2wav](https://github.com/cantabile-kwok/UniCATS-CTX-vec2wav) for most utility scripts in `utils/`.
* [GradTTS](https://github.com/huawei-noah/Speech-Backbones/tree/main/Grad-TTS), where most of the model architecture and training pipelines are adopted.
* [VITS](https://github.com/jaywalnut310/vits), whose distributed bucket sampler is used.
* [CFM](https://github.com/atong01/conditional-flow-matching), for the ODE samplers.
path.sh — 5 changes: 3 additions & 2 deletions

@@ -1,2 +1,3 @@
-conda activate py39
-export PATH=$PWD/tools:$PATH
+conda activate vflow
+export PATH=$PWD/tools:$PATH
+chmod +x tools/*
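Note: `path.sh` is presumably meant to be sourced from the repository root (e.g. `. path.sh`), so that the `vflow` conda environment is activated and `tools/` is prepended to `PATH`; the trailing `chmod +x tools/*` marks the bundled scripts executable.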
requirements.txt — 27 changes: 27 additions & 0 deletions

@@ -0,0 +1,27 @@
audioread==3.0.0
Cython==0.29.28
decorator==5.1.1
h5py==3.7.0
joblib==1.3.2
kaldiio==2.18.0
matplotlib==3.4.3
numba==0.56.4
numpy==1.21.6
packaging==21.3
pooch==1.6.0
POT==0.9.0
resampy==0.4.0
setuptools==52.0.0
soundfile==0.12.1
soxr==0.3.5
torch==1.11.0
tqdm==4.62.2
tensorboard==2.14.1
Pillow==9.5.0
pyyaml==6.0.1
einops==0.7.0
scikit-learn==1.3.1
attrs==22.1.0
torchsde>=0.2.5
torchcde>=0.2.3
pytorch-lightning>=0.8.4
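These pins presumably target the Python interpreter of the `vflow` environment activated in `path.sh`, and would be installed with the usual `pip install -r requirements.txt`.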
tools/espnet_transform/perturb.py — 2 changes: 1 addition & 1 deletion

@@ -1,4 +1,4 @@
-import librosa
+import custom_librosa as librosa
import numpy
import scipy
import soundfile
tools/espnet_transform/spec_augment.py — 2 changes: 1 addition & 1 deletion

@@ -38,7 +38,7 @@ def time_warp(x, max_time_warp=80, inplace=False, mode="PIL"):
elif mode == "sparse_image_warp":
import torch

-from espnet.utils import spec_augment
+from espnet_utils import spec_augment

# TODO(karita): make this differentiable again
return spec_augment.time_warp(torch.from_numpy(x), window).numpy()
tools/espnet_transform/transformation.py — 40 changes: 20 additions & 20 deletions

@@ -23,26 +23,26 @@
# TODO(karita): inherit TransformInterface
# TODO(karita): register cmd arguments in asr_train.py
import_alias = dict(
-identity='espnet.transform.transform_interface:Identity',
-time_warp='espnet.transform.spec_augment:TimeWarp',
-time_mask='espnet.transform.spec_augment:TimeMask',
-freq_mask='espnet.transform.spec_augment:FreqMask',
-spec_augment='espnet.transform.spec_augment:SpecAugment',
-speed_perturbation='espnet.transform.perturb:SpeedPerturbation',
-volume_perturbation='espnet.transform.perturb:VolumePerturbation',
-noise_injection='espnet.transform.perturb:NoiseInjection',
-bandpass_perturbation='espnet.transform.perturb:BandpassPerturbation',
-rir_convolve='espnet.transform.perturb:RIRConvolve',
-delta='espnet.transform.add_deltas:AddDeltas',
-cmvn='espnet.transform.cmvn:CMVN',
-utterance_cmvn='espnet.transform.cmvn:UtteranceCMVN',
-fbank='espnet.transform.spectrogram:LogMelSpectrogram',
-spectrogram='espnet.transform.spectrogram:Spectrogram',
-stft='espnet.transform.spectrogram:Stft',
-istft='espnet.transform.spectrogram:IStft',
-stft2fbank='espnet.transform.spectrogram:Stft2LogMelSpectrogram',
-wpe='espnet.transform.wpe:WPE',
-channel_selector='espnet.transform.channel_selector:ChannelSelector')
+identity='espnet_transform.transform_interface:Identity',
+time_warp='espnet_transform.spec_augment:TimeWarp',
+time_mask='espnet_transform.spec_augment:TimeMask',
+freq_mask='espnet_transform.spec_augment:FreqMask',
+spec_augment='espnet_transform.spec_augment:SpecAugment',
+speed_perturbation='espnet_transform.perturb:SpeedPerturbation',
+volume_perturbation='espnet_transform.perturb:VolumePerturbation',
+noise_injection='espnet_transform.perturb:NoiseInjection',
+bandpass_perturbation='espnet_transform.perturb:BandpassPerturbation',
+rir_convolve='espnet_transform.perturb:RIRConvolve',
+delta='espnet_transform.add_deltas:AddDeltas',
+cmvn='espnet_transform.cmvn:CMVN',
+utterance_cmvn='espnet_transform.cmvn:UtteranceCMVN',
+fbank='espnet_transform.spectrogram:LogMelSpectrogram',
+spectrogram='espnet_transform.spectrogram:Spectrogram',
+stft='espnet_transform.spectrogram:Stft',
+istft='espnet_transform.spectrogram:IStft',
+stft2fbank='espnet_transform.spectrogram:Stft2LogMelSpectrogram',
+wpe='espnet_transform.wpe:WPE',
+channel_selector='espnet_transform.channel_selector:ChannelSelector')


class Transformation(object):
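The hunk is truncated at the class definition, but for context: `Transformation` resolves each `type` entry of a preprocessing config through the `import_alias` table above. A minimal usage sketch — the config path and its contents are illustrative assumptions, not part of this commit:

```python
import numpy as np
from espnet_transform.transformation import Transformation

# Hypothetical conf/preprocess.yaml:
#   process:
#     - {type: fbank, n_mels: 80, fs: 16000}

wav = np.random.randn(16000).astype(np.float32)         # 1 s of dummy audio
preprocessing = Transformation("conf/preprocess.yaml")  # hypothetical path
feats = preprocessing(wav)  # 'fbank' resolves via import_alias to LogMelSpectrogram
```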
tools/espnet_utils/dynamic_import.py — 4 changes: 2 additions & 2 deletions

@@ -5,14 +5,14 @@ def dynamic_import(import_path, alias=dict()):
"""dynamic import module and class
:param str import_path: syntax 'module_name:class_name'
-e.g., 'espnet.transform.add_deltas:AddDeltas'
+e.g., 'espnet_transform.add_deltas:AddDeltas'
:param dict alias: shortcut for registered class
:return: imported class
"""
if import_path not in alias and ':' not in import_path:
raise ValueError(
'import_path should be one of {} or '
-'include ":", e.g. "espnet.transform.add_deltas:AddDeltas" : '
+'include ":", e.g. "espnet_transform.add_deltas:AddDeltas" : '
'{}'.format(set(alias), import_path))
if ':' not in import_path:
import_path = alias[import_path]
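The rest of the function is truncated here; assuming it follows the usual espnet implementation, the resolution step is a thin wrapper over `importlib` — a sketch under that assumption, with `resolve` as a hypothetical name:

```python
import importlib

def resolve(import_path: str):
    """Hypothetical tail of dynamic_import: split 'module:obj' and import it."""
    module_name, obj_name = import_path.split(':')
    module = importlib.import_module(module_name)
    return getattr(module, obj_name)

# e.g. resolve('espnet_transform.add_deltas:AddDeltas') returns the AddDeltas class
```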
tools/feat-to-len.py — 2 changes: 1 addition & 1 deletion

@@ -42,7 +42,7 @@ def main():
logging.info(get_commandline_args())

if args.preprocess_conf is not None:
-from espnet.transform.transformation import Transformation
+from espnet_transform.transformation import Transformation
preprocessing = Transformation(args.preprocess_conf)
logging.info('Apply preprocessing: {}'.format(preprocessing))
else:
tools/feat-to-shape.py — 2 changes: 1 addition & 1 deletion

@@ -42,7 +42,7 @@ def main():
logging.info(get_commandline_args())

if args.preprocess_conf is not None:
-from espnet.transform.transformation import Transformation
+from espnet_transform.transformation import Transformation
preprocessing = Transformation(args.preprocess_conf)
logging.info('Apply preprocessing: {}'.format(preprocessing))
else:
torchdyn/__init__.py — 19 changes: 19 additions & 0 deletions

@@ -0,0 +1,19 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

__version__ = '1.0.6'
__author__ = 'Michael Poli, Stefano Massaroli et al.'

from torch import Tensor
from typing import Tuple

TTuple = Tuple[Tensor, Tensor]
torchdyn/core/__init__.py — 21 changes: 21 additions & 0 deletions

@@ -0,0 +1,21 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from torchdyn.core.defunc import DEFunc
from torchdyn.core.neuralde import NeuralODE, NeuralSDE, MultipleShootingLayer
from torchdyn.core.problems import ODEProblem, SDEProblem, MultipleShootingProblem

# backward-compatibility (pre v0.2.0)
NeuralDE = NeuralODE

__all__ = ['DEFunc', 'NeuralODE', 'NeuralDE', 'NeuralSDE', 'ODEProblem', 'SDEProblem',
'MultipleShootingProblem', 'MultipleShootingLayer']
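These exports form the package's public surface; a minimal usage sketch (the vector field, solver choice, and shapes below are illustrative, not taken from this commit):

```python
import torch
import torch.nn as nn
from torchdyn.core import NeuralODE

# Illustrative vector field: a small MLP mapping state -> dx/dt
vf = nn.Sequential(nn.Linear(2, 64), nn.Tanh(), nn.Linear(64, 2))
model = NeuralODE(vf, solver='dopri5', sensitivity='autograd')

x0 = torch.randn(8, 2)                 # batch of initial states
t_span = torch.linspace(0.0, 1.0, 10)  # integration grid
t_eval, traj = model(x0, t_span)       # traj: (len(t_span), batch, dim)
```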
torchdyn/core/defunc.py — 117 changes: 117 additions & 0 deletions

@@ -0,0 +1,117 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Callable, Dict
import torch
from torch import Tensor, cat
import torch.nn as nn


class DEFuncBase(nn.Module):
def __init__(self, vector_field:Callable, has_time_arg:bool=True):
"""Basic wrapper to ensure call signature compatibility between generic torch Modules and vector fields.
Args:
vector_field (Callable): callable defining the dynamics / vector field / `dxdt` / forcing function
has_time_arg (bool, optional): Internal arg. to indicate whether the callable has `t` in its `__call__`
or `forward` method. Defaults to True.
"""
super().__init__()
self.nfe, self.vf, self.has_time_arg = 0., vector_field, has_time_arg

def forward(self, t:Tensor, x:Tensor, args:Dict={}) -> Tensor:
self.nfe += 1
if self.has_time_arg: return self.vf(t, x, args=args)
else: return self.vf(x)


class DEFunc(nn.Module):
def __init__(self, vector_field:Callable, order:int=1):
"""Special vector field wrapper for Neural ODEs.
Handles auxiliary tasks: time ("depth") concatenation, higher-order dynamics and forward propagated integral losses.
Args:
vector_field (Callable): callable defining the dynamics / vector field / `dxdt` / forcing function
order (int, optional): order of the differential equation. Defaults to 1.
Notes:
Currently handles the following:
(1) assigns time tensor to each submodule requiring it (e.g. `GalLinear`).
(2) in case of integral losses + reverse-mode differentiation, propagates the loss in the first dimension of `x`
and automatically splits the Tensor into `x[:, 0]` and `x[:, 1:]` for vector field computation
(3) in case of higher-order dynamics, adjusts the vector field forward to recursively compute various orders.
"""
super().__init__()
self.vf, self.nfe = vector_field, 0.
self.order, self.integral_loss, self.sensitivity = order, None, None
# identify whether vector field already has time arg

def forward(self, t:Tensor, x:Tensor, args:Dict={}) -> Tensor:
self.nfe += 1
# set `t` depth-variable to DepthCat modules
for _, module in self.vf.named_modules():
if hasattr(module, 't'):
module.t = t

# if-else to handle autograd training with integral loss propagated in x[:, 0]
if (self.integral_loss is not None) and self.sensitivity == 'autograd':
x_dyn = x[:, 1:]
dlds = self.integral_loss(t, x_dyn)
if len(dlds.shape) == 1: dlds = dlds[:, None]
if self.order > 1: x_dyn = self.higher_order_forward(t, x_dyn, args)
else: x_dyn = self.vf(t, x_dyn)
return cat([dlds, x_dyn], 1).to(x_dyn)

# regular forward
else:
if self.order > 1: x = self.higher_order_forward(t, x)
else: x = self.vf(t, x, args=args)
return x

def higher_order_forward(self, t:Tensor, x:Tensor, args:Dict={}) -> Tensor:
x_new = []
size_order = x.size(1) // self.order
for i in range(1, self.order):
x_new.append(x[:, size_order*i : size_order*(i+1)])
x_new.append(self.vf(t, x))
return cat(x_new, dim=1).to(x)


class SDEFunc(nn.Module):
def __init__(self, f:Callable, g:Callable, order:int=1):
"""Special vector field wrapper for Neural SDEs.
Args:
f (Callable): callable defining the drift
g (Callable): callable defining the diffusion term
order (int, optional): order of the differential equation. Defaults to 1.
"""
super().__init__()
self.order, self.intloss, self.sensitivity = order, None, None
self.f_func, self.g_func = f, g
self.nfe = 0

def forward(self, t:Tensor, x:Tensor, args:Dict={}) -> Tensor:
pass  # drift and diffusion are exposed separately via f() and g() below

def f(self, t:Tensor, x:Tensor, args:Dict={}) -> Tensor:
self.nfe += 1
for _, module in self.f_func.named_modules():
if hasattr(module, 't'):
module.t = t
return self.f_func(x, args)

def g(self, t:Tensor, x:Tensor, args:Dict={}) -> Tensor:
for _, module in self.g_func.named_modules():
if hasattr(module, 't'):
module.t = t
return self.g_func(x, args)
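To make the `order` handling above concrete: for `order=2`, `higher_order_forward` passes the velocity block of the state through unchanged and lets the wrapped field supply the highest derivative. A self-contained sketch (the harmonic-oscillator field is illustrative):

```python
import torch
import torch.nn as nn
from torchdyn.core.defunc import DEFunc

class Accel(nn.Module):
    """Illustrative field returning the highest-order derivative: d2x/dt2 = -x."""
    def forward(self, t, x, args={}):
        pos = x[:, :1]   # state is [position, velocity]; read the position block
        return -pos      # acceleration of a unit harmonic oscillator

f = DEFunc(Accel(), order=2)
state = torch.tensor([[1.0, 0.0]])    # [position, velocity]
dstate = f(torch.tensor(0.0), state)  # -> [[0., -1.]] = [velocity, acceleration]
```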
(The remaining changed files in this commit are not shown here.)
