Commit: complete env setup

cantabile-kwok committed Oct 8, 2023
1 parent d018a7b · commit ab58090
Showing 39 changed files with 3,832 additions and 31 deletions.
README.md — 2 changes: 1 addition & 1 deletion

@@ -13,7 +13,7 @@

## Acknowledgement
During the development, the following repositories were referred to:
-* [Kaldi](https://github.com/kaldi-asr/kaldi), for most utility scripts in `utils/`.
+* [Kaldi](https://github.com/kaldi-asr/kaldi) and [UniCATS-CTX-vec2wav](https://github.com/cantabile-kwok/UniCATS-CTX-vec2wav) for most utility scripts in `utils/`.
* [GradTTS](https://github.com/huawei-noah/Speech-Backbones/tree/main/Grad-TTS), where most of the model architecture and training pipelines are adopted.
* [VITS](https://github.com/jaywalnut310/vits), whose distributed bucket sampler is used.
* [CFM](https://github.com/atong01/conditional-flow-matching), for the ODE samplers.
path.sh — 5 changes: 3 additions & 2 deletions

@@ -1,2 +1,3 @@
-conda activate py39
-export PATH=$PWD/tools:$PATH
+conda activate vflow
+export PATH=$PWD/tools:$PATH
+chmod +x tools/*
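Note: `path.sh` is presumably meant to be sourced from the repository root (e.g. `. path.sh`), so that the `vflow` conda environment is activated and `tools/` is prepended to `PATH`; the trailing `chmod +x tools/*` marks the bundled scripts executable.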
requirements.txt — 27 changes: 27 additions & 0 deletions

@@ -0,0 +1,27 @@
audioread==3.0.0
Cython==0.29.28
decorator==5.1.1
h5py==3.7.0
joblib==1.3.2
kaldiio==2.18.0
matplotlib==3.4.3
numba==0.56.4
numpy==1.21.6
packaging==21.3
pooch==1.6.0
POT==0.9.0
resampy==0.4.0
setuptools==52.0.0
soundfile==0.12.1
soxr==0.3.5
torch==1.11.0
tqdm==4.62.2
tensorboard==2.14.1
Pillow==9.5.0
pyyaml==6.0.1
einops==0.7.0
scikit-learn==1.3.1
attrs==22.1.0
torchsde>=0.2.5
torchcde>=0.2.3
pytorch-lightning>=0.8.4
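These pins presumably target the Python interpreter of the `vflow` environment activated in `path.sh`, and would be installed with the usual `pip install -r requirements.txt`.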
tools/espnet_transform/perturb.py — 2 changes: 1 addition & 1 deletion

@@ -1,4 +1,4 @@
-import librosa
+import custom_librosa as librosa
import numpy
import scipy
import soundfile
tools/espnet_transform/spec_augment.py — 2 changes: 1 addition & 1 deletion

@@ -38,7 +38,7 @@ def time_warp(x, max_time_warp=80, inplace=False, mode="PIL"):
elif mode == "sparse_image_warp":
import torch

-from espnet.utils import spec_augment
+from espnet_utils import spec_augment

# TODO(karita): make this differentiable again
return spec_augment.time_warp(torch.from_numpy(x), window).numpy()
tools/espnet_transform/transformation.py — 40 changes: 20 additions & 20 deletions

@@ -23,26 +23,26 @@
# TODO(karita): inherit TransformInterface
# TODO(karita): register cmd arguments in asr_train.py
import_alias = dict(
-identity='espnet.transform.transform_interface:Identity',
-time_warp='espnet.transform.spec_augment:TimeWarp',
-time_mask='espnet.transform.spec_augment:TimeMask',
-freq_mask='espnet.transform.spec_augment:FreqMask',
-spec_augment='espnet.transform.spec_augment:SpecAugment',
-speed_perturbation='espnet.transform.perturb:SpeedPerturbation',
-volume_perturbation='espnet.transform.perturb:VolumePerturbation',
-noise_injection='espnet.transform.perturb:NoiseInjection',
-bandpass_perturbation='espnet.transform.perturb:BandpassPerturbation',
-rir_convolve='espnet.transform.perturb:RIRConvolve',
-delta='espnet.transform.add_deltas:AddDeltas',
-cmvn='espnet.transform.cmvn:CMVN',
-utterance_cmvn='espnet.transform.cmvn:UtteranceCMVN',
-fbank='espnet.transform.spectrogram:LogMelSpectrogram',
-spectrogram='espnet.transform.spectrogram:Spectrogram',
-stft='espnet.transform.spectrogram:Stft',
-istft='espnet.transform.spectrogram:IStft',
-stft2fbank='espnet.transform.spectrogram:Stft2LogMelSpectrogram',
-wpe='espnet.transform.wpe:WPE',
-channel_selector='espnet.transform.channel_selector:ChannelSelector')
+identity='espnet_transform.transform_interface:Identity',
+time_warp='espnet_transform.spec_augment:TimeWarp',
+time_mask='espnet_transform.spec_augment:TimeMask',
+freq_mask='espnet_transform.spec_augment:FreqMask',
+spec_augment='espnet_transform.spec_augment:SpecAugment',
+speed_perturbation='espnet_transform.perturb:SpeedPerturbation',
+volume_perturbation='espnet_transform.perturb:VolumePerturbation',
+noise_injection='espnet_transform.perturb:NoiseInjection',
+bandpass_perturbation='espnet_transform.perturb:BandpassPerturbation',
+rir_convolve='espnet_transform.perturb:RIRConvolve',
+delta='espnet_transform.add_deltas:AddDeltas',
+cmvn='espnet_transform.cmvn:CMVN',
+utterance_cmvn='espnet_transform.cmvn:UtteranceCMVN',
+fbank='espnet_transform.spectrogram:LogMelSpectrogram',
+spectrogram='espnet_transform.spectrogram:Spectrogram',
+stft='espnet_transform.spectrogram:Stft',
+istft='espnet_transform.spectrogram:IStft',
+stft2fbank='espnet_transform.spectrogram:Stft2LogMelSpectrogram',
+wpe='espnet_transform.wpe:WPE',
+channel_selector='espnet_transform.channel_selector:ChannelSelector')


class Transformation(object):
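The hunk is truncated at the class definition, but for context: `Transformation` resolves each `type` entry of a preprocessing config through the `import_alias` table above. A minimal usage sketch — the config path and its contents are illustrative assumptions, not part of this commit:

```python
import numpy as np
from espnet_transform.transformation import Transformation

# Hypothetical conf/preprocess.yaml:
#   process:
#     - {type: fbank, n_mels: 80, fs: 16000}

wav = np.random.randn(16000).astype(np.float32)         # 1 s of dummy audio
preprocessing = Transformation("conf/preprocess.yaml")  # hypothetical path
feats = preprocessing(wav)  # 'fbank' resolves via import_alias to LogMelSpectrogram
```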
tools/espnet_utils/dynamic_import.py — 4 changes: 2 additions & 2 deletions

@@ -5,14 +5,14 @@ def dynamic_import(import_path, alias=dict()):
"""dynamic import module and class
:param str import_path: syntax 'module_name:class_name'
-e.g., 'espnet.transform.add_deltas:AddDeltas'
+e.g., 'espnet_transform.add_deltas:AddDeltas'
:param dict alias: shortcut for registered class
:return: imported class
"""
if import_path not in alias and ':' not in import_path:
raise ValueError(
'import_path should be one of {} or '
-'include ":", e.g. "espnet.transform.add_deltas:AddDeltas" : '
+'include ":", e.g. "espnet_transform.add_deltas:AddDeltas" : '
'{}'.format(set(alias), import_path))
if ':' not in import_path:
import_path = alias[import_path]
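The rest of the function is truncated here; assuming it follows the usual espnet implementation, the resolution step is a thin wrapper over `importlib` — a sketch under that assumption, with `resolve` as a hypothetical name:

```python
import importlib

def resolve(import_path: str):
    """Hypothetical tail of dynamic_import: split 'module:obj' and import it."""
    module_name, obj_name = import_path.split(':')
    module = importlib.import_module(module_name)
    return getattr(module, obj_name)

# e.g. resolve('espnet_transform.add_deltas:AddDeltas') returns the AddDeltas class
```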
tools/feat-to-len.py — 2 changes: 1 addition & 1 deletion

@@ -42,7 +42,7 @@ def main():
logging.info(get_commandline_args())

if args.preprocess_conf is not None:
-from espnet.transform.transformation import Transformation
+from espnet_transform.transformation import Transformation
preprocessing = Transformation(args.preprocess_conf)
logging.info('Apply preprocessing: {}'.format(preprocessing))
else:
tools/feat-to-shape.py — 2 changes: 1 addition & 1 deletion

@@ -42,7 +42,7 @@ def main():
logging.info(get_commandline_args())

if args.preprocess_conf is not None:
-from espnet.transform.transformation import Transformation
+from espnet_transform.transformation import Transformation
preprocessing = Transformation(args.preprocess_conf)
logging.info('Apply preprocessing: {}'.format(preprocessing))
else:
torchdyn/__init__.py — 19 changes: 19 additions & 0 deletions

@@ -0,0 +1,19 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

__version__ = '1.0.6'
__author__ = 'Michael Poli, Stefano Massaroli et al.'

from torch import Tensor
from typing import Tuple

TTuple = Tuple[Tensor, Tensor]
torchdyn/core/__init__.py — 21 changes: 21 additions & 0 deletions

@@ -0,0 +1,21 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from torchdyn.core.defunc import DEFunc
from torchdyn.core.neuralde import NeuralODE, NeuralSDE, MultipleShootingLayer
from torchdyn.core.problems import ODEProblem, SDEProblem, MultipleShootingProblem

# backward-compatibility (pre v0.2.0)
NeuralDE = NeuralODE

__all__ = ['DEFunc', 'NeuralODE', 'NeuralDE', 'NeuralSDE', 'ODEProblem', 'SDEProblem',
'MultipleShootingProblem', 'MultipleShootingLayer']
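These exports form the package's public surface; a minimal usage sketch (the vector field, solver choice, and shapes below are illustrative, not taken from this commit):

```python
import torch
import torch.nn as nn
from torchdyn.core import NeuralODE

# Illustrative vector field: a small MLP mapping state -> dx/dt
vf = nn.Sequential(nn.Linear(2, 64), nn.Tanh(), nn.Linear(64, 2))
model = NeuralODE(vf, solver='dopri5', sensitivity='autograd')

x0 = torch.randn(8, 2)                 # batch of initial states
t_span = torch.linspace(0.0, 1.0, 10)  # integration grid
t_eval, traj = model(x0, t_span)       # traj: (len(t_span), batch, dim)
```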
torchdyn/core/defunc.py — 117 changes: 117 additions & 0 deletions

@@ -0,0 +1,117 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Callable, Dict
import torch
from torch import Tensor, cat
import torch.nn as nn


class DEFuncBase(nn.Module):
def __init__(self, vector_field:Callable, has_time_arg:bool=True):
"""Basic wrapper to ensure call signature compatibility between generic torch Modules and vector fields.
Args:
vector_field (Callable): callable defining the dynamics / vector field / `dxdt` / forcing function
has_time_arg (bool, optional): Internal arg. to indicate whether the callable has `t` in its `__call__`
or `forward` method. Defaults to True.
"""
super().__init__()
self.nfe, self.vf, self.has_time_arg = 0., vector_field, has_time_arg

def forward(self, t:Tensor, x:Tensor, args:Dict={}) -> Tensor:
self.nfe += 1
if self.has_time_arg: return self.vf(t, x, args=args)
else: return self.vf(x)


class DEFunc(nn.Module):
def __init__(self, vector_field:Callable, order:int=1):
"""Special vector field wrapper for Neural ODEs.
Handles auxiliary tasks: time ("depth") concatenation, higher-order dynamics and forward propagated integral losses.
Args:
vector_field (Callable): callable defining the dynamics / vector field / `dxdt` / forcing function
order (int, optional): order of the differential equation. Defaults to 1.
Notes:
Currently handles the following:
(1) assigns time tensor to each submodule requiring it (e.g. `GalLinear`).
(2) in case of integral losses + reverse-mode differentiation, propagates the loss in the first dimension of `x`
and automatically splits the Tensor into `x[:, 0]` and `x[:, 1:]` for vector field computation
(3) in case of higher-order dynamics, adjusts the vector field forward to recursively compute various orders.
"""
super().__init__()
self.vf, self.nfe = vector_field, 0.
self.order, self.integral_loss, self.sensitivity = order, None, None
# identify whether vector field already has time arg

def forward(self, t:Tensor, x:Tensor, args:Dict={}) -> Tensor:
self.nfe += 1
# set `t` depth-variable to DepthCat modules
for _, module in self.vf.named_modules():
if hasattr(module, 't'):
module.t = t

# if-else to handle autograd training with integral loss propagated in x[:, 0]
if (self.integral_loss is not None) and self.sensitivity == 'autograd':
x_dyn = x[:, 1:]
dlds = self.integral_loss(t, x_dyn)
if len(dlds.shape) == 1: dlds = dlds[:, None]
if self.order > 1: x_dyn = self.higher_order_forward(t, x_dyn, args)
else: x_dyn = self.vf(t, x_dyn)
return cat([dlds, x_dyn], 1).to(x_dyn)

# regular forward
else:
if self.order > 1: x = self.higher_order_forward(t, x)
else: x = self.vf(t, x, args=args)
return x

def higher_order_forward(self, t:Tensor, x:Tensor, args:Dict={}) -> Tensor:
x_new = []
size_order = x.size(1) // self.order
for i in range(1, self.order):
x_new.append(x[:, size_order*i : size_order*(i+1)])
x_new.append(self.vf(t, x))
return cat(x_new, dim=1).to(x)


class SDEFunc(nn.Module):
def __init__(self, f:Callable, g:Callable, order:int=1):
"""Special vector field wrapper for Neural SDEs.
Args:
f (Callable): callable defining the drift
g (Callable): callable defining the diffusion term
order (int, optional): order of the differential equation. Defaults to 1.
"""
super().__init__()
self.order, self.intloss, self.sensitivity = order, None, None
self.f_func, self.g_func = f, g
self.nfe = 0

def forward(self, t:Tensor, x:Tensor, args:Dict={}) -> Tensor:
pass  # drift and diffusion are exposed separately via f() and g() below

def f(self, t:Tensor, x:Tensor, args:Dict={}) -> Tensor:
self.nfe += 1
for _, module in self.f_func.named_modules():
if hasattr(module, 't'):
module.t = t
return self.f_func(x, args)

def g(self, t:Tensor, x:Tensor, args:Dict={}) -> Tensor:
for _, module in self.g_func.named_modules():
if hasattr(module, 't'):
module.t = t
return self.g_func(x, args)
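To make the `order` handling above concrete: for `order=2`, `higher_order_forward` passes the velocity block of the state through unchanged and lets the wrapped field supply the highest derivative. A self-contained sketch (the harmonic-oscillator field is illustrative):

```python
import torch
import torch.nn as nn
from torchdyn.core.defunc import DEFunc

class Accel(nn.Module):
    """Illustrative field returning the highest-order derivative: d2x/dt2 = -x."""
    def forward(self, t, x, args={}):
        pos = x[:, :1]   # state is [position, velocity]; read the position block
        return -pos      # acceleration of a unit harmonic oscillator

f = DEFunc(Accel(), order=2)
state = torch.tensor([[1.0, 0.0]])    # [position, velocity]
dstate = f(torch.tensor(0.0), state)  # -> [[0., -1.]] = [velocity, acceleration]
```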
(The remaining changed files in this commit are not shown here.)
