diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml new file mode 100644 index 00000000..d7f340a7 --- /dev/null +++ b/.github/workflows/ci.yaml @@ -0,0 +1,36 @@ +# This workflow will install Python dependencies, run tests and lint with a variety of Python versions +# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions + +name: Python package + +on: + push: + branches: [ master ] + pull_request: + branches: [ master ] + +jobs: + build: + + runs-on: ubuntu-latest + env: + OMP_NUM_THREADS: 1 + strategy: + matrix: + python-version: [3.7, 3.8, 3.9] + + steps: + - uses: actions/checkout@v2 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v1 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + sudo apt-get install libsndfile1-dev + python -m pip install --upgrade pip + pip install torch + pip install -e ".[test]" + - name: Test + run: | + nosetests --with-coverage --cover-package=nnmnkwii -v -w tests/ -a '!require_local_data,!modspec' \ No newline at end of file diff --git a/.gitignore b/.gitignore index f989c1a2..824295bf 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ README.rst nnmnkwii/version.py nnmnkwii/paramgen/mlpg_helper.c nnmnkwii/util/_linalg.c +nnmnkwii/paramgen/_bandmat/*.c examples docs/references/generated docs/.doctrees diff --git a/.travis.yml b/.travis.yml index 2ce221ad..b05ae2b4 100644 --- a/.travis.yml +++ b/.travis.yml @@ -35,14 +35,8 @@ install: - pip install -e ".[test]" script: - # Workaround for bandmat installation issue on python >= 3.7 - # see https://github.com/MattShannon/bandmat/issues/10 - - if [ "$TRAVIS_PYTHON_VERSION" == "3.7" -o "$TRAVIS_PYTHON_VERSION" == "3.8" ]; then - nosetests --with-coverage --cover-package=nnmnkwii -v -w tests/ -a '!require_local_data,!modspec,!requires_bandmat' --ignore-files=test_autograd.py; - else - nosetests --with-coverage --cover-package=nnmnkwii -v -w tests/ -a '!require_local_data,!modspec'; - fi - - flake8 + - nosetests --with-coverage --cover-package=nnmnkwii -v -w tests/ -a '!require_local_data,!modspec'; +# - flake8 after_success: - codecov diff --git a/MANIFEST.in b/MANIFEST.in index 62564d9b..30745a6f 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,4 +1,4 @@ include README.md LICENSE.md -recursive-include nnmnkwii COPYING +recursive-include nnmnkwii COPYING License recursive-include nnmnkwii/util/_example_data *.lab *.wav *.npz *.hed recursive-include nnmnkwii *.c diff --git a/README.md b/README.md index 0f04cdb3..a925390b 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,7 @@ [![][docs-stable-img]][docs-stable-url] [![][docs-latest-img]][docs-latest-url] [![PyPI](https://img.shields.io/pypi/v/nnmnkwii.svg)](https://pypi.python.org/pypi/nnmnkwii) +[![Python package](https://github.com/r9y9/nnmnkwii/actions/workflows/ci.yaml/badge.svg)](https://github.com/r9y9/nnmnkwii/actions/workflows/ci.yaml) [![Build Status](https://travis-ci.org/r9y9/nnmnkwii.svg?branch=master)](https://travis-ci.org/r9y9/nnmnkwii) [![Build status](https://ci.appveyor.com/api/projects/status/ch8cmtpw8ic1sd86?svg=true)](https://ci.appveyor.com/project/r9y9/nnmnkwii) [![codecov](https://codecov.io/gh/r9y9/nnmnkwii/branch/master/graph/badge.svg)](https://codecov.io/gh/r9y9/nnmnkwii) @@ -11,8 +12,6 @@ Library to build speech synthesis systems designed for easy and fast prototyping. -Supported python versions: 2.7 and 3.6. - ## Documentation - [**STABLE**][docs-stable-url] — **most recently tagged version of the documentation.** diff --git a/docs/changelog.rst b/docs/changelog.rst index 96a74b82..6d6db4a8 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -1,9 +1,18 @@ Change log ========== -v0.0.23 <2021-xx-xx> +v0.0.23 <2021-05-15> -------------------- +- `#112`_: Renamed continuous_dict to numeric_dict to be more precise. +- `#112`_: Bandmat is now a part of nnmnkwii's internal package. This is to avoid instllation failusres on python >=3.6. https://github.com/MattShannon/bandmat/issues/10 +- `#112`_: Started testing using github actions (python 3.7, 3.8, 3.9) +- `#112`_: [hts.io]: :func:`nnmnkwii.io.hts.load_question_set` now keeps question names (e.g. "L-Phone_Boin") in dictionary. +- `#112`_: [hts.io]: New functionality :func:`nnmnkwii.io.hts.write_audacity_labels` +- `#112`_: [hts.io]: New functionality :func:`nnmnkwii.io.hts.write_textgrid` +- `#112`_: Renamed np.int to int +- `#112`_: Added pyproject.yaml + v0.0.22 <2020-12-25> -------------------- @@ -214,4 +223,5 @@ v0.0.1 <2017-08-14> .. _#101: https://github.com/r9y9/nnmnkwii/pull/101 .. _#105: https://github.com/r9y9/nnmnkwii/pull/105 .. _#108: https://github.com/r9y9/nnmnkwii/pull/108 -.. _#109: https://github.com/r9y9/nnmnkwii/pull/109 \ No newline at end of file +.. _#109: https://github.com/r9y9/nnmnkwii/pull/109 +.. _#112: https://github.com/r9y9/nnmnkwii/pull/112 diff --git a/docs/conf.py b/docs/conf.py index db6c9ad9..2c032e41 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -22,12 +22,13 @@ # sys.path.insert(0, os.path.abspath('.')) -import pkg_resources import os -__version__ = pkg_resources.get_distribution('nnmnkwii').version +import pkg_resources + +__version__ = pkg_resources.get_distribution("nnmnkwii").version -ON_RTD = os.environ.get('READTHEDOCS', None) == 'True' +ON_RTD = os.environ.get("READTHEDOCS", None) == "True" # -- General configuration ------------------------------------------------ @@ -39,23 +40,23 @@ # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ - 'sphinx.ext.autodoc', - 'sphinx.ext.autosummary', - 'sphinx.ext.doctest', - 'sphinx.ext.intersphinx', - 'sphinx.ext.mathjax', - 'sphinx.ext.todo', - 'sphinx.ext.viewcode', - 'sphinx.ext.githubpages', - 'sphinx.ext.napoleon', - 'numpydoc', - 'nbsphinx', - 'IPython.sphinxext.ipython_console_highlighting', + "sphinx.ext.autodoc", + "sphinx.ext.autosummary", + "sphinx.ext.doctest", + "sphinx.ext.intersphinx", + "sphinx.ext.mathjax", + "sphinx.ext.todo", + "sphinx.ext.viewcode", + "sphinx.ext.githubpages", + "sphinx.ext.napoleon", + "numpydoc", + "nbsphinx", + "IPython.sphinxext.ipython_console_highlighting", ] if ON_RTD: # Remove extensions not currently supported on RTD - extensions.remove('matplotlib.sphinxext.plot_directive') + extensions.remove("matplotlib.sphinxext.plot_directive") autosummary_generate = True numpydoc_show_class_members = False @@ -77,18 +78,18 @@ else: try: print("plot_directive.__version__:", plot_directive.__version__) - use_matplotlib_plot_directive = (plot_directive.__version__ >= 2) + use_matplotlib_plot_directive = plot_directive.__version__ >= 2 except AttributeError: use_matplotlib_plot_directive = False if use_matplotlib_plot_directive: - extensions.append('matplotlib.sphinxext.plot_directive') + extensions.append("matplotlib.sphinxext.plot_directive") else: raise RuntimeError("You need a recent enough version of matplotlib") -#------------------------------------------------------------------------------ +# ------------------------------------------------------------------------------ # Plot -#------------------------------------------------------------------------------ +# ------------------------------------------------------------------------------ plot_pre_code = """ import seaborn seaborn.set(style='ticks') @@ -97,46 +98,47 @@ np.set_printoptions(precision=3, linewidth=64, edgeitems=2, threshold=200) """ plot_include_source = True -plot_formats = [('png', 96), 'pdf'] +plot_formats = [("png", 96), "pdf"] plot_html_show_formats = False font_size = 13 * 72 / 96.0 # 13 px plot_rcparams = { - 'font.size': font_size, - 'axes.titlesize': font_size, - 'axes.labelsize': font_size, - 'xtick.labelsize': font_size, - 'ytick.labelsize': font_size, - 'legend.fontsize': font_size, - 'figure.subplot.bottom': 0.2, - 'figure.subplot.left': 0.2, - 'figure.subplot.right': 0.9, - 'figure.subplot.top': 0.85, - 'figure.subplot.wspace': 0.4, - 'text.usetex': False, + "font.size": font_size, + "axes.titlesize": font_size, + "axes.labelsize": font_size, + "xtick.labelsize": font_size, + "ytick.labelsize": font_size, + "legend.fontsize": font_size, + "figure.subplot.bottom": 0.2, + "figure.subplot.left": 0.2, + "figure.subplot.right": 0.9, + "figure.subplot.top": 0.85, + "figure.subplot.wspace": 0.4, + "text.usetex": False, } if not ON_RTD: import matplotlib + matplotlib.rcParams.update(plot_rcparams) # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: # # source_suffix = ['.rst', '.md'] -source_suffix = '.rst' +source_suffix = ".rst" # The master toctree document. -master_doc = 'index' +master_doc = "index" # General information about the project. -project = 'nnmnkwii' -copyright = '2017, Ryuichi Yamamoto' -author = 'Ryuichi Yamamoto' +project = "nnmnkwii" +copyright = "2017, Ryuichi Yamamoto" +author = "Ryuichi Yamamoto" # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the @@ -157,10 +159,10 @@ # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This patterns also effect to html_static_path and html_extra_path -exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', '**.ipynb_checkpoints'] +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "**.ipynb_checkpoints"] # The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' +pygments_style = "sphinx" # If true, `todo` and `todoList` produce output, else they produce nothing. todo_include_todos = True @@ -171,24 +173,27 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # -html_theme = 'sphinx_rtd_theme' +html_theme = "sphinx_rtd_theme" # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. # html_theme_options = { - 'collapse_navigation': False, - 'display_version': True, - 'logo_only': True, + "collapse_navigation": False, + "display_version": True, + "logo_only": True, } # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] +html_static_path = ["_static"] +html_css_files = [ + "css/custom.css", +] -html_logo = os.path.join(html_static_path[0], 'img/logo.png') +html_logo = os.path.join(html_static_path[0], "img/logo.png") # Custom sidebar templates, must be a dictionary that maps document names # to template names. @@ -196,12 +201,12 @@ # This is required for the alabaster theme # refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars html_sidebars = { - '**': [ - 'about.html', - 'navigation.html', - 'relations.html', # needs 'show_related': True theme option to display - 'searchbox.html', - 'donate.html', + "**": [ + "about.html", + "navigation.html", + "relations.html", # needs 'show_related': True theme option to display + "searchbox.html", + "donate.html", ] } @@ -209,7 +214,7 @@ # -- Options for HTMLHelp output ------------------------------------------ # Output file base name for HTML help builder. -htmlhelp_basename = 'nnmnkwiidoc' +htmlhelp_basename = "nnmnkwiidoc" # -- Options for LaTeX output --------------------------------------------- @@ -218,15 +223,12 @@ # The paper size ('letterpaper' or 'a4paper'). # # 'papersize': 'letterpaper', - # The font size ('10pt', '11pt' or '12pt'). # # 'pointsize': '10pt', - # Additional stuff for the LaTeX preamble. # # 'preamble': '', - # Latex figure (float) alignment # # 'figure_align': 'htbp', @@ -236,8 +238,13 @@ # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ - (master_doc, 'nnmnkwii.tex', 'nnmnkwii Documentation', - 'Ryuichi Yamamoto', 'manual'), + ( + master_doc, + "nnmnkwii.tex", + "nnmnkwii Documentation", + "Ryuichi Yamamoto", + "manual", + ), ] @@ -245,10 +252,7 @@ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). -man_pages = [ - (master_doc, 'nnmnkwii', 'nnmnkwii Documentation', - [author], 1) -] +man_pages = [(master_doc, "nnmnkwii", "nnmnkwii Documentation", [author], 1)] # -- Options for Texinfo output ------------------------------------------- @@ -257,18 +261,26 @@ # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - (master_doc, 'nnmnkwii', 'nnmnkwii Documentation', - author, 'nnmnkwii', 'One line description of project.', - 'Miscellaneous'), + ( + master_doc, + "nnmnkwii", + "nnmnkwii Documentation", + author, + "nnmnkwii", + "One line description of project.", + "Miscellaneous", + ), ] # Example configuration for intersphinx: refer to the Python standard library. intersphinx_mapping = { - 'python': ('https://docs.python.org/', None), - 'numpy': ('http://docs.scipy.org/doc/numpy/', None), - 'pysptk': ('https://pysptk.readthedocs.io/en/latest/', None), - 'pytorch': ('http://pytorch.org/docs/master/', None), - 'librosa': ('http://librosa.github.io/librosa/', None), - 'sklearn': ('http://scikit-learn.org/stable/', None) + "python": ("https://docs.python.org/3", None), + "numpy": ("https://numpy.org/doc/stable/", None), + "np": ("https://numpy.org/doc/stable/", None), + "scipy": ("https://docs.scipy.org/doc/scipy/reference/", None), + "matplotlib": ("https://matplotlib.org/", None), + "sklearn": ("https://scikit-learn.org/stable/", None), + "pysptk": ("https://pysptk.readthedocs.io/en/latest/", None), + "pytorch": ("http://pytorch.org/docs/stable/", None), } diff --git a/docs/nnmnkwii_gallery b/docs/nnmnkwii_gallery index bd46ccfa..ddc6b2d5 160000 --- a/docs/nnmnkwii_gallery +++ b/docs/nnmnkwii_gallery @@ -1 +1 @@ -Subproject commit bd46ccfae88e8a5bc65dead67218ce34037e556f +Subproject commit ddc6b2d5df4bdfd013a9ba948b30724e4ec41170 diff --git a/docs/references/io.rst b/docs/references/io.rst index acf8a48f..4a3db96b 100644 --- a/docs/references/io.rst +++ b/docs/references/io.rst @@ -16,6 +16,8 @@ HTS IO load load_question_set + write_audacity_labels + write_textgrid .. autoclass:: HTSLabelFile :members: diff --git a/nnmnkwii/baseline/gmm.py b/nnmnkwii/baseline/gmm.py index 943a6c60..577ac675 100644 --- a/nnmnkwii/baseline/gmm.py +++ b/nnmnkwii/baseline/gmm.py @@ -1,8 +1,44 @@ import numpy as np +from nnmnkwii.paramgen import mlpg +from scipy import linalg from sklearn.mixture import GaussianMixture -from sklearn.mixture.gaussian_mixture import _compute_precision_cholesky -from nnmnkwii.paramgen import mlpg + +# ref: https://github.com/scikit-learn/scikit-learn/blob/0.24.1/sklearn/mixture/_gaussian_mixture.py +def _compute_precision_cholesky(covariances, covariance_type): + estimate_precision_error_message = ( + "Fitting the mixture model failed because some components have " + "ill-defined empirical covariance (for instance caused by singleton " + "or collapsed samples). Try to decrease the number of components, " + "or increase reg_covar." + ) + + if covariance_type == "full": + n_components, n_features, _ = covariances.shape + precisions_chol = np.empty((n_components, n_features, n_features)) + for k, covariance in enumerate(covariances): + try: + cov_chol = linalg.cholesky(covariance, lower=True) + except linalg.LinAlgError: + raise ValueError(estimate_precision_error_message) + precisions_chol[k] = linalg.solve_triangular( + cov_chol, np.eye(n_features), lower=True + ).T + elif covariance_type == "tied": + _, n_features = covariances.shape + try: + cov_chol = linalg.cholesky(covariances, lower=True) + except linalg.LinAlgError: + raise ValueError(estimate_precision_error_message) + precisions_chol = linalg.solve_triangular( + cov_chol, np.eye(n_features), lower=True + ).T + else: + if np.any(np.less_equal(covariances, 0.0)): + raise ValueError(estimate_precision_error_message) + precisions_chol = 1.0 / np.sqrt(covariances) + return precisions_chol + # TODO: this can be refactored to be more flexible # e.g. take `swap` and `diff` out of the class @@ -39,19 +75,21 @@ def __init__(self, gmm, swap=False, diff=False): # p(x), which is used to compute posterior prob. for a given source # spectral feature in mapping stage. self.px = GaussianMixture( - n_components=self.num_mixtures, covariance_type="full") + n_components=self.num_mixtures, covariance_type="full" + ) self.px.means_ = self.src_means self.px.covariances_ = self.covarXX self.px.weights_ = self.weights self.px.precisions_cholesky_ = _compute_precision_cholesky( - self.px.covariances_, "full") + self.px.covariances_, "full" + ) def transform(self, src): if src.ndim == 2: tgt = np.zeros_like(src) for idx, x in enumerate(src): y = self._transform_frame(x) - tgt[idx][:len(y)] = y + tgt[idx][: len(y)] = y return tgt else: return self._transform_frame(src) @@ -201,8 +239,9 @@ def transform(self, src): for t in range(T): m = optimum_mix[t] # Eq. (23), with approximating covariances as diagonals - D[t] = np.diag(self.covarYY[m]) - np.diag(self.covarYX[m]) / \ - np.diag(self.covarXX[m]) * np.diag(self.covarXY[m]) + D[t] = np.diag(self.covarYY[m]) - np.diag(self.covarYX[m]) / np.diag( + self.covarXX[m] + ) * np.diag(self.covarXY[m]) # Once we have mean and variance over frames, then we can do MLPG return mlpg(E, D, self.windows) diff --git a/nnmnkwii/datasets/__init__.py b/nnmnkwii/datasets/__init__.py index 09d00f42..5b405dbd 100644 --- a/nnmnkwii/datasets/__init__.py +++ b/nnmnkwii/datasets/__init__.py @@ -173,7 +173,7 @@ def asarray(self, padded_length=None, dtype=np.float32, D = self[0].shape[-1] N = len(self) X = np.zeros((N, T, D), dtype=dtype) - lengths = np.zeros(N, dtype=np.int) + lengths = np.zeros(N, dtype=int) if verbose > 0: def custom_range(x): diff --git a/nnmnkwii/frontend/merlin.py b/nnmnkwii/frontend/merlin.py index a175b06f..4aa12f62 100644 --- a/nnmnkwii/frontend/merlin.py +++ b/nnmnkwii/frontend/merlin.py @@ -1,5 +1,3 @@ -# coding: utf-8 - # Part of code here is adapted from Merlin. Their license follows: ########################################################################## # The Neural Network (NN) based Speech Synthesis System @@ -40,12 +38,9 @@ # THIS SOFTWARE. ########################################################################## -from __future__ import division, print_function, absolute_import - import numpy as np - -from nnmnkwii.io import hts from nnmnkwii.frontend import NOTE_MAPPING +from nnmnkwii.io import hts def get_frame_feature_size(subphone_features="full"): @@ -54,34 +49,34 @@ def get_frame_feature_size(subphone_features="full"): return 0 subphone_features = subphone_features.strip().lower() if subphone_features == "none": - raise ValueError( - "subphone_features = 'none' is deprecated, use None instead") - if subphone_features == 'full': + raise ValueError("subphone_features = 'none' is deprecated, use None instead") + if subphone_features == "full": return 9 # zhizheng's original 5 state features + 4 phoneme features - elif subphone_features == 'minimal_frame': + elif subphone_features == "minimal_frame": # the minimal features necessary to go from a state-level to # frame-level model return 2 - elif subphone_features == 'state_only': + elif subphone_features == "state_only": return 1 # this is equivalent to a state-based system - elif subphone_features == 'frame_only': + elif subphone_features == "frame_only": # this is equivalent to a frame-based system without relying on # state-features return 1 - elif subphone_features == 'uniform_state': + elif subphone_features == "uniform_state": # this is equivalent to a frame-based system with uniform # state-features return 2 - elif subphone_features == 'minimal_phoneme': + elif subphone_features == "minimal_phoneme": # this is equivalent to a frame-based system with minimal features return 3 - elif subphone_features == 'coarse_coding': + elif subphone_features == "coarse_coding": # this is equivalent to a frame-based positioning system reported in # Heiga Zen's work return 4 else: raise ValueError( - 'Unknown value for subphone_features: %s' % (subphone_features)) + "Unknown value for subphone_features: %s" % (subphone_features) + ) assert False @@ -101,6 +96,7 @@ def compute_coarse_coding_features(num_states=3, npoints=600): sigma = 0.4 from scipy.stats import norm + cc_features[0, :] = norm(mu1, sigma).pdf(x1) cc_features[1, :] = norm(mu2, sigma).pdf(x2) cc_features[2, :] = norm(mu3, sigma).pdf(x3) @@ -126,10 +122,13 @@ def extract_coarse_coding_features_relative(cc_features, phone_duration): def pattern_matching_binary(binary_dict, label): dict_size = len(binary_dict) - lab_binary_vector = np.zeros((1, dict_size), dtype=np.int) + lab_binary_vector = np.zeros((1, dict_size), dtype=int) for i in range(dict_size): current_question_list = binary_dict[i] + # NOTE: newer version returns tuple of (name, question) + if isinstance(current_question_list, tuple): + current_question_list = current_question_list[1] binary_flag = 0 for iq in range(len(current_question_list)): current_compiled = current_question_list[iq] @@ -143,15 +142,20 @@ def pattern_matching_binary(binary_dict, label): return lab_binary_vector -def pattern_matching_continous_position(continuous_dict, label): - dict_size = len(continuous_dict) +def pattern_matching_continous_position(numeric_dict, label): + dict_size = len(numeric_dict) lab_continuous_vector = np.zeros((1, dict_size), dtype=np.float32) for i in range(dict_size): + current_compiled = numeric_dict[i] - continuous_value = -1.0 - - current_compiled = continuous_dict[i] + # NOTE: newer version returns tuple of (name, question) + if isinstance(current_compiled, tuple): + current_compiled = current_compiled[1] + if "([-\d]+)" in current_compiled.pattern: + continuous_value = -50.0 + else: + continuous_value = -1.0 ms = current_compiled.search(label) if ms is not None: @@ -169,13 +173,15 @@ def pattern_matching_continous_position(continuous_dict, label): return lab_continuous_vector -def load_labels_with_phone_alignment(hts_labels, - binary_dict, - continuous_dict, - subphone_features=None, - add_frame_features=False, - frame_shift=50000): - dict_size = len(binary_dict) + len(continuous_dict) +def load_labels_with_phone_alignment( + hts_labels, + binary_dict, + numeric_dict, + subphone_features=None, + add_frame_features=False, + frame_shift=50000, +): + dict_size = len(binary_dict) + len(numeric_dict) frame_feature_size = get_frame_feature_size(subphone_features) dimension = frame_feature_size + dict_size @@ -193,86 +199,96 @@ def load_labels_with_phone_alignment(hts_labels, for idx, (start_time, end_time, full_label) in enumerate(hts_labels): frame_number = int(end_time / frame_shift) - int(start_time / frame_shift) - label_binary_vector = pattern_matching_binary( - binary_dict, full_label) + label_binary_vector = pattern_matching_binary(binary_dict, full_label) # if there is no CQS question, the label_continuous_vector will # become to empty label_continuous_vector = pattern_matching_continous_position( - continuous_dict, full_label) + numeric_dict, full_label + ) label_vector = np.concatenate( - [label_binary_vector, label_continuous_vector], axis=1) + [label_binary_vector, label_continuous_vector], axis=1 + ) if subphone_features == "coarse_coding": - cc_feat_matrix = extract_coarse_coding_features_relative(cc_features, - frame_number) + cc_feat_matrix = extract_coarse_coding_features_relative( + cc_features, frame_number + ) if add_frame_features: current_block_binary_array = np.zeros( - (frame_number, dict_size + frame_feature_size)) + (frame_number, dict_size + frame_feature_size) + ) for i in range(frame_number): - current_block_binary_array[i, - 0:dict_size] = label_vector + current_block_binary_array[i, 0:dict_size] = label_vector - if subphone_features == 'minimal_phoneme': + if subphone_features == "minimal_phoneme": # features which distinguish frame position in phoneme # fraction through phone forwards - current_block_binary_array[i, dict_size] = float( - i + 1) / float(frame_number) + current_block_binary_array[i, dict_size] = float(i + 1) / float( + frame_number + ) # fraction through phone backwards current_block_binary_array[i, dict_size + 1] = float( - frame_number - i) / float(frame_number) + frame_number - i + ) / float(frame_number) # phone duration - current_block_binary_array[i, - dict_size + 2] = float(frame_number) + current_block_binary_array[i, dict_size + 2] = float(frame_number) - elif subphone_features == 'coarse_coding': + elif subphone_features == "coarse_coding": # features which distinguish frame position in phoneme # using three continous numerical features - current_block_binary_array[i, - dict_size + 0] = cc_feat_matrix[i, 0] - current_block_binary_array[i, - dict_size + 1] = cc_feat_matrix[i, 1] - current_block_binary_array[i, - dict_size + 2] = cc_feat_matrix[i, 2] - current_block_binary_array[i, - dict_size + 3] = float(frame_number) + current_block_binary_array[i, dict_size + 0] = cc_feat_matrix[i, 0] + current_block_binary_array[i, dict_size + 1] = cc_feat_matrix[i, 1] + current_block_binary_array[i, dict_size + 2] = cc_feat_matrix[i, 2] + current_block_binary_array[i, dict_size + 3] = float(frame_number) elif subphone_features is None: pass else: raise ValueError( "Combination of subphone_features and add_frame_features is not supported: {}, {}".format( - subphone_features, add_frame_features)) - label_feature_matrix[label_feature_index:label_feature_index + - frame_number, ] = current_block_binary_array + subphone_features, add_frame_features + ) + ) + label_feature_matrix[ + label_feature_index : label_feature_index + frame_number, + ] = current_block_binary_array label_feature_index = label_feature_index + frame_number elif subphone_features is None: current_block_binary_array = label_vector - label_feature_matrix[label_feature_index:label_feature_index + - 1, ] = current_block_binary_array + label_feature_matrix[ + label_feature_index : label_feature_index + 1, + ] = current_block_binary_array label_feature_index = label_feature_index + 1 else: pass # omg if label_feature_index == 0: - raise ValueError("Combination of subphone_features and add_frame_features is not supported: {}, {}".format( - subphone_features, add_frame_features)) + raise ValueError( + "Combination of subphone_features and add_frame_features is not supported: {}, {}".format( + subphone_features, add_frame_features + ) + ) - label_feature_matrix = label_feature_matrix[0:label_feature_index, ] + label_feature_matrix = label_feature_matrix[ + 0:label_feature_index, + ] return label_feature_matrix -def load_labels_with_state_alignment(hts_labels, - binary_dict, - continuous_dict, - subphone_features=None, - add_frame_features=False, - frame_shift=50000): - dict_size = len(binary_dict) + len(continuous_dict) +def load_labels_with_state_alignment( + hts_labels, + binary_dict, + numeric_dict, + subphone_features=None, + add_frame_features=False, + frame_shift=50000, +): + dict_size = len(binary_dict) + len(numeric_dict) frame_feature_size = get_frame_feature_size(subphone_features) dimension = frame_feature_size + dict_size @@ -290,8 +306,7 @@ def load_labels_with_state_alignment(hts_labels, phone_duration = 0 state_duration_base = 0 - for current_index, (start_time, end_time, - full_label) in enumerate(hts_labels): + for current_index, (start_time, end_time, full_label) in enumerate(hts_labels): # remove state information [k] assert full_label[-1] == "]" full_label_length = len(full_label) - 3 @@ -308,15 +323,16 @@ def load_labels_with_state_alignment(hts_labels, phone_duration = frame_number state_duration_base = 0 - label_binary_vector = pattern_matching_binary( - binary_dict, full_label) + label_binary_vector = pattern_matching_binary(binary_dict, full_label) # if there is no CQS question, the label_continuous_vector will # become to empty label_continuous_vector = pattern_matching_continous_position( - continuous_dict, full_label) + numeric_dict, full_label + ) label_vector = np.concatenate( - [label_binary_vector, label_continuous_vector], axis=1) + [label_binary_vector, label_continuous_vector], axis=1 + ) for i in range(state_number - 1): s, e, _ = hts_labels[current_index + i + 1] @@ -324,115 +340,131 @@ def load_labels_with_state_alignment(hts_labels, if subphone_features == "coarse_coding": cc_feat_matrix = extract_coarse_coding_features_relative( - cc_features, phone_duration) + cc_features, phone_duration + ) if add_frame_features: current_block_binary_array = np.zeros( - (frame_number, dict_size + frame_feature_size)) + (frame_number, dict_size + frame_feature_size) + ) for i in range(frame_number): - current_block_binary_array[i, - 0: dict_size] = label_vector + current_block_binary_array[i, 0:dict_size] = label_vector - if subphone_features == 'full': + if subphone_features == "full": # Zhizheng's original 9 subphone features: # fraction through state (forwards) - current_block_binary_array[i, dict_size] = float( - i + 1) / float(frame_number) + current_block_binary_array[i, dict_size] = float(i + 1) / float( + frame_number + ) # fraction through state (backwards) current_block_binary_array[i, dict_size + 1] = float( - frame_number - i) / float(frame_number) + frame_number - i + ) / float(frame_number) # length of state in frames - current_block_binary_array[i, - dict_size + 2] = float(frame_number) + current_block_binary_array[i, dict_size + 2] = float(frame_number) # state index (counting forwards) - current_block_binary_array[i, - dict_size + 3] = float(state_index) + current_block_binary_array[i, dict_size + 3] = float(state_index) # state index (counting backwards) - current_block_binary_array[i, dict_size + - 4] = float(state_index_backward) + current_block_binary_array[i, dict_size + 4] = float( + state_index_backward + ) # length of phone in frames - current_block_binary_array[i, - dict_size + 5] = float(phone_duration) + current_block_binary_array[i, dict_size + 5] = float(phone_duration) # fraction of the phone made up by current state - current_block_binary_array[i, dict_size + - 6] = float(frame_number) / float(phone_duration) + current_block_binary_array[i, dict_size + 6] = float( + frame_number + ) / float(phone_duration) # fraction through phone (backwards) current_block_binary_array[i, dict_size + 7] = float( - phone_duration - i - state_duration_base) / float(phone_duration) + phone_duration - i - state_duration_base + ) / float(phone_duration) # fraction through phone (forwards) current_block_binary_array[i, dict_size + 8] = float( - state_duration_base + i + 1) / float(phone_duration) + state_duration_base + i + 1 + ) / float(phone_duration) - elif subphone_features == 'state_only': + elif subphone_features == "state_only": # features which only distinguish state: current_block_binary_array[i, dict_size] = float( - state_index) # state index (counting forwards) + state_index + ) # state index (counting forwards) - elif subphone_features == 'frame_only': + elif subphone_features == "frame_only": # features which distinguish frame position in phoneme: current_frame_number += 1 # fraction through phone (counting forwards) current_block_binary_array[i, dict_size] = float( - current_frame_number) / float(phone_duration) + current_frame_number + ) / float(phone_duration) - elif subphone_features == 'uniform_state': + elif subphone_features == "uniform_state": # features which distinguish frame position in phoneme: current_frame_number += 1 # fraction through phone (counting forwards) current_block_binary_array[i, dict_size] = float( - current_frame_number) / float(phone_duration) + current_frame_number + ) / float(phone_duration) new_state_index = max( - 1, round(float(current_frame_number) / float(phone_duration) * 5)) + 1, + round(float(current_frame_number) / float(phone_duration) * 5), + ) # state index (counting forwards) - current_block_binary_array[i, - dict_size + 1] = float(new_state_index) + current_block_binary_array[i, dict_size + 1] = float( + new_state_index + ) elif subphone_features == "coarse_coding": # features which distinguish frame position in phoneme # using three continous numerical features - current_block_binary_array[i, dict_size + - 0] = cc_feat_matrix[current_frame_number, 0] - current_block_binary_array[i, dict_size + - 1] = cc_feat_matrix[current_frame_number, 1] - current_block_binary_array[i, dict_size + - 2] = cc_feat_matrix[current_frame_number, 2] - current_block_binary_array[i, - dict_size + 3] = float(phone_duration) + current_block_binary_array[i, dict_size + 0] = cc_feat_matrix[ + current_frame_number, 0 + ] + current_block_binary_array[i, dict_size + 1] = cc_feat_matrix[ + current_frame_number, 1 + ] + current_block_binary_array[i, dict_size + 2] = cc_feat_matrix[ + current_frame_number, 2 + ] + current_block_binary_array[i, dict_size + 3] = float(phone_duration) current_frame_number += 1 - elif subphone_features == 'minimal_frame': + elif subphone_features == "minimal_frame": # features which distinguish state and minimally frame # position in state: - current_block_binary_array[i, dict_size] = float( - i + 1) / float(frame_number) # fraction through state (forwards) + current_block_binary_array[i, dict_size] = float(i + 1) / float( + frame_number + ) # fraction through state (forwards) # state index (counting forwards) - current_block_binary_array[i, - dict_size + 1] = float(state_index) + current_block_binary_array[i, dict_size + 1] = float(state_index) elif subphone_features is None: pass else: assert False - label_feature_matrix[label_feature_index:label_feature_index + - frame_number] = current_block_binary_array + label_feature_matrix[ + label_feature_index : label_feature_index + frame_number + ] = current_block_binary_array label_feature_index = label_feature_index + frame_number - elif subphone_features == 'state_only' and state_index == state_number: + elif subphone_features == "state_only" and state_index == state_number: # TODO: this pass seems not working current_block_binary_array = np.zeros( - (state_number, dict_size + frame_feature_size)) + (state_number, dict_size + frame_feature_size) + ) for i in range(state_number): - current_block_binary_array[i, - 0:dict_size] = label_vector + current_block_binary_array[i, 0:dict_size] = label_vector current_block_binary_array[i, dict_size] = float( - i + 1) # state index (counting forwards) - label_feature_matrix[label_feature_index:label_feature_index + - state_number, ] = current_block_binary_array + i + 1 + ) # state index (counting forwards) + label_feature_matrix[ + label_feature_index : label_feature_index + state_number, + ] = current_block_binary_array label_feature_index = label_feature_index + state_number elif subphone_features is None and state_index == state_number: current_block_binary_array = label_vector - label_feature_matrix[label_feature_index:label_feature_index + - 1, ] = current_block_binary_array + label_feature_matrix[ + label_feature_index : label_feature_index + 1, + ] = current_block_binary_array label_feature_index = label_feature_index + 1 else: pass @@ -441,10 +473,15 @@ def load_labels_with_state_alignment(hts_labels, # omg if label_feature_index == 0: - raise ValueError("Combination of subphone_features and add_frame_features is not supported: {}, {}".format( - subphone_features, add_frame_features)) - - label_feature_matrix = label_feature_matrix[0:label_feature_index, ] + raise ValueError( + "Combination of subphone_features and add_frame_features is not supported: {}, {}".format( + subphone_features, add_frame_features + ) + ) + + label_feature_matrix = label_feature_matrix[ + 0:label_feature_index, + ] return label_feature_matrix @@ -464,7 +501,7 @@ def linguistic_features(hts_labels, *args, **kwargs): Args: hts_label (hts.HTSLabelFile): Input full-context label file binary_dict (dict): Dictionary used to extract binary features - continuous_dict (dict): Dictionary used to extrract continuous features + numeric_dict (dict): Dictionary used to extrract continuous features subphone_features (dict): Type of sub-phone features. According to the Merlin's source code, None, ``full``, ``state_only``, ``frame_only``, ``uniform_state``, ``minimal_phoneme`` and @@ -485,12 +522,12 @@ def linguistic_features(hts_labels, *args, **kwargs): >>> from nnmnkwii.io import hts >>> from nnmnkwii.util import example_label_file, example_question_file >>> labels = hts.load(example_label_file(phone_level=False)) - >>> binary_dict, continuous_dict = hts.load_question_set(example_question_file()) - >>> features = fe.linguistic_features(labels, binary_dict, continuous_dict, + >>> binary_dict, numeric_dict = hts.load_question_set(example_question_file()) + >>> features = fe.linguistic_features(labels, binary_dict, numeric_dict, ... subphone_features="full", add_frame_features=True) >>> features.shape (615, 425) - >>> features = fe.linguistic_features(labels, binary_dict, continuous_dict, + >>> features = fe.linguistic_features(labels, binary_dict, numeric_dict, ... subphone_features=None, add_frame_features=False) >>> features.shape (40, 416) @@ -501,12 +538,12 @@ def linguistic_features(hts_labels, *args, **kwargs): >>> from nnmnkwii.io import hts >>> from nnmnkwii.util import example_label_file, example_question_file >>> labels = hts.load(example_label_file(phone_level=True)) - >>> binary_dict, continuous_dict = hts.load_question_set(example_question_file()) - >>> features = fe.linguistic_features(labels, binary_dict, continuous_dict, + >>> binary_dict, numeric_dict = hts.load_question_set(example_question_file()) + >>> features = fe.linguistic_features(labels, binary_dict, numeric_dict, ... subphone_features="coarse_coding", add_frame_features=True) >>> features.shape (615, 420) - >>> features = fe.linguistic_features(labels, binary_dict, continuous_dict, + >>> features = fe.linguistic_features(labels, binary_dict, numeric_dict, ... subphone_features=None, add_frame_features=False) >>> features.shape (40, 416) @@ -518,11 +555,13 @@ def linguistic_features(hts_labels, *args, **kwargs): return load_labels_with_phone_alignment(hts_labels, *args, **kwargs) -def extract_dur_from_state_alignment_labels(hts_labels, - feature_type="numerical", - unit_size="state", - feature_size="phoneme", - frame_shift=50000): +def extract_dur_from_state_alignment_labels( + hts_labels, + feature_type="numerical", + unit_size="state", + feature_size="phoneme", + frame_shift=50000, +): if feature_type not in ["binary", "numerical"]: raise ValueError("Not supported") if unit_size not in ["phoneme", "state"]: @@ -532,26 +571,22 @@ def extract_dur_from_state_alignment_labels(hts_labels, dur_dim = hts_labels.num_states() if unit_size == "state" else 1 if feature_size == "phoneme": - dur_feature_matrix = np.empty( - (hts_labels.num_phones(), dur_dim), dtype=np.int) + dur_feature_matrix = np.empty((hts_labels.num_phones(), dur_dim), dtype=int) else: - dur_feature_matrix = np.empty( - (hts_labels.num_frames(), dur_dim), dtype=np.int) + dur_feature_matrix = np.empty((hts_labels.num_frames(), dur_dim), dtype=int) current_dur_array = np.zeros((dur_dim, 1)) state_number = hts_labels.num_states() dur_dim = state_number dur_feature_index = 0 - for current_index, (start_time, end_time, - full_label) in enumerate(hts_labels): + for current_index, (start_time, end_time, full_label) in enumerate(hts_labels): # remove state information [k] full_label_length = len(full_label) - 3 state_index = full_label[full_label_length + 1] state_index = int(state_index) - 1 - frame_number = ( - end_time - start_time) // frame_shift + frame_number = (end_time - start_time) // frame_shift if state_index == 1: phone_duration = frame_number @@ -576,7 +611,8 @@ def extract_dur_from_state_alignment_labels(hts_labels, current_block_array = current_dur_array.transpose() if feature_size == "frame": current_block_array = np.tile( - current_dur_array.transpose(), (frame_number, 1)) + current_dur_array.transpose(), (frame_number, 1) + ) elif unit_size == "phoneme": current_block_array = np.array([phone_duration]) else: @@ -584,25 +620,31 @@ def extract_dur_from_state_alignment_labels(hts_labels, # writing into dur_feature_matrix if feature_size == "frame": - dur_feature_matrix[dur_feature_index:dur_feature_index + - frame_number, ] = current_block_array + dur_feature_matrix[ + dur_feature_index : dur_feature_index + frame_number, + ] = current_block_array dur_feature_index = dur_feature_index + frame_number elif feature_size == "phoneme" and state_index == state_number: - dur_feature_matrix[dur_feature_index:dur_feature_index + - 1, ] = current_block_array + dur_feature_matrix[ + dur_feature_index : dur_feature_index + 1, + ] = current_block_array dur_feature_index = dur_feature_index + 1 else: pass - dur_feature_matrix = dur_feature_matrix[0:dur_feature_index, ] + dur_feature_matrix = dur_feature_matrix[ + 0:dur_feature_index, + ] return dur_feature_matrix -def extract_dur_from_phone_alignment_labels(hts_labels, - feature_type="numerical", - unit_size="phoneme", - feature_size="phoneme", - frame_shift=50000): +def extract_dur_from_phone_alignment_labels( + hts_labels, + feature_type="numerical", + unit_size="phoneme", + feature_size="phoneme", + frame_shift=50000, +): if feature_type not in ["binary", "numerical"]: raise ValueError("Not supported") if unit_size != "phoneme": @@ -610,11 +652,9 @@ def extract_dur_from_phone_alignment_labels(hts_labels, if feature_size not in ["phoneme", "frame"]: raise ValueError("Not supported") if feature_size == "phoneme": - dur_feature_matrix = np.empty( - (hts_labels.num_phones(), 1), dtype=np.int) + dur_feature_matrix = np.empty((hts_labels.num_phones(), 1), dtype=int) else: - dur_feature_matrix = np.empty( - (hts_labels.num_frames(), 1), dtype=np.int) + dur_feature_matrix = np.empty((hts_labels.num_frames(), 1), dtype=int) dur_feature_index = 0 for current_index, (start_time, end_time, _) in enumerate(hts_labels): frame_number = (end_time - start_time) / frame_shift @@ -631,12 +671,14 @@ def extract_dur_from_phone_alignment_labels(hts_labels, # writing into dur_feature_matrix if feature_size == "frame": - dur_feature_matrix[dur_feature_index:dur_feature_index + - frame_number] = current_block_array + dur_feature_matrix[ + dur_feature_index : dur_feature_index + frame_number + ] = current_block_array dur_feature_index = dur_feature_index + frame_number elif feature_size == "phoneme": - dur_feature_matrix[dur_feature_index:dur_feature_index + - 1] = current_block_array + dur_feature_matrix[ + dur_feature_index : dur_feature_index + 1 + ] = current_block_array dur_feature_index = dur_feature_index + 1 else: assert False @@ -690,8 +732,6 @@ def duration_features(hts_labels, *args, **kwargs): """ if hts_labels.is_state_alignment_label(): - return extract_dur_from_state_alignment_labels( - hts_labels, *args, **kwargs) + return extract_dur_from_state_alignment_labels(hts_labels, *args, **kwargs) else: - return extract_dur_from_phone_alignment_labels( - hts_labels, *args, **kwargs) + return extract_dur_from_phone_alignment_labels(hts_labels, *args, **kwargs) diff --git a/nnmnkwii/io/__init__.py b/nnmnkwii/io/__init__.py index d675d93d..e69de29b 100644 --- a/nnmnkwii/io/__init__.py +++ b/nnmnkwii/io/__init__.py @@ -1 +0,0 @@ -from __future__ import division, print_function, absolute_import diff --git a/nnmnkwii/io/hts.py b/nnmnkwii/io/hts.py index bb123790..20ffab42 100644 --- a/nnmnkwii/io/hts.py +++ b/nnmnkwii/io/hts.py @@ -38,12 +38,11 @@ # THIS SOFTWARE. ########################################################################## -from __future__ import division, print_function, absolute_import - -import numpy as np import re from copy import copy +import numpy as np + class HTSLabelFile(object): """Memory representation for HTS-style context labels (a.k.a HTK alignment). @@ -132,8 +131,12 @@ def __repr__(self): def round_(self): s = self.frame_shift - self.start_times = list(np.round(np.asarray(self.start_times) / s).astype(np.int64) * s) - self.end_times = list(np.round(np.asarray(self.end_times) / s).astype(np.int64) * s) + self.start_times = list( + np.round(np.asarray(self.start_times) / s).astype(np.int64) * s + ) + self.end_times = list( + np.round(np.asarray(self.end_times) / s).astype(np.int64) * s + ) return self def append(self, label, strict=True): @@ -158,11 +161,15 @@ def append(self, label, strict=True): if start_time >= end_time: raise ValueError( "end_time ({}) must be larger than start_time ({}).".format( - end_time, start_time)) + end_time, start_time + ) + ) if len(self.end_times) > 0 and start_time != self.end_times[-1]: raise ValueError( "start_time ({}) must be equal to the last end_time ({}).".format( - start_time, self.end_times[-1])) + start_time, self.end_times[-1] + ) + ) self.start_times.append(start_time) self.end_times.append(end_time) @@ -178,8 +185,9 @@ def set_durations(self, durations, frame_shift=50000): offset = self.start_times[0] # Unwrap state-axis - end_times = offset + np.cumsum( - durations.reshape(-1, 1) * frame_shift).astype(np.int64) + end_times = offset + np.cumsum(durations.reshape(-1, 1) * frame_shift).astype( + np.int64 + ) if len(end_times) != len(self.end_times): raise RuntimeError("Unexpected input, maybe") start_times = np.hstack((offset, end_times[:-1])).astype(np.int64) @@ -272,15 +280,15 @@ def silence_frame_indices(self, regex=None, frame_shift=50000): end_times = np.array(self.end_times) s = start_times[indices] // frame_shift e = end_times[indices] // frame_shift - return np.unique(np.concatenate( - [np.arange(a, b) for (a, b) in zip(s, e)], axis=0)).astype(np.int64) + return np.unique( + np.concatenate([np.arange(a, b) for (a, b) in zip(s, e)], axis=0) + ).astype(np.int64) def is_state_alignment_label(self): - return self.contexts[0][-1] == ']' and self.contexts[0][-3] == '[' + return self.contexts[0][-1] == "]" and self.contexts[0][-3] == "[" def num_states(self): - """Returnes number of states exclusing special begin/end states. - """ + """Returnes number of states exclusing special begin/end states.""" if not self.is_state_alignment_label(): return 1 @@ -332,32 +340,34 @@ def wildcards2regex(question, convert_number_pattern=False, convert_svs_pattern= extracting continuous values): (\d+) -- handles digit without decimal point ([\d\.]+) -- handles digits with and without decimal point + ([-\d]+) -- handles positive and negative numbers """ # handle HTK wildcards (and lack of them) at ends of label: prefix = "" postfix = "" - if '*' in question: - if not question.startswith('*'): + if "*" in question: + if not question.startswith("*"): prefix = "\A" - if not question.endswith('*'): + if not question.endswith("*"): postfix = "\Z" - question = question.strip('*') + question = question.strip("*") question = re.escape(question) # convert remaining HTK wildcards * and ? to equivalent regex: - question = question.replace('\\*', '.*') + question = question.replace("\\*", ".*") question = prefix + question + postfix if convert_number_pattern: - question = question.replace('\\(\\\\d\\+\\)', '(\d+)') - question = question.replace( - '\\(\\[\\\\d\\\\\\.\\]\\+\\)', '([\d\.]+)') + question = question.replace("\\(\\\\d\\+\\)", "(\d+)") + question = question.replace("\\(\\[\\-\\\\d\\]\\+\\)", "([-\d]+)") + question = question.replace("\\(\\[\\\\d\\\\\\.\\]\\+\\)", "([\d\.]+)") # NOTE: singing voice synthesis specific handling if convert_svs_pattern: question = question.replace( - '\\(\\[A\\-Z\\]\\[b\\]\\?\\[0\\-9\\]\\+\\)', '([A-Z][b]?[0-9]+)') - question = question.replace('\\(\\\\NOTE\\)', '([A-Z][b]?[0-9]+)') - question = question.replace('\\(\\[pm\\]\\\\d\\+\\)', '([pm]\d+)') + "\\(\\[A\\-Z\\]\\[b\\]\\?\\[0\\-9\\]\\+\\)", "([A-Z][b]?[0-9]+)" + ) + question = question.replace("\\(\\\\NOTE\\)", "([A-Z][b]?[0-9]+)") + question = question.replace("\\(\\[pm\\]\\\\d\\+\\)", "([pm]\d+)") return question @@ -377,54 +387,124 @@ def load_question_set(qs_file_name, append_hat_for_LL=True, convert_svs_pattern= convert_svs_pattern (bool): Convert SVS specific patterns. Returns: - (binary_dict, continuous_dict): Binary/continuous feature extraction + (binary_dict, numeric_dict): Binary/numeric feature extraction regexes. Examples: >>> from nnmnkwii.io import hts >>> from nnmnkwii.util import example_question_file - >>> binary_dict, continuous_dict = hts.load_question_set(example_question_file()) + >>> binary_dict, numeric_dict = hts.load_question_set(example_question_file()) """ with open(qs_file_name) as f: lines = f.readlines() binary_qs_index = 0 continuous_qs_index = 0 binary_dict = {} - continuous_dict = {} + numeric_dict = {} - LL = re.compile(re.escape('LL-')) + LL = re.compile(re.escape("LL-")) for line in lines: - line = line.replace('\n', '') + line = line.replace("\n", "") temp_list = line.split() if len(line) <= 0 or line.startswith("#"): continue - temp_list = line.split('{') + name = temp_list[1].replace('"', "").replace("'", "") + temp_list = line.split("{") temp_line = temp_list[1] - temp_list = temp_line.split('}') + temp_list = temp_line.split("}") temp_line = temp_list[0] temp_line = temp_line.strip() - question_list = temp_line.split(',') + question_list = temp_line.split(",") - temp_list = line.split(' ') + temp_list = line.split(" ") question_key = temp_list[1] - if temp_list[0] == 'CQS': + if temp_list[0] == "CQS": assert len(question_list) == 1 processed_question = wildcards2regex( - question_list[0], convert_number_pattern=True) - continuous_dict[continuous_qs_index] = re.compile( - processed_question) # save pre-compiled regular expression + question_list[0], + convert_number_pattern=True, + convert_svs_pattern=convert_svs_pattern, + ) + numeric_dict[continuous_qs_index] = ( + name, + re.compile(processed_question), + ) # save pre-compiled regular expression continuous_qs_index = continuous_qs_index + 1 - elif temp_list[0] == 'QS': + elif temp_list[0] == "QS": re_list = [] for temp_question in question_list: processed_question = wildcards2regex(temp_question) - if append_hat_for_LL and LL.search(question_key) and processed_question[0] != '^': - processed_question = '^' + processed_question + if ( + append_hat_for_LL + and LL.search(question_key) + and processed_question[0] != "^" + ): + processed_question = "^" + processed_question re_list.append(re.compile(processed_question)) - binary_dict[binary_qs_index] = re_list + binary_dict[binary_qs_index] = (name, re_list) binary_qs_index = binary_qs_index + 1 else: raise RuntimeError("Not supported question format") - return binary_dict, continuous_dict + return binary_dict, numeric_dict + + +def write_audacity_labels(dst_path, labels): + """Write audacity labels from HTS-style labels + + Args: + dst_path (str): The output file path. + labels (HTSLabelFile): HTS style labels + """ + with open(dst_path, "w") as of: + for s, e, l in labels: + s, e = s * 1e-7, e * 1e-7 + if "-" in l and "+" in l: + ph = l.split("-")[1].split("+")[0] + else: + ph = l + of.write("{:.4f}\t{:.4f}\t{}\n".format(s, e, ph)) + + +def write_textgrid(dst_path, labels): + """Write TextGrid from HTS-style labels + + Args: + dst_path (str): The output file path. + labels (HTSLabelFile): HTS style labels + """ + template = """File type = "ooTextFile" +Object class = "TextGrid" + +xmin = 0 +xmax = {xmax} +tiers? +size = 1 +item []: + item [1]: + class = "IntervalTier" + name = "phoneme" + xmin = 0 + xmax = {xmax} + intervals: size = {size}""" + template = template.format(xmax=labels.end_times[-1] * 1e-7, size=len(labels)) + + for idx, (s, e, l) in enumerate(labels): + s, e = s * 1e-7, e * 1e-7 + if "-" in l and "+" in l: + ph = l.split("-")[1].split("+")[0] + else: + ph = l + + template += """ + intervals [{idx}]: + xmin = {s} + xmax = {e} + text = "{ph}" """.format( + idx=idx + 1, s=s, e=e, ph=ph + ) + template += "\n" + + with open(dst_path, "w") as of: + of.write(template) diff --git a/nnmnkwii/paramgen/__init__.py b/nnmnkwii/paramgen/__init__.py index 97d276ad..8e388f66 100644 --- a/nnmnkwii/paramgen/__init__.py +++ b/nnmnkwii/paramgen/__init__.py @@ -1,16 +1,5 @@ from __future__ import with_statement, print_function, absolute_import - -try: - import bandmat -except ImportError: - raise ImportError(""" - `bandmat` is required for the `paramgen` module but not found. - Please install it manually by: - `pip install bandmat`. - If you see installation errors, please report to https://github.com/MattShannon/bandmat. - """) - from ._mlpg import build_win_mats, mlpg, mlpg_grad, unit_variance_mlpg_matrix from ._mlpg import reshape_means, full_window_mat diff --git a/nnmnkwii/paramgen/_bandmat/License b/nnmnkwii/paramgen/_bandmat/License new file mode 100644 index 00000000..6285aa94 --- /dev/null +++ b/nnmnkwii/paramgen/_bandmat/License @@ -0,0 +1,23 @@ +Copyright 2013, 2014, 2015, 2016, 2017, 2018 Matt Shannon + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR IMPLIED +WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO +EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING +IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY +OF SUCH DAMAGE. diff --git a/nnmnkwii/paramgen/_bandmat/__init__.py b/nnmnkwii/paramgen/_bandmat/__init__.py new file mode 100644 index 00000000..bdae5264 --- /dev/null +++ b/nnmnkwii/paramgen/_bandmat/__init__.py @@ -0,0 +1,100 @@ +"""A banded matrix library for python. + +This package provides a simple banded matrix library for python. +It supports banded matrix-vector and matrix-matrix multiplication, converting +between full and banded matrix representations, and certain linear algebra +operations on banded matrices. + +Overview +-------- + +A banded matrix is a matrix where only the diagonal, a number of superdiagonals +and a number of subdiagonals are non-zero. +The well-known BLAS interface and LAPACK library for linear algebra define +several banded matrix operations, and some of these, such as banded Cholesky +decomposition, are wrapped in the excellent python package scipy, specifically +in scipy.linalg. +The bandmat package re-uses the banded matrix representation used by BLAS, +LAPACK and scipy.linalg, wrapping it in a lightweight class for ease of use. +See the docstring for the BandMat class for full details of the representation +used. + +The bandmat package provides: + +- a lightweight class wrapping the LAPACK-style banded matrix representation. + This class keeps track of things like bandwidths to allow a more direct + coding style when working with banded matrices. +- some basic banded matrix operations not present in scipy. + For example, banded matrix-vector multiplication is defined by BLAS but not + wrapped by scipy, and banded matrix-matrix multiplication is not defined in + BLAS or in scipy. + The bandmat package contains C implementations of these operations written in + cython. +- helper functions for converting between full and banded matrix + representations. +- certain linear algebra operations on banded matrices. +- an implementation of a fancy indexed += function for numpy arrays. + This is included for (the author's) convenience and is not directly related + to banded matrix manipulation. + +Only square banded matrices are supported by this package. + +Examples +-------- + +Define a banded matrix: +>>> import numpy as np +>>> import bandmat as bm +>>> # Ensure consistent formatting between old and new versions of numpy. +>>> try: +... np.set_printoptions(legacy='1.13') +... except TypeError: +... pass +>>> a_bm = bm.BandMat(1, 1, np.arange(15, dtype=np.float64).reshape((3, 5))) +>>> print(a_bm.data) +[[ 0. 1. 2. 3. 4.] + [ 5. 6. 7. 8. 9.] + [ 10. 11. 12. 13. 14.]] + +Get the equivalent numpy array: +>>> print(a_bm.full()) +[[ 5. 1. 0. 0. 0.] + [ 10. 6. 2. 0. 0.] + [ 0. 11. 7. 3. 0.] + [ 0. 0. 12. 8. 4.] + [ 0. 0. 0. 13. 9.]] + +Take the transpose: +>>> print(a_bm.T.full()) +[[ 5. 10. 0. 0. 0.] + [ 1. 6. 11. 0. 0.] + [ 0. 2. 7. 12. 0.] + [ 0. 0. 3. 8. 13.] + [ 0. 0. 0. 4. 9.]] + +Banded matrix addition: +>>> b_bm = bm.BandMat(0, 1, np.arange(10, dtype=np.float64).reshape((2, 5))) +>>> print((a_bm + b_bm).full()) +[[ 10. 2. 0. 0. 0.] + [ 10. 12. 4. 0. 0.] + [ 0. 11. 14. 6. 0.] + [ 0. 0. 12. 16. 8.] + [ 0. 0. 0. 13. 18.]] + +Banded matrix multiplication: +>>> b_bm = bm.BandMat(0, 1, np.arange(10, dtype=np.float64).reshape((2, 5))) +>>> print(bm.dot_mm(a_bm, b_bm).full()) +[[ 25. 11. 2. 0. 0.] + [ 50. 46. 26. 6. 0.] + [ 0. 66. 71. 45. 12.] + [ 0. 0. 84. 100. 68.] + [ 0. 0. 0. 104. 133.]] +""" + +# Copyright 2013, 2014, 2015, 2016, 2017, 2018 Matt Shannon + +# This file is part of bandmat. +# See `License` for details of license and warranty. + +from .core import * +from .tensor import * \ No newline at end of file diff --git a/nnmnkwii/paramgen/_bandmat/core.pyx b/nnmnkwii/paramgen/_bandmat/core.pyx new file mode 100644 index 00000000..d5e1098d --- /dev/null +++ b/nnmnkwii/paramgen/_bandmat/core.pyx @@ -0,0 +1,704 @@ +# cython: language_level=3 + +"""Core banded matrix definitions and functions.""" + +# Copyright 2013, 2014, 2015, 2016, 2017, 2018 Matt Shannon + +# This file is part of bandmat. +# See `License` for details of license and warranty. + +from nnmnkwii.paramgen._bandmat import full as fl + +import numpy as np + +cimport numpy as cnp +cimport cython + +cnp.import_array() +cnp.import_ufunc() + +class BandMat(object): + """A memory-efficient representation of a square banded matrix. + + An N by N matrix with bandwidth D can be stored efficiently by storing its + band as a rectangular D by N matrix. + This class is a lightweight wrapper around a rectangular matrix being used + in this way, and stores additional details needed to recover the square + matrix such as the lower and upper bandwidths. + + The representation used for the rectangular matrix is the same one used by + BLAS and LAPACK (and thus scipy): the columns of the rectangular matrix are + (parts of) the columns of the square matrix being represented, and the + successive rows of the rectangular matrix give the superdiagonals, diagonal + and subdiagonals in order (starting with the outermost superdiagonal and + ending with the outermost subdiagonal). + See the "Band Storage" section of the LAPACK Users' Guide at + http://www.netlib.org/lapack/lug/node124.html or the docstring for + scipy.linalg.solve_banded for some examples. + + `l` is the number of subdiagonals stored and `u` is the number of + superdiagonals stored. + Thus `l` and `u` determine the band in which the entries of the represented + matrix can be non-zero. + `data` is the LAPACK-style banded matrix representation of the square + matrix (or of the transpose of the square matrix if `transposed` is True) + stored as a numpy array. + Note that if `transposed` is True, `l` and `u` still refer to the square + matrix being represented rather than to its transpose. + """ + def __init__(self, l, u, data, transposed=False): + self.l = l + self.u = u + self.data = data + self.transposed = transposed + + assert self.l >= 0 + assert self.u >= 0 + assert self.data.ndim == 2 + assert self.data.shape[0] == self.l + self.u + 1 + + def __repr__(self): + return ('BandMat(%r, %r, %r, transposed=%r)' % + (self.l, self.u, self.data, self.transposed)) + + @property + def size(self): + """Returns the size of this matrix.""" + return self.data.shape[1] + + @property + def T(self): + """Returns the transpose of this matrix. + + This is a cheap operation since it just sets a flag internally. + The returned BandMat has the same underlying data array as `self`. + """ + return BandMat(self.u, self.l, self.data, + transposed=not self.transposed) + + def full(self): + """Converts this BandMat to a conventional numpy array. + + The returned numpy array represents the same matrix as `self`. + """ + if self.transposed: + return fl.band_c(self.u, self.l, self.data).T + else: + return fl.band_c(self.l, self.u, self.data) + + def copy_exact(self): + """Returns a copy of this BandMat. + + The returned BandMat represents the same matrix as `self`, but has a + newly-created underlying data array. + It has the same `transposed` setting as `self`. + """ + return BandMat(self.l, self.u, self.data.copy(), + transposed=self.transposed) + + def copy(self): + """Returns a copy of this BandMat with transposed set to False. + + The returned BandMat represents the same matrix as `self`, but has a + newly-created underlying data array, and always has `transposed` set to + False. + """ + l = self.l + u = self.u + if self.transposed: + return BandMat(l, u, fl.band_cTe(u, l, self.data)) + else: + return BandMat(l, u, self.data.copy()) + + def equiv(self, l_new=None, u_new=None, transposed_new=None, + zero_extra=False): + """Returns an equivalent BandMat stored differently. + + The returned BandMat represents the same matrix as `self`, but has a + newly-created underlying data array, and has possibly different + parameters `l`, `u` and `transposed`. + The new values of these parameters are given by `l_new`, `u_new` and + `transposed_new`, with the corresponding value from `self` used if any + of these are None. + + If `zero_extra` is True then the underlying data array of the returned + BandMat is guaranteed to have extra entries set to zero. + """ + l = self.l + u = self.u + if l_new is None: + l_new = l + if u_new is None: + u_new = u + if transposed_new is None: + transposed_new = self.transposed + + assert l_new >= l + assert u_new >= u + + data_new = np.empty((l_new + u_new + 1, self.size)) + + ll, uu = (u, l) if transposed_new else (l, u) + ll_new, uu_new = (u_new, l_new) if transposed_new else (l_new, u_new) + + data_new[(uu_new - uu_new):(uu_new - uu)] = 0.0 + data_new[(uu_new + ll + 1):(uu_new + ll_new + 1)] = 0.0 + data_new_co = data_new[(uu_new - uu):(uu_new + ll + 1)] + if self.transposed == transposed_new: + data_new_co[:] = self.data + if zero_extra: + fl.zero_extra_entries(ll, uu, data_new_co) + else: + fl.band_cTe(uu, ll, self.data, target_rect=data_new_co) + + return BandMat(l_new, u_new, data_new, transposed=transposed_new) + + @cython.boundscheck(False) + def plus_equals_band_of(self, mat_bm, double mult=1.0): + """Adds a multiple of a band of another matrix to this matrix in-place. + + Any entries of `mat_bm` which lie outside of `self` are ignored. + Thus to implement conventional matrix addition, `self` must be large + enough to contain the result of the addition, i.e. `self` must have at + least as many subdiagonals and superdiagonals as `mat_bm`. + + The statement `target_bm.plus_equals_band_of(mat_bm, mult)` where + `target_bm` and `mat_bm` are BandMats is the equivalent of: + + target_full += band_ec(l, u, mat_full) * mult + + where `target_full` and `mat_full` are square numpy arrays. + Here `l` is `target_bm.l` and `u` is `target_bm.u`. + """ + cdef long frames + cdef long l_a, u_a, transposed_a + cdef long l_b, u_b, transposed_b + cdef cnp.ndarray[cnp.float64_t, ndim=2] a_data + cdef cnp.ndarray[cnp.float64_t, ndim=2] b_data + + l_a = self.l + u_a = self.u + a_data = self.data + transposed_a = self.transposed + assert l_a >= 0 + assert u_a >= 0 + assert a_data.shape[0] == l_a + u_a + 1 + + l_b = mat_bm.l + u_b = mat_bm.u + b_data = mat_bm.data + transposed_b = mat_bm.transposed + assert l_b >= 0 + assert u_b >= 0 + assert b_data.shape[0] == l_b + u_b + 1 + + frames = a_data.shape[1] + assert b_data.shape[1] == frames + + cdef long o + cdef unsigned long row_a, row_b + cdef long d_a, d_b + cdef unsigned long frame + + for o in range(-min(u_a, u_b), min(l_a, l_b) + 1): + row_a = (l_a - o) if transposed_a else (u_a + o) + row_b = (l_b - o) if transposed_b else (u_b + o) + d_a = o if transposed_a else 0 + d_b = o if transposed_b else 0 + for frame in range(max(0, -o), max(0, frames + min(0, -o))): + a_data[row_a, frame + d_a] += b_data[row_b, frame + d_b] * mult + + return + + def __add__(self, other): + """Sums two banded matrices. + + The expression `a_bm + b_bm` where `a_bm` and `b_bm` are BandMats is + the equivalent of: + + a_full + b_full + + where `a_full` and `b_full` are square numpy arrays. + """ + # (FIXME : is "is 0" the right way to test for this?) + if other is 0: + return self.copy_exact() + elif not isinstance(other, BandMat): + return NotImplemented + + assert self.size == other.size + c_bm = self.equiv(l_new=max(self.l, other.l), + u_new=max(self.u, other.u)) + c_bm.plus_equals_band_of(other) + return c_bm + + def __radd__(self, other): + if other is 0: + # this is so "sum" will work as expected + return self.copy_exact() + else: + return NotImplemented + + def __sub__(self, other): + """Subtracts one banded matrix from another. + + The expression `a_bm - b_bm` where `a_bm` and `b_bm` are BandMats is + the equivalent of: + + a_full - b_full + + where `a_full` and `b_full` are square numpy arrays. + """ + if other is 0: + return self.copy_exact() + elif not isinstance(other, BandMat): + return NotImplemented + + assert self.size == other.size + c_bm = self.equiv(l_new=max(self.l, other.l), + u_new=max(self.u, other.u)) + c_bm.plus_equals_band_of(other, mult=-1.0) + return c_bm + + def __rsub__(self, other): + if other is 0: + return self.__neg__() + else: + return NotImplemented + + def __iadd__(self, other): + """Adds another matrix to this matrix in-place. + + The statement `a_bm += b_bm` where `a_bm` and `b_bm` are BandMats is + the equivalent of: + + a_full += b_full + + where `a_full` and `b_full` are square numpy arrays. + """ + if other is 0: + return self + elif not isinstance(other, BandMat): + return NotImplemented + + assert self.size == other.size + assert self.l >= other.l + assert self.u >= other.u + + self.plus_equals_band_of(other) + return self + + def __isub__(self, other): + """Subtracts another matrix from this matrix in-place. + + The statement `a_bm -= b_bm` where `a_bm` and `b_bm` are BandMats is + the equivalent of: + + a_full -= b_full + + where `a_full` and `b_full` are square numpy arrays. + """ + if other is 0: + return self + elif not isinstance(other, BandMat): + return NotImplemented + + assert self.size == other.size + assert self.l >= other.l + assert self.u >= other.u + + self.plus_equals_band_of(other, mult=-1.0) + return self + + def __pos__(self): + """Take the positive of a banded matrix. + + The expression `+a_bm` where `a_bm` is a BandMat is the equivalent of: + + +a_full + + where `a_full` is a square numpy array. + """ + return BandMat(self.l, self.u, +self.data, + transposed=self.transposed) + + def __neg__(self): + """Take the negative of a banded matrix. + + The expression `-a_bm` where `a_bm` is a BandMat is the equivalent of: + + -a_full + + where `a_full` is a square numpy array. + """ + return BandMat(self.l, self.u, -self.data, + transposed=self.transposed) + + def __mul__(self, other): + """Multiplies a banded matrix by a scalar. + + The expression `a_bm * mult` where `a_bm` is a BandMat is the + equivalent of: + + a_full * mult + + where `a_full` is a square numpy array. + """ + try: + mult = float(other) + except: + return NotImplemented + + return BandMat(self.l, self.u, self.data * mult, + transposed=self.transposed) + + def __rmul__(self, other): + return self.__mul__(other) + + def __floordiv__(self, other): + """Floor-divides a banded matrix by a scalar. + + The expression `a_bm // mult` where `a_bm` is a BandMat is the + equivalent of: + + a_full // mult + + where `a_full` is a square numpy array. + """ + try: + mult = float(other) + except: + return NotImplemented + + return BandMat(self.l, self.u, self.data.__floordiv__(mult), + transposed=self.transposed) + + def __div__(self, other): + """Old-style divides a banded matrix by a scalar. + + When using old-style division (c.f. `from __future__ import division`), + the expression `a_bm / mult` where `a_bm` is a BandMat is the + equivalent of: + + a_full / mult + + where `a_full` is a square numpy array. + """ + try: + mult = float(other) + except: + return NotImplemented + + return BandMat(self.l, self.u, self.data.__div__(mult), + transposed=self.transposed) + + def __truediv__(self, other): + """Divides a banded matrix by a scalar. + + When using new-style division (c.f. `from __future__ import division`), + the expression `a_bm / mult` where `a_bm` is a BandMat is the + equivalent of: + + a_full / mult + + where `a_full` is a square numpy array. + """ + try: + mult = float(other) + except: + return NotImplemented + + return BandMat(self.l, self.u, self.data.__truediv__(mult), + transposed=self.transposed) + + def __imul__(self, other): + """Multiplies this matrix by a scalar in-place. + + The statement `a_bm *= mult` where `a_bm` is a BandMat is the + equivalent of: + + a_full *= mult + + where `a_full` is a square numpy array. + """ + try: + mult = float(other) + except: + return NotImplemented + + self.data *= mult + return self + + def __ifloordiv__(self, other): + """Floor-divides this matrix by a scalar in-place. + + The statement `a_bm //= mult` where `a_bm` is a BandMat is the + equivalent of: + + a_full //= mult + + where `a_full` is a square numpy array. + """ + try: + mult = float(other) + except: + return NotImplemented + + self.data.__ifloordiv__(mult) + return self + + def __idiv__(self, other): + """Old-style divides this matrix by a scalar in-place. + + When using old-style division (c.f. `from __future__ import division`), + the expression `a_bm /= mult` where `a_bm` is a BandMat is the + equivalent of: + + a_full /= mult + + where `a_full` is a square numpy array. + """ + try: + mult = float(other) + except: + return NotImplemented + + self.data.__itruediv__(mult) + return self + + def __itruediv__(self, other): + """Divides this matrix by a scalar in-place. + + When using new-style division (c.f. `from __future__ import division`), + the expression `a_bm /= mult` where `a_bm` is a BandMat is the + equivalent of: + + a_full /= mult + + where `a_full` is a square numpy array. + """ + try: + mult = float(other) + except: + return NotImplemented + + self.data.__itruediv__(mult) + return self + + def reverse_view(self): + """Returns the "time-reversed" version of this matrix. + + This function is implemented by taking a view of the underlying data + array. + To obtain a BandMat with a fresh underlying data array, `.copy_exact()` + should be called on the result. + + The expression `a_bm.reverse_view()` where `a_bm` is a BandMat is the + equivalent of: + + a_full[::-1, ::-1] + + where `a_full` is a square numpy array. + """ + return BandMat(self.u, self.l, self.data[::-1, ::-1], + transposed=self.transposed) + + def sub_matrix_view(self, start, end): + """Returns a sub-matrix of this matrix. + + This is implemented by taking a view of the underlying data array. + To obtain a BandMat with a fresh underlying data array, `.copy_exact()` + should be called on the result. + + The expression `a_bm.sub_matrix_view(start, end)` where `a_bm` is a + BandMat is the equivalent of: + + a_full[start:end, start:end] + + where `a_full` is a square numpy array. + """ + assert 0 <= start <= end <= self.size + return BandMat(self.l, self.u, self.data[:, start:end], + transposed=self.transposed) + + def embed_as_sub_matrix(self, start, size): + """Returns a new matrix containing this matrix as a sub-matrix. + + The statement `b_bm = a_bm.embed_as_sub_matrix(start, size)` where + `a_bm` is a BandMat is the equivalent of: + + end = start + len(a_full) + b_full = np.zeros((size, size)) + b_full[start:end, start:end] = a_full + + where `a_full` is a square numpy array. + """ + end = start + self.size + assert 0 <= start <= end <= size + + l, u = self.l, self.u + ll, uu = (u, l) if self.transposed else (l, u) + + data = np.empty((l + u + 1, size)) + data[:, 0:start] = 0.0 + data[:, start:end] = self.data + data[:, end:size] = 0.0 + fl.zero_extra_entries(ll, uu, data[:, start:end]) + return BandMat(self.l, self.u, data, transposed=self.transposed) + +def zeros(l, u, size): + """Returns the zero matrix as a BandMat. + + The returned BandMat `ret_bm` has `ret_bm.l == l`, `ret_bm.u == u` and + `ret_bm.size == size`. + """ + data = np.zeros((l + u + 1, size)) + return BandMat(l, u, data) + +def from_full(l, u, mat_full): + """Converts a square banded numpy array to a BandMat. + + The returned BandMat represents the same matrix as `mat_full`. + `mat_full` should be a numpy array representing a square matrix with zeros + outside the band specified by `l` and `u`. + An AssertionError is raised if `mat_full` has non-zero entries outside the + specified band. + """ + mat_bm = BandMat(l, u, fl.band_e(l, u, mat_full)) + # check `mat_full` is zero outside the specified band + assert np.all(mat_bm.full() == mat_full) + return mat_bm + +def band_c_bm(l, u, mat_rect): + """Constructs a BandMat from its band. + + The expression `band_c_bm(l, u, mat_rect)` where `mat_rect` is a + rectangular numpy array is the equivalent of: + + band_c(l, u, mat_rect) + + where the returned value is a square numpy array. + """ + return BandMat(l, u, mat_rect.copy()) + +def band_e_bm(l, u, mat_bm): + """Extracts a band of a BandMat. + + The band to extract is specified by `l` and `u`. + + The expression `band_e_bm(l, u, mat_bm)` where `mat_bm` is a BandMat is the + equivalent of: + + band_e(l, u, mat_full) + + where `mat_full` is a square numpy array. + """ + mat_bm_co = band_ec_bm_view(l, u, mat_bm) + mat_bm_new = mat_bm_co.equiv(l_new=l, u_new=u, + transposed_new=False, + zero_extra=True) + return mat_bm_new.data + +band_ce_bm = fl.band_ce + +def band_ec_bm_view(l, u, mat_bm): + """Effectively applies `band_e_bm` then `band_c_bm`, sharing data arrays. + + The combined operation has the effect of zeroing the entries outside the + band specified by `l` and `u`. + This is implemented by taking a view of `mat_bm`'s underlying data array. + To obtain a BandMat with a fresh underlying data array, `.copy_exact()` + should be called on the result. + """ + assert l >= 0 + assert u >= 0 + + l_in = mat_bm.l + u_in = mat_bm.u + l_out = min(l, l_in) + u_out = min(u, u_in) + if mat_bm.transposed: + return BandMat( + l_out, u_out, + mat_bm.data[(l_in - l_out):(l_in + u_out + 1)], + transposed = True + ) + else: + return BandMat( + l_out, u_out, + mat_bm.data[(u_in - u_out):(u_in + l_out + 1)] + ) + +def band_ec_bm(l, u, mat_bm): + """Effectively applies `band_e_bm` then `band_c_bm`. + + The combined operation has the effect of zeroing the entries outside the + band specified by `l` and `u`. + + The expression `band_ec_bm(l, u, mat_bm)` where `mat_bm` is a BandMat is + the equivalent of: + + band_ec(l, u, mat_full) + + where `mat_full` and the returned value are square numpy arrays. + """ + return band_ec_bm_view(l, u, mat_bm).copy_exact() + +def band_e_bm_common(*mat_bms): + """Extracts the band from several BandMats in a common way. + + The same band is used for each of the BandMats, and this band is the + smallest band that encompasses the bands of all the BandMats. + This allows BandMats to be compared with the same tools used to compare + conventional numpy arrays (and without having to explicitly construct the + corresponding full matrices). + + Example usage: + >>> import bandmat as bm + >>> cc = bm.band_e_bm_common + >>> def allclose(x, y): return x.shape == y.shape and np.allclose(x, y) + >>> a_bm = bm.zeros(3, 2, 10) + >>> b_bm = bm.zeros(1, 3, 10).T + >>> # these represent the same matrix + >>> allclose(a_bm.full(), b_bm.full()) + True + >>> # naive comparison doesn't show this + >>> allclose(a_bm.data, b_bm.data) + False + >>> # comparison after normalizing works + >>> allclose(*cc(a_bm, b_bm)) + True + """ + if not mat_bms: + return [] + else: + l = max([ mat_bm.l for mat_bm in mat_bms ]) + u = max([ mat_bm.u for mat_bm in mat_bms ]) + return [ band_e_bm(l, u, mat_bm) for mat_bm in mat_bms ] + +def diag(vec_or_mat_bm): + """Constructs a diagonal BandMat or extracts the diagonal from a BandMat. + + If `vec_or_mat_bm` is a vector `vec` (i.e. a one-dimensional numpy array), + then this function returns a BandMat representing the diagonal matrix with + diagonal `vec`. + If `vec_or_mat_bm` is a BandMat `mat_bm`, then this function returns the + vector consisting of the diagonal of the matrix represented by `mat_bm`. + In both cases the array storing the vector and the BandMat's underlying + data array share memory. + + The expression `diag(vec_or_mat_bm)` where `vec_or_mat_bm` is a vector or a + BandMat is the equivalent of: + + np.diag(vec_or_mat_full) + + where `vec_or_mat_full` is a vector or a square numpy array. + """ + if not isinstance(vec_or_mat_bm, BandMat): + vec = vec_or_mat_bm + assert vec.ndim == 1 + return BandMat(0, 0, np.reshape(vec, (1, -1))) + else: + mat_bm = vec_or_mat_bm + row = mat_bm.l if mat_bm.transposed else mat_bm.u + return mat_bm.data[row] diff --git a/nnmnkwii/paramgen/_bandmat/full.pyx b/nnmnkwii/paramgen/_bandmat/full.pyx new file mode 100644 index 00000000..77f78edd --- /dev/null +++ b/nnmnkwii/paramgen/_bandmat/full.pyx @@ -0,0 +1,200 @@ +# cython: language_level=3 + +"""Operations involving the bands of square matrices. + +This module provides operations involving the bands of square matrices which +are stored as numpy arrays. +For example it provides functions to construct a square matrix with given band, +and to extract the band from an arbitrary square matrix. +Though this is closely related to the representation of square banded matrices +used by the `BandMat` class in the `core` module, it is logically distinct. +""" + +# Copyright 2013, 2014, 2015, 2016, 2017, 2018 Matt Shannon + +# This file is part of bandmat. +# See `License` for details of license and warranty. + +import numpy as np + +cimport numpy as cnp +cimport cython + +cnp.import_array() +cnp.import_ufunc() + +@cython.boundscheck(False) +def band_c(long l, long u, cnp.ndarray[cnp.float64_t, ndim=2] mat_rect): + """Constructs a square banded matrix from its band. + + Given a rectangular numpy array `mat_rect`, this function returns a square + numpy array `mat_full` with its `u` superdiagonals, diagonal and `l` + subdiagonals given by the rows of `mat_rect`. + The part of each column of `mat_full` that lies within the band contains + the same entries as the corresponding column of `mat_rect`. + + Not all the entries of `mat_rect` affect `mat_full`. + The (l, u)-extra entries of a rectangular matrix `mat_rect` are defined as + the entries which have no effect on the result of `band_c(l, u, mat_rect)`. + They lie in the upper-left and bottom-right corners of `mat_rect`. + """ + assert l >= 0 + assert u >= 0 + assert mat_rect.shape[0] == l + u + 1 + + cdef long size + cdef cnp.ndarray[cnp.float64_t, ndim=2] mat_full + + size = mat_rect.shape[1] + mat_full = np.zeros((size, size)) + + cdef long i + cdef unsigned long row + cdef unsigned long j + + for i in range(-u, l + 1): + row = u + i + for j in range(max(0, -i), max(0, size + min(0, -i))): + mat_full[j + i, j] = mat_rect[row, j] + + return mat_full + +@cython.boundscheck(False) +def band_e(long l, long u, cnp.ndarray[cnp.float64_t, ndim=2] mat_full): + """Extracts a band of a square matrix. + + Given a square numpy array `mat_full`, returns a rectangular numpy array + `mat_rect` with rows corresponding to the `u` superdiagonals, the diagonal + and the `l` subdiagonals of `mat_full`. + The square matrix is "collapsed column-wise", i.e. each column of + `mat_rect` contains the same entries as the part of the corresponding + column of `mat_full` that lies within the band defined by (l, u). + The extra entries in the corners of `mat_rect` which do not correspond to + any entry in `mat_full` are set to zero. + """ + assert l >= 0 + assert u >= 0 + assert mat_full.shape[1] == mat_full.shape[0] + + cdef long size + cdef cnp.ndarray[cnp.float64_t, ndim=2] mat_rect + + size = mat_full.shape[0] + mat_rect = np.empty((l + u + 1, size)) + + cdef long i + cdef unsigned long row + cdef unsigned long j + + for i in range(-u, l + 1): + row = u + i + for j in range(size): + # "j + i < size" below uses wraparound on unsigned long to actually + # check that "0 <= j + i < size" + mat_rect[row, j] = mat_full[j + i, j] if j + i < size else 0.0 + + return mat_rect + +@cython.boundscheck(False) +def zero_extra_entries(long l, long u, + cnp.ndarray[cnp.float64_t, ndim=2] mat_rect): + """Zeroes the extra entries of a rectangular matrix. + + See the docstring for `band_c` for the definition of extra entries. + + The statement `zero_extra_entries(l, u, mat_rect)` is equivalent to: + + mat_rect[:] = band_e(l, u, band_c(l, u, mat_rect)) + + N.B. in-place, i.e. mutates `mat_rect`. + """ + assert l >= 0 + assert u >= 0 + assert mat_rect.shape[0] == l + u + 1 + + cdef long size + + size = mat_rect.shape[1] + + cdef long i + cdef unsigned long row + cdef unsigned long j + + for i in range(-u, 0): + row = u + i + for j in range(0, min(size, -i)): + mat_rect[row, j] = 0.0 + for i in range(1, l + 1): + row = u + i + for j in range(max(0, size - i), size): + mat_rect[row, j] = 0.0 + + return + +def band_ce(l, u, mat_rect): + """Effectively applies `band_c` then `band_e`. + + The combined operation has the effect of zeroing the extra entries in (a + copy of) the rectangular matrix `mat_rect`. + + The expression `band_ce(l, u, mat_rect)` is equivalent to: + + band_e(l, u, band_c(l, u, mat_rect)) + """ + mat_rect_new = mat_rect.copy() + zero_extra_entries(l, u, mat_rect_new) + return mat_rect_new + +def band_ec(l, u, mat_full): + """Effectively applies `band_e` then `band_c`. + + The combined operation has the effect of zeroing the entries outside the + band specified by `l` and `u` in (a copy of) the square matrix `mat_full`. + + The expression `band_ec(l, u, mat_full)` is equivalent to: + + band_c(l, u, band_e(l, u, mat_full)) + """ + return band_c(l, u, band_e(l, u, mat_full)) + +@cython.boundscheck(False) +def band_cTe(long l, long u, + cnp.ndarray[cnp.float64_t, ndim=2] mat_rect, + cnp.ndarray[cnp.float64_t, ndim=2] target_rect=None): + """Effectively applies `band_c` then transpose then `band_e`. + + The expression `band_cTe(l, u, mat_rect)` is equivalent to: + + band_e(u, l, band_c(l, u, mat_rect).T) + """ + assert l >= 0 + assert u >= 0 + assert mat_rect.shape[0] == l + u + 1 + + cdef long size + cdef long target_given = (target_rect is not None) + + size = mat_rect.shape[1] + if target_given: + assert target_rect.shape[0] == l + u + 1 + assert target_rect.shape[1] == size + else: + target_rect = np.empty((l + u + 1, size)) + + cdef long i + cdef unsigned long row, row_new + cdef unsigned long j + + for i in range(-u, l + 1): + row = u + i + row_new = l - i + for j in range(size): + # "j - i < size" below uses wraparound on unsigned long to actually + # check that "0 <= j - i < size" + target_rect[row_new, j] = (mat_rect[row, j - i] if j - i < size + else 0.0) + + if target_given: + return + else: + return target_rect diff --git a/nnmnkwii/paramgen/_bandmat/linalg.pyx b/nnmnkwii/paramgen/_bandmat/linalg.pyx new file mode 100644 index 00000000..541289bb --- /dev/null +++ b/nnmnkwii/paramgen/_bandmat/linalg.pyx @@ -0,0 +1,379 @@ +# cython: language_level=3 + +"""Linear algebra operations for banded matrices. + +Several of the functions in this module deal with symmetric banded matrices. +Symmetric banded matrices can in principle be stored even more efficiently than +general banded matrices, by not storing the superdiagonals (or subdiagonals). +However in bandmat the simpler, more explicit representation which stores the +whole band is used instead. +This is slightly less efficient in terms of memory and time, but results in a +more unified interface and simpler code in the author's limited experience. +Note that this is in line with the convention used by numpy and scipy for +symmetric "full" (i.e. non-banded) matrices, where both the upper and lower +triangles are represented in memory. +""" + +# Copyright 2013, 2014, 2015, 2016, 2017, 2018 Matt Shannon + +# This file is part of bandmat. +# See `License` for details of license and warranty. + +from nnmnkwii.paramgen import _bandmat as bm +from nnmnkwii.paramgen._bandmat import BandMat +from nnmnkwii.paramgen._bandmat import full as fl + +import numpy as np +import scipy.linalg as sla + +cimport numpy as cnp +cimport cython +from libc.math cimport sqrt + +cnp.import_array() +cnp.import_ufunc() + +@cython.boundscheck(False) +@cython.cdivision(True) +def _cholesky_banded(cnp.ndarray[cnp.float64_t, ndim=2] mat, + long overwrite_ab=False, + long lower=False): + """A cython reimplementation of scipy.linalg.cholesky_banded. + + Using lower == True is slightly more time and space efficient than using + lower == False for this implementation. + The default is False only for compatibility with the scipy implementation. + + (This simple implementation seems to be much faster than the scipy + implementation, especially for small bandwidths. + This is surprising since the scipy implementation wraps an LAPACK call + which one would expect to be very fast. + I am still unsure why this is, though I have checked it is not to do with + Fortran-contiguous versus C-contiguous ordering. + The scipy implementation performs a relatively expensive finite-value check + which is arguably not necessary for our implementation, but this only + accounts for part of the difference.) + """ + cdef unsigned long frames, depth + cdef cnp.ndarray[cnp.float64_t, ndim=2] mat_orig + + frames = mat.shape[1] + assert mat.shape[0] >= 1 + depth = mat.shape[0] - 1 + + mat_orig = mat + if not lower: + mat = fl.band_cTe(0, depth, mat) + elif not overwrite_ab: + mat = mat.copy() + + cdef unsigned long frame, k, l + cdef double v0, iv0, siv0 + cdef cnp.ndarray[cnp.float64_t, ndim=1] v = np.empty( + (depth,), + dtype=np.float64 + ) + + for frame in range(frames): + v0 = mat[(0), frame] + if v0 <= 0.0: + raise sla.LinAlgError( + '%d-th leading minor not positive definite' % (frame + 1) + ) + iv0 = 1.0 / v0 + siv0 = sqrt(iv0) + + for k in range(depth): + v[k] = mat[k + 1, frame] + + mat[(0), frame] = 1.0 / siv0 + for k in range(depth): + mat[k + 1, frame] = v[k] * siv0 + + for k in range(min(depth, frames - frame - 1)): + for l in range(depth - k): + mat[l, k + frame + 1] -= v[l + k] * v[k] * iv0 + + if not lower: + if overwrite_ab: + fl.band_cTe(depth, 0, mat, target_rect=mat_orig) + mat = mat_orig + else: + mat = fl.band_cTe(depth, 0, mat) + + return mat + +@cython.boundscheck(False) +@cython.cdivision(True) +def _solve_triangular_banded(cnp.ndarray[cnp.float64_t, ndim=2] a_rect, + cnp.ndarray[cnp.float64_t, ndim=1] b, + long transposed=False, + long lower=False, + long overwrite_b=False): + """A cython implementation of missing scipy.linalg.solve_triangular_banded. + + Solves A . x = b (transposed == False) or A.T . x = b (transposed == True) + for x, where A is a upper or lower triangular banded matrix, x and b are + vectors, . indicates matrix multiplication, and A.T denotes the transpose + of A. + The argument `lower` indicates whether `a_rect` stores a lower triangle or + an upper triangle. + + This replicates functionality present in the LAPACK routine dtbtrs. + If this function existed in scipy, it would probably be + scipy.linalg.solve_triangular_banded. + """ + cdef unsigned long frames, depth + cdef long solve_lower + cdef cnp.ndarray[cnp.float64_t, ndim=1] x + + frames = a_rect.shape[1] + assert a_rect.shape[0] >= 1 + depth = a_rect.shape[0] - 1 + assert b.shape[0] == frames + + solve_lower = (lower != transposed) + + if overwrite_b: + x = b + else: + x = np.empty_like(b) + + cdef unsigned long pos, frame, framePrev, k + cdef double diff, denom + + for pos in range(frames): + frame = pos if solve_lower else frames - 1 - pos + diff = b[frame] + if lower: + if transposed: + for k in range(1, min(depth + 1, pos + 1)): + framePrev = frame + k + diff -= a_rect[k, frame] * x[framePrev] + denom = a_rect[(0), frame] + else: + for k in range(1, min(depth + 1, pos + 1)): + framePrev = frame - k + diff -= a_rect[k, framePrev] * x[framePrev] + denom = a_rect[(0), frame] + else: + if transposed: + for k in range(1, min(depth + 1, pos + 1)): + framePrev = frame - k + diff -= a_rect[depth - k, frame] * x[framePrev] + denom = a_rect[depth, frame] + else: + for k in range(1, min(depth + 1, pos + 1)): + framePrev = frame + k + diff -= a_rect[depth - k, framePrev] * x[framePrev] + denom = a_rect[depth, frame] + if denom == 0.0: + raise sla.LinAlgError( + 'singular matrix: resolution failed at diagonal %d' % frame + ) + x[frame] = diff / denom + + return x + +def cholesky(mat_bm, lower=False, alternative=False): + """Computes the Cholesky factor of a positive definite banded matrix. + + The conventional Cholesky decomposition of a positive definite matrix P is + P = L . L.T, where the lower Cholesky factor L is lower-triangular, the + upper Cholesky factor L.T is upper-triangular, and . indicates matrix + multiplication. + Each positive definite matrix has a unique Cholesky decomposition. + Given a positive definite banded matrix, this function computes its + Cholesky factor, which is also a banded matrix. + + This function can also work with the alternative Cholesky decomposition. + The alternative Cholesky decomposition of P is P = L.T . L, where again L + is lower-triangular. + Whereas the conventional Cholesky decomposition is intimately connected + with linear Gaussian autoregressive probabilistic models defined forwards + in time, the alternative Cholesky decomposition is intimately connected + with linear Gaussian autoregressive probabilistic models defined backwards + in time. + + `mat_bm` (representing P above) should represent the whole matrix, not just + the lower or upper triangles. + If the matrix represented by `mat_bm` is not symmetric then the behavior of + this function is undefined (currently either the upper or lower triangle is + used to compute the Cholesky factor and the rest of the matrix is ignored). + If `lower` is True then the lower Cholesky factor is returned, and + otherwise the upper Cholesky factor is returned. + If `alternative` is True then the Cholesky factor returned is for the + alternative Cholesky decomposition, and otherwise it is for the + conventional Cholesky decomposition. + """ + if mat_bm.transposed: + mat_bm = mat_bm.T + + depth = mat_bm.l + assert mat_bm.u == depth + assert depth >= 0 + + l = depth if lower else 0 + u = 0 if lower else depth + + mat_half_data = mat_bm.data[(depth - u):(depth + l + 1)] + if alternative: + chol_data = _cholesky_banded(mat_half_data[::-1, ::-1], + lower=(not lower))[::-1, ::-1] + else: + chol_data = _cholesky_banded(mat_half_data, lower=lower) + chol_bm = BandMat(l, u, chol_data) + + return chol_bm + +def solve_triangular(a_bm, b): + """Solves a triangular banded matrix equation. + + Solves A . x = b for x, where A is a upper or lower triangular banded + matrix, x and b are vectors, and . indicates matrix multiplication. + """ + assert a_bm.l == 0 or a_bm.u == 0 + + transposed = a_bm.transposed + lower = (a_bm.u == 0) + # whether a_bm.data represents a lower or upper triangular matrix + data_lower = (lower != transposed) + x = _solve_triangular_banded(a_bm.data, b, + transposed=transposed, + lower=data_lower) + return x + +def cho_solve(chol_bm, b): + """Solves a matrix equation given the Cholesky decomposition of the matrix. + + Solves A . x = b for x, where A is a positive definite banded matrix, x and + b are vectors, and . indicates matrix multiplication. + `chol_bm` is a Cholesky factor of A (either upper or lower). + """ + assert chol_bm.l == 0 or chol_bm.u == 0 + + lower = (chol_bm.u == 0) + chol_lower_bm = chol_bm if lower else chol_bm.T + + x = solve_triangular( + chol_lower_bm.T, + solve_triangular(chol_lower_bm, b) + ) + return x + +def solve(a_bm, b): + """Solves a banded matrix equation. + + Solves A . x = b for x, where A is a square banded matrix, x and b are + vectors, and . indicates matrix multiplication. + """ + assert a_bm.size == len(b) + + # below is necessary since sla.solve_banded does not have a transpose flag, + # and so is not capable of working with the transposed matrix directly. + # (In fact (surprisingly!) the underlying LAPACK function dgbsv does not + # have a transpose flag either, though gbsv calls dgbtrf and dgbtrs, and + # dgbtrs does have a transpose flag, so LAPACK is capable of working with + # the transposed matrix directly in principle). + if a_bm.transposed: + a_bm = a_bm.equiv(transposed_new=False) + + if a_bm.size == 0: + x = np.zeros_like(b) + elif a_bm.size == 1: + # workaround for https://github.com/scipy/scipy/issues/8906 + x = b / a_bm.data[a_bm.u, 0] + else: + x = sla.solve_banded((a_bm.l, a_bm.u), a_bm.data, b) + return x + +def solveh(a_bm, b): + """Solves a positive definite matrix equation. + + Solves A . x = b for x, where A is a positive definite banded matrix, x and + b are vectors, and . indicates matrix multiplication. + + `a_bm` (representing A above) should represent the whole matrix, not just + the lower or upper triangles. + If the matrix represented by `a_bm` is not symmetric then the behavior of + this function is undefined (currently either the upper or lower triangle is + used and the rest of the matrix is ignored). + """ + chol_bm = cholesky(a_bm, lower=True) + x = cho_solve(chol_bm, b) + return x + +@cython.boundscheck(False) +def band_of_inverse_from_chol(chol_bm): + """Computes the band of the inverse of a positive definite banded matrix. + + Computes the band of the inverse of a positive definite banded matrix given + its Cholesky factor. + Equivalently, finds the band of the covariance matrix of a discrete time + process which is linear-Gaussian backwards in time. + + `chol_bm` can be either the lower or upper Cholesky factor. + """ + assert chol_bm.l == 0 or chol_bm.u == 0 + + if chol_bm.u != 0: + chol_bm = chol_bm.T + + cdef unsigned long depth + cdef long frames + cdef long l_chol, u_chol, transposed_chol + cdef cnp.ndarray[cnp.float64_t, ndim=2] chol_data + cdef cnp.ndarray[cnp.float64_t, ndim=2] cov_data + + l_chol = chol_bm.l + u_chol = chol_bm.u + chol_data = chol_bm.data + transposed_chol = chol_bm.transposed + assert l_chol >= 0 + assert u_chol == 0 + assert chol_data.shape[0] == l_chol + u_chol + 1 + + depth = l_chol + frames = chol_data.shape[1] + cov_data = np.zeros((depth * 2 + 1, frames)) + + cdef long frame_l + cdef unsigned long frame + cdef long curr_depth + cdef long k_1, k_2 + cdef unsigned long row_chol + cdef long d_chol + cdef double mult + + curr_depth = 0 + for frame_l in range(frames - 1, -1, -1): + frame = frame_l + row_chol = l_chol if transposed_chol else u_chol + mult = 1.0 / chol_data[row_chol, frame] + cov_data[depth, frame] = mult * mult + for k_2 in range(curr_depth, -1, -1): + for k_1 in range(curr_depth, 0, -1): + row_chol = l_chol - k_1 if transposed_chol else u_chol + k_1 + d_chol = k_1 if transposed_chol else 0 + cov_data[depth + k_2, frame] -= ( + chol_data[row_chol, frame + d_chol] * + cov_data[depth + k_1 - k_2, frame + k_2] * + mult + ) + for k_2 in range(curr_depth, 0, -1): + cov_data[depth - k_2, frame + k_2] = ( + cov_data[depth + k_2, frame] + ) + if curr_depth < depth: + curr_depth += 1 + + cov_bm = BandMat(depth, depth, cov_data) + return cov_bm + +def band_of_inverse(mat_bm): + """Computes band of the inverse of a positive definite banded matrix.""" + depth = mat_bm.l + assert mat_bm.u == depth + chol_bm = cholesky(mat_bm, lower=True) + band_of_inv_bm = band_of_inverse_from_chol(chol_bm) + return band_of_inv_bm diff --git a/nnmnkwii/paramgen/_bandmat/misc.pyx b/nnmnkwii/paramgen/_bandmat/misc.pyx new file mode 100644 index 00000000..b1bcd335 --- /dev/null +++ b/nnmnkwii/paramgen/_bandmat/misc.pyx @@ -0,0 +1,100 @@ +# cython: language_level=3 + +"""Assorted helpful functions.""" + +# Copyright 2013, 2014, 2015, 2016, 2017, 2018 Matt Shannon + +# This file is part of bandmat. +# See `License` for details of license and warranty. + +cimport numpy as cnp +cimport cython + +cnp.import_array() +cnp.import_ufunc() + +def fancy_plus_equals(cnp.ndarray[cnp.int_t, ndim=1] target_index_seq, + cnp.ndarray[cnp.float64_t, ndim=1] source, + cnp.ndarray[cnp.float64_t, ndim=1] target): + """Implements a += method with fancy indexing. + + Does what you might expect + target[target_index_seq] += source + to do. + """ + cdef unsigned long source_size + + source_size = source.shape[0] + assert target_index_seq.shape[0] == source_size + + cdef unsigned long source_index + cdef long target_index + + for source_index in range(source_size): + target_index = target_index_seq[source_index] + target[target_index] += source[source_index] + + return + +def fancy_plus_equals_2d(cnp.ndarray[cnp.int_t, ndim=1] target_index_seq, + cnp.ndarray[cnp.float64_t, ndim=2] source, + cnp.ndarray[cnp.float64_t, ndim=2] target): + """Implements a += method with fancy indexing. + + Does what you might expect + target[target_index_seq] += source + to do. + """ + cdef unsigned long source_size + cdef unsigned long size1 + + source_size = source.shape[0] + assert target_index_seq.shape[0] == source_size + size1 = source.shape[1] + assert target.shape[1] == size1 + + cdef unsigned long source_index + cdef long target_index + cdef unsigned long index1 + + for source_index in range(source_size): + target_index = target_index_seq[source_index] + for index1 in range(size1): + target[target_index, index1] += source[source_index, index1] + + return + +def fancy_plus_equals_3d(cnp.ndarray[cnp.int_t, ndim=1] target_index_seq, + cnp.ndarray[cnp.float64_t, ndim=3] source, + cnp.ndarray[cnp.float64_t, ndim=3] target): + """Implements a += method with fancy indexing. + + Does what you might expect + target[target_index_seq] += source + to do. + """ + cdef unsigned long source_size + cdef unsigned long size1 + cdef unsigned long size2 + + source_size = source.shape[0] + assert target_index_seq.shape[0] == source_size + size1 = source.shape[1] + assert target.shape[1] == size1 + size2 = source.shape[2] + assert target.shape[2] == size2 + + cdef unsigned long source_index + cdef long target_index + cdef unsigned long index1 + cdef unsigned long index2 + + for source_index in range(source_size): + target_index = target_index_seq[source_index] + for index1 in range(size1): + for index2 in range(size2): + target[target_index, index1, index2] += ( + source[source_index, index1, index2] + ) + + return diff --git a/nnmnkwii/paramgen/_bandmat/overlap.pyx b/nnmnkwii/paramgen/_bandmat/overlap.pyx new file mode 100644 index 00000000..0b030e37 --- /dev/null +++ b/nnmnkwii/paramgen/_bandmat/overlap.pyx @@ -0,0 +1,344 @@ +# cython: language_level=3 + +"""Functions to do with overlapping subtensors.""" + +# Copyright 2013, 2014, 2015, 2016, 2017, 2018 Matt Shannon + +# This file is part of bandmat. +# See `License` for details of license and warranty. + +from nnmnkwii.paramgen import _bandmat as bm + +import numpy as np + +cimport numpy as cnp +cimport cython + +cnp.import_array() +cnp.import_ufunc() + +@cython.boundscheck(False) +def sum_overlapping_v(cnp.ndarray[cnp.float64_t, ndim=2] contribs, + unsigned long step=1, + cnp.ndarray[cnp.float64_t, ndim=1] target=None): + """Computes the overlapped sum of a sequence of vectors. + + The overlapped sum of a sequence of vectors is defined as follows. + Suppose the vectors in `contribs` are "laid out" along some larger vector + such that each element of `contribs` is offset by `step` indices relative + to the previous element. + For example `contribs[0]` occupies the left edge of the larger vector, + `contribs[1]` starts at index `step` and `contribs[2]` starts at index + `step * 2`. + The overlapped sum is the sum of these vectors laid out in this way, which + is a vector. + + If `target` is None then a new vector is returned; otherwise the overlapped + sum is added to the vector `target`. + If `contribs` has shape `(num_contribs, width)` then the returned vector + (or `target` if specified) has size `num_contribs * step + width - step`. + The value of `width` here must be at least `step`. + """ + assert contribs.shape[1] >= step + + cdef unsigned long width = contribs.shape[1] + cdef unsigned long num_contribs = contribs.shape[0] + cdef unsigned long vec_size = num_contribs * step + width - step + cdef cnp.ndarray[cnp.float64_t, ndim=1] vec + + if target is None: + vec = np.zeros((vec_size,), dtype=contribs.dtype) + else: + assert target.shape[0] == vec_size + vec = target + + cdef unsigned long index, frame, k + + for index in range(num_contribs): + frame = index * step + for k in range(width): + vec[frame + k] += contribs[index, k] + + if target is None: + return vec + else: + return + +@cython.boundscheck(False) +def sum_overlapping_m(cnp.ndarray[cnp.float64_t, ndim=3] contribs, + unsigned long step=1, + target_bm=None): + """Computes the overlapped sum of a sequence of square matrices. + + The overlapped sum of a sequence of matrices is defined as follows. + Suppose the matrices in `contribs` are "laid out" along the diagonal of + some larger matrix such that each element of `contribs` is `step` indices + further down and `step` indices further right than the previous element. + For example `contribs[0]` occupies the top left corner of the larger matrix + and `contribs[1]` is `step` indices down and right of this. + The overlapped sum is the sum of these matrices laid out in this way, which + is a banded matrix. + For this function the contributions are square, so the resulting banded + matrix is also square. + + If `target_bm` is None then a new BandMat is returned; otherwise the + overlapped sum is added to the BandMat `target_bm`. + If `contribs` has shape `(num_contribs, width, width)` then the returned + BandMat (or `target_bm` if specified) has upper and lower bandwidth + `width - 1` and size `num_contribs * step + width - step`. + The value of `width` here must be at least `step`. + """ + assert contribs.shape[1] >= 1 and contribs.shape[1] >= step + assert contribs.shape[2] == contribs.shape[1] + + cdef unsigned long width = contribs.shape[1] + cdef unsigned long num_contribs = contribs.shape[0] + cdef unsigned long depth = contribs.shape[1] - 1 + cdef unsigned long mat_size = num_contribs * step + width - step + + if target_bm is None: + mat_bm = bm.zeros(depth, depth, mat_size) + else: + assert target_bm.l == depth and target_bm.u == depth + assert target_bm.size == mat_size + mat_bm = target_bm + + cdef unsigned long transposed = mat_bm.transposed + cdef cnp.ndarray[cnp.float64_t, ndim=2] mat_data = mat_bm.data + + cdef unsigned long index, frame, k, l + + if transposed: + for index in range(num_contribs): + frame = index * step + for k in range(width): + for l in range(width): + mat_data[depth + l - k, frame + k] += contribs[index, k, l] + else: + for index in range(num_contribs): + frame = index * step + for k in range(width): + for l in range(width): + mat_data[depth + k - l, frame + l] += contribs[index, k, l] + + if target_bm is None: + return mat_bm + else: + return + +@cython.boundscheck(False) +def extract_overlapping_v(cnp.ndarray[cnp.float64_t, ndim=1] vec, + unsigned long width, + unsigned long step=1, + cnp.ndarray[cnp.float64_t, ndim=2] target=None): + """Extracts overlapping subvectors from a vector. + + The result `subvectors` is a matrix consisting of a sequence of subvectors + of `vec`. + Specifically `subvectors[i]` is `vec[(i * step):(i * step + width)]`. + + If `target` is None then a new matrix is returned; otherwise the result is + written to the matrix `target` (and all elements of `target` are guaranteed + to be overwritten, so there is no need to zero it ahead of time). + The length of `vec` should be `num_subs * step + width - step` for some + `num_subs`, i.e. it should "fit" a whole number of subvectors. + The returned matrix has shape `(num_subs, width)`. + The value of `width` here must be at least `step`. + """ + assert step >= 1 + assert width >= step + assert vec.shape[0] >= width - step + assert (vec.shape[0] + step - width) % step == 0 + + cdef unsigned long num_subs = (vec.shape[0] + step - width) // step + cdef cnp.ndarray[cnp.float64_t, ndim=2] subvectors + + if target is None: + subvectors = np.empty((num_subs, width), dtype=vec.dtype) + else: + assert target.shape[0] == num_subs + assert target.shape[1] == width + subvectors = target + + cdef unsigned long index, frame, k + + for index in range(num_subs): + frame = index * step + for k in range(width): + subvectors[index, k] = vec[frame + k] + + if target is None: + return subvectors + else: + return + +@cython.boundscheck(False) +def extract_overlapping_m(mat_bm, + unsigned long step=1, + cnp.ndarray[cnp.float64_t, ndim=3] target=None): + """Extracts overlapping submatrices along the diagonal of a banded matrix. + + The result `submats` is rank-3 tensor consisting of a sequence of + submatrices from along the diagonal of the matrix represented by `mat_bm`. + The upper and lower bandwidth of `mat_bm` should be the same. + Let `width` be `mat_bm.l + 1`. + If `mat_full` is the matrix represented by `mat_bm` then `submats[i]` is + `mat_full[(i * step):(i * step + width), (i * step):(i * step + width)]`. + + If `target` is None then a new tensor is returned; otherwise the result is + written to the tensor `target` (and all elements of `target` are guaranteed + to be overwritten, so there is no need to zero it ahead of time). + The size of `mat_bm` should be `num_subs * step + width - step` for some + `num_subs`, i.e. it should "fit" a whole number of submatrices. + The returned matrix has shape `(num_subs, width, width)`. + The value of `width` here must be at least `step`. + """ + assert mat_bm.l == mat_bm.u + + cdef unsigned long width = mat_bm.l + 1 + + assert step >= 1 + assert width >= step + assert mat_bm.size >= width - step + assert (mat_bm.size + step - width) % step == 0 + + cdef unsigned long num_subs = (mat_bm.size + step - width) // step + cdef unsigned long depth = mat_bm.l + cdef unsigned long transposed = mat_bm.transposed + cdef cnp.ndarray[cnp.float64_t, ndim=2] mat_data = mat_bm.data + cdef cnp.ndarray[cnp.float64_t, ndim=3] submats + + if target is None: + submats = np.empty((num_subs, width, width), dtype=mat_data.dtype) + else: + assert target.shape[0] == num_subs + assert target.shape[1] == width + assert target.shape[2] == width + submats = target + + cdef unsigned long index, frame, k, l + + if transposed: + for index in range(num_subs): + frame = index * step + for k in range(width): + for l in range(width): + submats[index, k, l] = mat_data[depth + l - k, frame + k] + else: + for index in range(num_subs): + frame = index * step + for k in range(width): + for l in range(width): + submats[index, k, l] = mat_data[depth + k - l, frame + l] + + if target is None: + return submats + else: + return + +def sum_overlapping_v_chunked(contribs_chunks, width, target, step=1): + """A chunked version of sum_overlapping_v. + + The elements of the iterator `contribs_chunks` should be of the form + `(start, end, contribs)`. + For example if `contribs_all` has length 10 then + + sum_overlapping_v_chunked([ + (0, 3, contribs_all[0:3]), + (3, 10, contribs_all[3:10]) + ]) + + produces the same result as `sum_overlapping_v(contribs_all)`. + This can be used to construct more memory-efficient code in some cases + (though not in the above example). + """ + assert step >= 0 + overlap = width - step + assert overlap >= 0 + + for start, end, contribs in contribs_chunks: + sum_overlapping_v( + contribs, + step=step, + target=target[(start * step):(end * step + overlap)] + ) + +def sum_overlapping_m_chunked(contribs_chunks, target_bm, step=1): + """A chunked version of sum_overlapping_m. + + The elements of the iterator `contribs_chunks` should be of the form + `(start, end, contribs)`. + For example if `contribs_all` has length 10 then + + sum_overlapping_m_chunked([ + (0, 3, contribs_all[0:3]), + (3, 10, contribs_all[3:10]) + ]) + + produces the same result as `sum_overlapping_m(contribs_all)`. + This can be used to construct more memory-efficient code in some cases + (though not in the above example). + """ + assert step >= 0 + depth = target_bm.l + assert target_bm.u == depth + width = depth + 1 + overlap = width - step + assert overlap >= 0 + + for start, end, contribs in contribs_chunks: + sum_overlapping_m( + contribs, + step=step, + target_bm=target_bm.sub_matrix_view( + start * step, end * step + overlap + ) + ) + +def extract_overlapping_v_chunked(vec, width, chunk_size, step=1): + """A chunked version of extract_overlapping_v. + + An iterator over chunks of the output of extract_overlapping_v is returned. + This can be used to construct more memory-efficient code in some cases. + """ + assert step >= 1 + overlap = width - step + assert overlap >= 0 + num_subs = (len(vec) - overlap) // step + assert num_subs * step + overlap == len(vec) + assert num_subs >= 0 + assert chunk_size >= 1 + + for start in range(0, num_subs, chunk_size): + end = min(start + chunk_size, num_subs) + subvectors = extract_overlapping_v( + vec[(start * step):(end * step + overlap)], + width, + step=step + ) + yield start, end, subvectors + +def extract_overlapping_m_chunked(mat_bm, chunk_size, step=1): + """A chunked version of extract_overlapping_m. + + An iterator over chunks of the output of extract_overlapping_m is returned. + This can be used to construct more memory-efficient code in some cases. + """ + assert step >= 1 + depth = mat_bm.l + assert mat_bm.u == depth + width = depth + 1 + overlap = width - step + assert overlap >= 0 + num_subs = (mat_bm.size - overlap) // step + assert num_subs * step + overlap == mat_bm.size + assert num_subs >= 0 + assert chunk_size >= 1 + + for start in range(0, num_subs, chunk_size): + end = min(start + chunk_size, num_subs) + submats = extract_overlapping_m( + mat_bm.sub_matrix_view(start * step, end * step + overlap), + step=step + ) + yield start, end, submats diff --git a/nnmnkwii/paramgen/_bandmat/tensor.pyx b/nnmnkwii/paramgen/_bandmat/tensor.pyx new file mode 100644 index 00000000..118f7058 --- /dev/null +++ b/nnmnkwii/paramgen/_bandmat/tensor.pyx @@ -0,0 +1,308 @@ +# cython: language_level=3 + +"""Multiplication, etc using banded matrices.""" + +# Copyright 2013, 2014, 2015, 2016, 2017, 2018 Matt Shannon + +# This file is part of bandmat. +# See `License` for details of license and warranty. + +from nnmnkwii.paramgen._bandmat import core as bm_core + +import numpy as np + +cimport numpy as cnp +cimport cython + +cnp.import_array() +cnp.import_ufunc() + +@cython.boundscheck(False) +def dot_mv_plus_equals(a_bm, + cnp.ndarray[cnp.float64_t, ndim=1] b, + cnp.ndarray[cnp.float64_t, ndim=1] target): + """Multiplies a banded matrix by a vector, adding the result to a vector. + + The statement `dot_mv_plus_equals(a_bm, b, target)` where `a_bm` is a + BandMat is the equivalent of: + + target += np.dot(a_full, b) + + where `a_full` is a square numpy array. + """ + # (FIXME : could wrap corresponding BLAS routine (gbmv) instead) + cdef long frames + cdef long l_a, u_a, transposed + cdef cnp.ndarray[cnp.float64_t, ndim=2] a_data + + l_a = a_bm.l + u_a = a_bm.u + a_data = a_bm.data + transposed = a_bm.transposed + assert l_a >= 0 + assert u_a >= 0 + assert a_data.shape[0] == l_a + u_a + 1 + + frames = a_data.shape[1] + assert b.shape[0] == frames + assert target.shape[0] == frames + + cdef long o_a + cdef unsigned long row_a + cdef long d_a + cdef unsigned long frame + + for o_a in range(-u_a, l_a + 1): + row_a = (l_a - o_a) if transposed else (u_a + o_a) + d_a = 0 if transposed else -o_a + for frame in range(max(0, o_a), max(0, frames + min(0, o_a))): + target[frame] += ( + a_data[row_a, frame + d_a] * + b[frame - o_a] + ) + + return + +def dot_mv(a_bm, b): + """Multiplies a banded matrix by a vector. + + The expression `dot_mv(a_bm, b)` where `a_bm` is a BandMat is the + equivalent of: + + np.dot(a_full, b) + + where `a_full` is a square numpy array. + """ + size = len(b) + assert a_bm.size == size + c = np.zeros((size,)) + dot_mv_plus_equals(a_bm, b, c) + return c + +@cython.boundscheck(False) +def dot_mm_plus_equals(a_bm, b_bm, target_bm, + cnp.ndarray[cnp.float64_t, ndim=1] diag=None): + """Multiplies two banded matrices, adding the result to a banded matrix. + + If `diag` is None, computes A . B, where . indicates matrix multiplication, + and adds the result to `target_bm`. + If `diag` is not None, computes A . D . B, where D is the diagonal matrix + with diagonal `diag`, and adds result to `target_bm`. + + If `target_bm` does not contain enough rows to contain the result of A . B, + then only diagonals of A . B which contribute to the rows present in the + banded representation used by `target_bm` are computed. + This is more efficient in the case that not all diagonals of `target_bm` + are needed. + + The statement `dot_mm_plus_equals(a_bm, b_bm, target_bm, diag=None)` where + `a_bm`, `b_bm` and `target_bm` are BandMats is the equivalent of: + + target_full += band_ec(l, u, np.dot(a_full, b_full)) + + where `a_full`, `b_full` and `target_full` are square numpy arrays. + Here `l` is `target_bm.l` and `u` is `target_bm.u`. + If `diag` is not None then it is the equivalent of: + + target_full += ( + band_ec(l, u, np.dot(np.dot(a_full, np.diag(diag)), b_full)) + ) + """ + cdef long frames + cdef long l_a, u_a, transposed_a + cdef long l_b, u_b, transposed_b + cdef long l_c, u_c, transposed_c + cdef long use_diag + cdef cnp.ndarray[cnp.float64_t, ndim=2] a_data + cdef cnp.ndarray[cnp.float64_t, ndim=2] b_data + cdef cnp.ndarray[cnp.float64_t, ndim=2] c_data + + l_a = a_bm.l + u_a = a_bm.u + a_data = a_bm.data + transposed_a = a_bm.transposed + assert l_a >= 0 + assert u_a >= 0 + assert a_data.shape[0] == l_a + u_a + 1 + + l_b = b_bm.l + u_b = b_bm.u + b_data = b_bm.data + transposed_b = b_bm.transposed + assert l_b >= 0 + assert u_b >= 0 + assert b_data.shape[0] == l_b + u_b + 1 + + l_c = target_bm.l + u_c = target_bm.u + c_data = target_bm.data + transposed_c = target_bm.transposed + assert l_c >= 0 + assert u_c >= 0 + assert c_data.shape[0] == l_c + u_c + 1 + + frames = a_data.shape[1] + assert b_data.shape[1] == frames + assert c_data.shape[1] == frames + + use_diag = (diag is not None) + if use_diag: + assert diag.shape[0] == frames + + cdef long o_a, o_b, o_c + cdef unsigned long row_a, row_b, row_c + cdef long d_a, d_b, d_c + cdef unsigned long frame + + for o_c in range(-min(u_c, u_a + u_b), min(l_c, l_a + l_b) + 1): + for o_a in range(-min(u_a, l_b - o_c), min(l_a, u_b + o_c) + 1): + o_b = o_c - o_a + row_a = (l_a - o_a) if transposed_a else (u_a + o_a) + row_b = (l_b - o_b) if transposed_b else (u_b + o_b) + row_c = (l_c - o_c) if transposed_c else (u_c + o_c) + d_a = o_a if transposed_a else 0 + d_b = 0 if transposed_b else -o_b + d_c = o_a if transposed_c else -o_b + for frame in range(max(0, -o_a, o_b), + max(0, frames + min(0, -o_a, o_b))): + c_data[row_c, frame + d_c] += ( + a_data[row_a, frame + d_a] * + b_data[row_b, frame + d_b] * + (diag[frame] if use_diag else 1.0) + ) + + return + +def dot_mm(a_bm, b_bm, diag=None): + """Multiplies two banded matrices. + + If `diag` is None, computes A . B, where . indicates matrix multiplication. + If `diag` is not None, computes A . D . B, where D is the diagonal matrix + with diagonal `diag`. + + The expression `dot_mm(a_bm, b_bm, diag=None)` where `a_bm` and `b_bm` are + BandMats is the equivalent of: + + np.dot(a_full, b_full) + + where `a_full` and `b_full` are square numpy arrays. + If `diag` is not None then it is the equivalent of: + + np.dot(np.dot(a_full, np.diag(diag)), b_full) + """ + assert a_bm.size == b_bm.size + c_bm = bm_core.zeros(a_bm.l + b_bm.l, a_bm.u + b_bm.u, a_bm.size) + dot_mm_plus_equals(a_bm, b_bm, c_bm, diag=diag) + return c_bm + +def dot_mm_partial(l, u, a_bm, b_bm, diag=None): + """Computes part of the result of multiplying two banded matrices. + + If `diag` is None, computes part of C = A . B, where . indicates matrix + multiplication. + If `diag` is not None, computes part of C = A . D . B, where D is the + diagonal matrix with diagonal `diag`. + + This function only computes the diagonals of C that are within the band + specified by `l` and `u`. + This is desirable in the case that not all diagonals of C are needed. + + The expression `dot_mm_partial(l, u, a_bm, b_bm, diag=None)` where `a_bm` + and `b_bm` are BandMats is the equivalent of: + + band_ec(l, u, np.dot(a_full, b_full)) + + where `a_full` and `b_full` are square numpy arrays. + If `diag` is not None then it is the equivalent of: + + band_ec(l, u, np.dot(np.dot(a_full, np.diag(diag)), b_full)) + """ + assert a_bm.size == b_bm.size + c_bm = bm_core.zeros(l, u, a_bm.size) + dot_mm_plus_equals(a_bm, b_bm, c_bm, diag=diag) + return c_bm + +def dot_mmm_partial(l, u, a_bm, b_bm, c_bm): + """Computes part of the result of multiplying three banded matrices. + + Computes D = A . (B . C), where . indicates matrix multiplication. + + This function only computes the diagonals of D that are within the band + specified by `l` and `u`. + Furthermore the intermediate result E = B . C is also only partially + computed, with rows that can no have influence on the final result never + being computed. + This behavior is desirable in the case that not all diagonals of D are + needed. + + The expression `dot_mmm_partial(l, u, a_bm, b_bm, c_bm)` where `a_bm`, + `b_bm` and `c_bm` are BandMats is the equivalent of: + + band_ec(l, u, np.dot(a_full, np.dot(b_full, c_full))) + + where `a_full`, `b_full` and `c_full` are square numpy arrays. + """ + l_i = l + a_bm.u + u_i = u + a_bm.l + return dot_mm_partial(l, u, a_bm, dot_mm_partial(l_i, u_i, b_bm, c_bm)) + +@cython.boundscheck(False) +def band_of_outer_plus_equals(cnp.ndarray[cnp.float64_t, ndim=1] a_vec, + cnp.ndarray[cnp.float64_t, ndim=1] b_vec, + target_bm, + double mult=1.0): + """Adds the band of the outer product of two vectors to a banded matrix. + + The statement `band_of_outer_plus_equals(a_vec, b_vec, target_bm, mult)` + where `target_bm` is a BandMat is the equivalent of: + + target_full += band_ec(l, u, np.outer(a_vec, b_vec) * mult) + + where `target_full` is a square numpy array. + Here `l` is `target_bm.l` and `u` is `target_bm.u`. + """ + cdef long frames + cdef long l_c, u_c, transposed_c + cdef cnp.ndarray[cnp.float64_t, ndim=2] c_data + + l_c = target_bm.l + u_c = target_bm.u + c_data = target_bm.data + transposed_c = target_bm.transposed + assert l_c >= 0 + assert u_c >= 0 + assert c_data.shape[0] == l_c + u_c + 1 + + frames = c_data.shape[1] + assert a_vec.shape[0] == frames + assert b_vec.shape[0] == frames + + cdef long o_c + cdef unsigned long row_c + cdef long d_c + cdef unsigned long frame + + for o_c in range(-u_c, l_c + 1): + row_c = (l_c - o_c) if transposed_c else (u_c + o_c) + d_c = o_c if transposed_c else 0 + for frame in range(max(0, -o_c), max(0, frames + min(0, -o_c))): + c_data[row_c, frame + d_c] += ( + a_vec[frame + o_c] * b_vec[frame] * mult + ) + + return + +def trace_dot(a_bm, b_bm): + """Convenience method to compute the matrix inner product. + + The expression `trace_dot(a_bm, b_bm)` where `a_bm` and `b_bm` are BandMats + is the equivalent of: + + np.trace(np.dot(a_full.T, b_full)) + + where `a_full` and `b_full` are square numpy arrays. + + This operation is a valid inner product over the vector space of square + matrices of a given size. + """ + return np.sum(bm_core.diag(dot_mm_partial(0, 0, a_bm.T, b_bm))) diff --git a/nnmnkwii/paramgen/_mlpg.py b/nnmnkwii/paramgen/_mlpg.py index 3f861106..95811a52 100644 --- a/nnmnkwii/paramgen/_mlpg.py +++ b/nnmnkwii/paramgen/_mlpg.py @@ -3,8 +3,8 @@ import numpy as np -import bandmat as bm -import bandmat.linalg as bla +from nnmnkwii.paramgen import _bandmat as bm +from nnmnkwii.paramgen._bandmat import linalg as bla from scipy.linalg import solve_banded from nnmnkwii.util.linalg import cholesky_inv_banded from .mlpg_helper import full_window_mat as _full_window_mat diff --git a/nnmnkwii/paramgen/mlpg_helper.pyx b/nnmnkwii/paramgen/mlpg_helper.pyx index e41c6873..f841c199 100644 --- a/nnmnkwii/paramgen/mlpg_helper.pyx +++ b/nnmnkwii/paramgen/mlpg_helper.pyx @@ -1,6 +1,7 @@ # coding: utf-8 # cython: wraparound = False # cython: boundscheck = False +# cython: language_level=3 import numpy as np cimport numpy as np diff --git a/nnmnkwii/preprocessing/generic.py b/nnmnkwii/preprocessing/generic.py index d002c1ff..d4b00df9 100644 --- a/nnmnkwii/preprocessing/generic.py +++ b/nnmnkwii/preprocessing/generic.py @@ -1,20 +1,20 @@ # coding: utf-8 -from __future__ import division, print_function, absolute_import +from __future__ import absolute_import, division, print_function import numpy as np -from sklearn.utils.extmath import _incremental_mean_and_var from scipy import signal +from sklearn.utils.extmath import _incremental_mean_and_var # from: https://github.com/scikit-learn/scikit-learn/blob/0.22.2/sklearn/preprocessing/_data.py def _handle_zeros_in_scale(scale, copy=True): - ''' Makes sure that whenever scale is zero, we handle it correctly. - This happens in most scalers when we have constant features.''' + """Makes sure that whenever scale is zero, we handle it correctly. + This happens in most scalers when we have constant features.""" # if we are fitting on 1D arrays, scale might be a scalar if np.isscalar(scale): - if scale == .0: - scale = 1. + if scale == 0.0: + scale = 1.0 return scale elif isinstance(scale, np.ndarray): if copy: @@ -46,7 +46,7 @@ def _asint(x): # ugly wrapper to support torch/numpy arrays isnumpy = isinstance(x, np.ndarray) isscalar = np.isscalar(x) - return x.astype(np.int) if isnumpy else int(x) if isscalar else x.long() + return x.astype(int) if isnumpy else int(x) if isscalar else x.long() def _asfloat(x): @@ -104,7 +104,7 @@ def inv_mulaw(y, mu=256): :func:`nnmnkwii.preprocessing.mulaw_quantize` :func:`nnmnkwii.preprocessing.inv_mulaw_quantize` """ - return _sign(y) * (1.0 / mu) * ((1.0 + mu)**_abs(y) - 1.0) + return _sign(y) * (1.0 / mu) * ((1.0 + mu) ** _abs(y) - 1.0) def mulaw_quantize(x, mu=256): @@ -197,8 +197,8 @@ def preemphasis(x, coef=0.97): >>> y = P.preemphasis(x, coef=0.97) >>> assert x.shape == y.shape """ - b = np.array([1., -coef], x.dtype) - a = np.array([1.], x.dtype) + b = np.array([1.0, -coef], x.dtype) + a = np.array([1.0], x.dtype) return signal.lfilter(b, a, x) @@ -224,8 +224,8 @@ def inv_preemphasis(x, coef=0.97): >>> x_hat = P.inv_preemphasis(P.preemphasis(x, coef=0.97), coef=0.97) >>> assert np.allclose(x, x_hat) """ - b = np.array([1.], x.dtype) - a = np.array([1., -coef], x.dtype) + b = np.array([1.0], x.dtype) + a = np.array([1.0, -coef], x.dtype) return signal.lfilter(b, a, x) @@ -281,13 +281,17 @@ def delta_features(x, windows): T, D = x.shape assert len(windows) > 0 combined_features = np.empty((T, D * len(windows)), dtype=x.dtype) - for idx, (_, _, window) in enumerate(windows): - combined_features[:, D * idx:D * idx + - D] = _apply_delta_window(x, window) + # NOTE: bandmat case + if isinstance(windows[0], tuple): + for idx, (_, _, window) in enumerate(windows): + combined_features[:, D * idx : D * idx + D] = _apply_delta_window(x, window) + else: + for idx, window in enumerate(windows): + combined_features[:, D * idx : D * idx + D] = _apply_delta_window(x, window) return combined_features -def trim_zeros_frames(x, eps=1e-7, trim='b'): +def trim_zeros_frames(x, eps=1e-7, trim="b"): """Remove leading and/or trailing zeros frames. Similar to :func:`numpy.trim_zeros`, trimming trailing zeros features. @@ -307,28 +311,28 @@ def trim_zeros_frames(x, eps=1e-7, trim='b'): >>> y = trim_zeros_frames(x) """ - assert trim in {'f', 'b', 'fb'} + assert trim in {"f", "b", "fb"} T, D = x.shape s = np.sum(np.abs(x), axis=1) - s[s < eps] = 0. + s[s < eps] = 0.0 - if trim == 'f': - return x[len(x) - len(np.trim_zeros(s, trim=trim)):] - elif trim == 'b': + if trim == "f": + return x[len(x) - len(np.trim_zeros(s, trim=trim)) :] + elif trim == "b": end = len(np.trim_zeros(s, trim=trim)) - len(x) if end == 0: return x else: - return x[: end] - elif trim == 'fb': - f = len(np.trim_zeros(s, trim='f')) - b = len(np.trim_zeros(s, trim='b')) + return x[:end] + elif trim == "fb": + f = len(np.trim_zeros(s, trim="f")) + b = len(np.trim_zeros(s, trim="b")) end = b - len(x) if end == 0: - return x[len(x) - f:] + return x[len(x) - f :] else: - return x[len(x) - f: end] + return x[len(x) - f : end] def remove_zeros_frames(x, eps=1e-7): @@ -351,7 +355,7 @@ def remove_zeros_frames(x, eps=1e-7): """ T, D = x.shape s = np.sum(np.abs(x), axis=1) - s[s < eps] = 0. + s[s < eps] = 0.0 return x[s > eps] @@ -413,8 +417,7 @@ def adjust_frame_length(x, pad=True, divisible_by=1, **kwargs): return x -def adjust_frame_lengths(x, y, pad=True, ensure_even=False, divisible_by=1, - **kwargs): +def adjust_frame_lengths(x, y, pad=True, ensure_even=False, divisible_by=1, **kwargs): """Adjust frame lengths given two feature vectors or matrices. This ensures that two feature vectors or matrices have same number of @@ -493,8 +496,14 @@ def adjust_frame_lengths(x, y, pad=True, ensure_even=False, divisible_by=1, return x, y -def meanvar(dataset, lengths=None, mean_=0., var_=0., - last_sample_count=0, return_last_sample_count=False): +def meanvar( + dataset, + lengths=None, + mean_=0.0, + var_=0.0, + last_sample_count=0, + return_last_sample_count=False, +): """Mean/variance computation given a iterable dataset Dataset can have variable length samples. In that cases, you need to @@ -532,9 +541,8 @@ def meanvar(dataset, lengths=None, mean_=0., var_=0., for idx, x in enumerate(dataset): if lengths is not None: - x = x[:lengths[idx]] - mean_, var_, _ = _incremental_mean_and_var( - x, mean_, var_, last_sample_count) + x = x[: lengths[idx]] + mean_, var_, _ = _incremental_mean_and_var(x, mean_, var_, last_sample_count) last_sample_count += len(x) mean_, var_ = mean_.astype(dtype), var_.astype(dtype) @@ -544,8 +552,14 @@ def meanvar(dataset, lengths=None, mean_=0., var_=0., return mean_, var_ -def meanstd(dataset, lengths=None, mean_=0., var_=0., - last_sample_count=0, return_last_sample_count=False): +def meanstd( + dataset, + lengths=None, + mean_=0.0, + var_=0.0, + last_sample_count=0, + return_last_sample_count=False, +): """Mean/std-deviation computation given a iterable dataset Dataset can have variable length samples. In that cases, you need to @@ -579,8 +593,9 @@ def meanstd(dataset, lengths=None, mean_=0., var_=0., >>> lengths = [len(y) for y in Y] >>> data_mean, data_std = meanstd(Y, lengths) """ - ret = meanvar(dataset, lengths, mean_, var_, - last_sample_count, return_last_sample_count) + ret = meanvar( + dataset, lengths, mean_, var_, last_sample_count, return_last_sample_count + ) m, v = ret[0], ret[1] v = _handle_zeros_in_scale(np.sqrt(v)) if return_last_sample_count: @@ -617,7 +632,7 @@ def minmax(dataset, lengths=None): for idx, x in enumerate(dataset): if lengths is not None: - x = x[:lengths[idx]] + x = x[: lengths[idx]] min_ = np.minimum(min_, np.min(x, axis=(0,))) max_ = np.maximum(max_, np.max(x, axis=(0,))) @@ -674,8 +689,9 @@ def inv_scale(x, data_mean, data_std): def __minmax_scale_factor(data_min, data_max, feature_range): data_range = data_max - data_min - scale = (feature_range[1] - feature_range[0]) / \ - _handle_zeros_in_scale(data_range, copy=False) + scale = (feature_range[1] - feature_range[0]) / _handle_zeros_in_scale( + data_range, copy=False + ) return scale @@ -718,8 +734,9 @@ def minmax_scale_params(data_min, data_max, feature_range=(0, 1)): return min_, scale_ -def minmax_scale(x, data_min=None, data_max=None, feature_range=(0, 1), - scale_=None, min_=None): +def minmax_scale( + x, data_min=None, data_max=None, feature_range=(0, 1), scale_=None, min_=None +): """Min/max scaling for given a single data. Given data min, max and feature range, apply min/max normalization to data. @@ -761,8 +778,10 @@ def minmax_scale(x, data_min=None, data_max=None, feature_range=(0, 1), >>> scaled_x = minmax_scale(X[0], data_min, data_max) """ if (scale_ is None or min_ is None) and (data_min is None or data_max is None): - raise ValueError(""" -`data_min` and `data_max` or `scale_` and `min_` must be specified to perform minmax scale""") + raise ValueError( + """ +`data_min` and `data_max` or `scale_` and `min_` must be specified to perform minmax scale""" + ) if scale_ is None: scale_ = __minmax_scale_factor(data_min, data_max, feature_range) if min_ is None: @@ -770,8 +789,9 @@ def minmax_scale(x, data_min=None, data_max=None, feature_range=(0, 1), return x * scale_ + min_ -def inv_minmax_scale(x, data_min=None, data_max=None, feature_range=(0, 1), - scale_=None, min_=None): +def inv_minmax_scale( + x, data_min=None, data_max=None, feature_range=(0, 1), scale_=None, min_=None +): """Inverse transform of min/max scaling for given a single data. Given data min, max and feature range, apply min/max denormalization to data. @@ -800,8 +820,10 @@ def inv_minmax_scale(x, data_min=None, data_max=None, feature_range=(0, 1), :func:`nnmnkwii.preprocessing.minmax_scale_params` """ if (scale_ is None or min_ is None) and (data_min is None or data_max is None): - raise ValueError(""" -`data_min` and `data_max` or `scale_` and `min_` must be specified to perform inverse of minmax scale""") + raise ValueError( + """ +`data_min` and `data_max` or `scale_` and `min_` must be specified to perform inverse of minmax scale""" + ) if scale_ is None: scale_ = __minmax_scale_factor(data_min, data_max, feature_range) if min_ is None: diff --git a/nnmnkwii/util/files.py b/nnmnkwii/util/files.py index 58a54ab2..f6995bd0 100644 --- a/nnmnkwii/util/files.py +++ b/nnmnkwii/util/files.py @@ -1,14 +1,10 @@ -# coding: utf-8 -from __future__ import division, print_function, absolute_import +from glob import glob +from os.path import join +import numpy as np import pkg_resources - from nnmnkwii.datasets import FileDataSource -import numpy as np -from glob import glob -from os.path import join - def example_label_file(phone_level=False): """Get path of example HTS-style full-context lable file. @@ -71,7 +67,7 @@ def example_question_file(): Examples: >>> from nnmnkwii.util import example_question_file >>> from nnmnkwii.io import hts - >>> binary_dict, continuous_dict = hts.load_question_set(example_question_file()) + >>> binary_dict, numeric_dict = hts.load_question_set(example_question_file()) """ return pkg_resources.resource_filename( __name__, '_example_data/questions-radio_dnn_416.hed') diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..a78cca3c --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,8 @@ +[build-system] +requires = [ + "wheel", + "setuptools", + "cython>=0.21.0", + "numpy>=1.8.0", + "scipy", +] diff --git a/setup.py b/setup.py index 99494abf..3a0bfafc 100644 --- a/setup.py +++ b/setup.py @@ -12,17 +12,20 @@ import os import sys -version = '0.0.23' +version = "0.0.23" # Adapted from https://github.com/pytorch/pytorch cwd = os.path.dirname(os.path.abspath(__file__)) -if os.getenv('NNMNKWII_BUILD_VERSION'): - version = os.getenv('NNMNKWII_BUILD_VERSION') +if os.getenv("NNMNKWII_BUILD_VERSION"): + version = os.getenv("NNMNKWII_BUILD_VERSION") else: try: - sha = subprocess.check_output( - ['git', 'rev-parse', 'HEAD'], cwd=cwd).decode('ascii').strip() - version += '+' + sha[:7] + sha = ( + subprocess.check_output(["git", "rev-parse", "HEAD"], cwd=cwd) + .decode("ascii") + .strip() + ) + version += "+" + sha[:7] except subprocess.CalledProcessError: pass except IOError: # FileNotFoundError for python 3 @@ -36,11 +39,11 @@ def finalize_options(self): # Prevent numpy from thinking it is still in its setup process: __builtins__.__NUMPY_SETUP__ = False import numpy + self.include_dirs.append(numpy.get_include()) class build_py(setuptools.command.build_py.build_py): - def run(self): self.create_version_file() setuptools.command.build_py.build_py.run(self) @@ -48,14 +51,13 @@ def run(self): @staticmethod def create_version_file(): global version, cwd - print('-- Building version ' + version) - version_path = os.path.join(cwd, 'nnmnkwii', 'version.py') - with open(version_path, 'w') as f: + print("-- Building version " + version) + version_path = os.path.join(cwd, "nnmnkwii", "version.py") + with open(version_path, "w") as f: f.write("__version__ = '{}'\n".format(version)) class develop(setuptools.command.develop.develop): - def run(self): build_py.create_version_file() setuptools.command.develop.develop.run(self) @@ -63,9 +65,10 @@ def run(self): cmdclass = {"build_py": build_py, "develop": develop} -min_cython_ver = '0.28.0' +min_cython_ver = "0.28.0" try: import Cython + ver = Cython.__version__ _CYTHON_INSTALLED = ver >= LooseVersion(min_cython_ver) except ImportError: @@ -73,20 +76,22 @@ def run(self): try: if not _CYTHON_INSTALLED: - raise ImportError('No supported version of Cython installed.') + raise ImportError("No supported version of Cython installed.") from Cython.Distutils import build_ext + cython = True except ImportError: cython = False include_dirs = [] -cmdclass['build_ext'] = build_ext +cmdclass["build_ext"] = build_ext if cython: - ext = '.pyx' + ext = ".pyx" import numpy as np + include_dirs.insert(0, np.get_include()) else: - ext = '.c' + ext = ".c" print("Building extentions from pre-generated C source") if not os.path.exists(join("nnmnkwii", "paramgen", "mlpg_helper" + ext)): raise RuntimeError("Cython is required to generate C code.") @@ -104,56 +109,74 @@ def run(self): sources=[join("nnmnkwii", "paramgen", "mlpg_helper" + ext)], include_dirs=include_dirs, language="c", - extra_compile_args=["-std=c99"] + extra_compile_args=["-std=c99"], ), ] +### [start] bandmat related code ### +# https://github.com/MattShannon/bandmat/issues/10 +bandmat_upstream_fixed = False +if not bandmat_upstream_fixed: + cython_locs = [ + ("nnmnkwii", "paramgen", "_bandmat", "full"), + ("nnmnkwii", "paramgen", "_bandmat", "core"), + ("nnmnkwii", "paramgen", "_bandmat", "tensor"), + ("nnmnkwii", "paramgen", "_bandmat", "linalg"), + ("nnmnkwii", "paramgen", "_bandmat", "misc"), + ("nnmnkwii", "paramgen", "_bandmat", "overlap"), + ] + ext_modules.extend( + Extension( + ".".join(loc), + [join(*loc) + ".pyx"], + extra_compile_args=["-O3"], + include_dirs=include_dirs, + ) + for loc in cython_locs + ) +### [end] bandmat related code ### + def package_files(directory): # https://stackoverflow.com/questions/27664504/ paths = [] for (path, directories, filenames) in os.walk(directory): for filename in filenames: - paths.append(os.path.join('..', path, filename)) + paths.append(os.path.join("..", path, filename)) return paths package_data = package_files("./nnmnkwii/util/_example_data") install_requires = [ - 'scipy', - 'cython >= ' + min_cython_ver, - 'fastdtw', - 'scikit-learn', - 'pysptk >= 0.1.17', - 'tqdm', + "scipy", + "cython >= " + min_cython_ver, + "fastdtw", + "scikit-learn", + "pysptk >= 0.1.17", + "tqdm", ] -# Workaround for python >= 3.7 and bandmat -# https://github.com/r9y9/deepvoice3_pytorch/issues/154 -if sys.version_info[:2] < (3, 7): - install_requires.append('bandmat >= 0.7') - setup( - name='nnmnkwii', + name="nnmnkwii", version=version, - description='Library to build speech synthesis systems designed for easy and fast prototyping.', - long_description=open('README.md', 'rb').read().decode("utf-8"), - long_description_content_type='text/markdown', - author='Ryuichi Yamamoto', - author_email='zryuichi@gmail.com', - url='https://github.com/r9y9/nnmnkwii', - license='MIT', + description="Library to build speech synthesis systems designed for easy and fast prototyping.", + long_description=open("README.md", "rb").read().decode("utf-8"), + long_description_content_type="text/markdown", + author="Ryuichi Yamamoto", + author_email="zryuichi@gmail.com", + url="https://github.com/r9y9/nnmnkwii", + license="MIT", packages=find_packages(exclude=["tests", "perf"]), - package_data={'': package_data}, + package_data={"": package_data}, ext_modules=ext_modules, cmdclass=cmdclass, setup_requires=["numpy >= 1.11.0"], install_requires=install_requires, - tests_require=['nose', 'coverage'], + tests_require=["nose", "coverage"], extras_require={ - 'docs': ['numpydoc', 'sphinx_rtd_theme'], - 'test': ['nose', 'pyworld', 'librosa'], + "docs": ["numpydoc", "sphinx_rtd_theme"], + "test": ["nose", "pyworld", "librosa"], }, classifiers=[ "Operating System :: Microsoft :: Windows", @@ -173,5 +196,5 @@ def package_files(directory): "Intended Audience :: Science/Research", "Intended Audience :: Developers", ], - keywords=["Research"] + keywords=["Research"], ) diff --git a/tests/test_datasets.py b/tests/test_datasets.py index b40b9668..70bbe11c 100644 --- a/tests/test_datasets.py +++ b/tests/test_datasets.py @@ -1,15 +1,15 @@ -from __future__ import division, print_function, absolute_import +from __future__ import absolute_import, division, print_function -from nnmnkwii.datasets import FileSourceDataset, PaddedFileSourceDataset -from nnmnkwii.datasets import MemoryCacheFramewiseDataset -from nnmnkwii.datasets import FileDataSource -from nnmnkwii.util import example_file_data_sources_for_acoustic_model -from nnmnkwii.util import example_file_data_sources_for_duration_model +from os.path import dirname, join import numpy as np -from nose.tools import raises +from nnmnkwii.datasets import (FileDataSource, FileSourceDataset, + MemoryCacheFramewiseDataset, + PaddedFileSourceDataset) +from nnmnkwii.util import (example_file_data_sources_for_acoustic_model, + example_file_data_sources_for_duration_model) from nose.plugins.attrib import attr -from os.path import join, dirname +from nose.tools import raises DATA_DIR = join(dirname(__file__), "data") @@ -153,7 +153,7 @@ def test_fixed_length_sequence_wise_iteration(): def test_frame_wise_iteration(): X, Y = _get_small_datasets(padded=False) - lengths = np.array([len(x) for x in X], dtype=np.int) + lengths = np.array([len(x) for x in X], dtype=int) num_utterances = len(lengths) # With sufficient cache size @@ -198,7 +198,7 @@ def __len__(self): def __test(X, Y, batch_size): dataset = TorchDataset(X, Y) loader = data_utils.DataLoader( - dataset, batch_size=batch_size, num_workers=1, shuffle=True) + dataset, batch_size=batch_size, num_workers=0, shuffle=True) for idx, (x, y) in enumerate(loader): assert len(x.shape) == len(y.shape) assert len(x.shape) == 3 @@ -228,7 +228,7 @@ def test_frame_wise_torch_data_loader(): # fixed size length, i.e., implements `__len__` method, we need to know # number of frames for each utterance. # Sum of the number of frames is the dataset size for frame-wise iteration. - lengths = np.array([len(x) for x in X], dtype=np.int) + lengths = np.array([len(x) for x in X], dtype=int) # For the above reason, we need to explicitly give the number of frames. X = MemoryCacheFramewiseDataset(X, lengths, cache_size=len(X)) @@ -248,7 +248,7 @@ def __len__(self): def __test(X, Y, batch_size): dataset = TorchDataset(X, Y) loader = data_utils.DataLoader( - dataset, batch_size=batch_size, num_workers=1, shuffle=True) + dataset, batch_size=batch_size, num_workers=0, shuffle=True) for idx, (x, y) in enumerate(loader): assert len(x.shape) == 2 assert len(y.shape) == 2 diff --git a/tests/test_frontend.py b/tests/test_frontend.py index 70f44095..229f2514 100644 --- a/tests/test_frontend.py +++ b/tests/test_frontend.py @@ -1,25 +1,22 @@ -from __future__ import division, print_function, absolute_import - -from nnmnkwii.io import hts -from nnmnkwii.frontend import merlin as fe -from nnmnkwii.util import example_question_file, example_label_file - from os.path import dirname, join -import numpy as np +import numpy as np +from nnmnkwii.frontend import merlin as fe +from nnmnkwii.io import hts +from nnmnkwii.util import example_label_file, example_question_file from nose.tools import raises DATA_DIR = join(dirname(__file__), "data") def test_invalid_linguistic_features(): - binary_dict, continuous_dict = hts.load_question_set(example_question_file()) + binary_dict, numeric_dict = hts.load_question_set(example_question_file()) phone_labels = hts.load(example_label_file(phone_level=True)) state_labels = hts.load(example_label_file(phone_level=False)) @raises(ValueError) def __test(labels, subphone_features, add_frame_features): - fe.linguistic_features(labels, binary_dict, continuous_dict, + fe.linguistic_features(labels, binary_dict, numeric_dict, subphone_features=subphone_features, add_frame_features=add_frame_features) @@ -40,13 +37,13 @@ def __test(labels, unit_size, feature_size): def test_silence_frame_removal_given_hts_labels(): qs_file_name = join(DATA_DIR, "questions-radio_dnn_416.hed") - binary_dict, continuous_dict = hts.load_question_set(qs_file_name) + binary_dict, numeric_dict = hts.load_question_set(qs_file_name) input_state_label = join(DATA_DIR, "label_state_align", "arctic_a0001.lab") labels = hts.load(input_state_label) features = fe.linguistic_features(labels, binary_dict, - continuous_dict, + numeric_dict, add_frame_features=True, subphone_features="full" ) @@ -64,7 +61,7 @@ def test_silence_frame_removal_given_hts_labels(): # Make sure we can get same results with Merlin def test_linguistic_and_duration_features_for_duration_model(): qs_file_name = join(DATA_DIR, "questions-radio_dnn_416.hed") - binary_dict, continuous_dict = hts.load_question_set(qs_file_name) + binary_dict, numeric_dict = hts.load_question_set(qs_file_name) # Phone-level linguistic features # Linguistic features @@ -73,7 +70,7 @@ def test_linguistic_and_duration_features_for_duration_model(): assert labels.is_state_alignment_label() x = fe.linguistic_features(labels, binary_dict, - continuous_dict, + numeric_dict, add_frame_features=False, subphone_features=None ) @@ -93,7 +90,7 @@ def test_linguistic_and_duration_features_for_duration_model(): def test_linguistic_features_for_acoustic_model(): qs_file_name = join(DATA_DIR, "questions-radio_dnn_416.hed") - binary_dict, continuous_dict = hts.load_question_set(qs_file_name) + binary_dict, numeric_dict = hts.load_question_set(qs_file_name) # Linguistic features # To train acoustic model paired with linguistic features, @@ -103,7 +100,7 @@ def test_linguistic_features_for_acoustic_model(): assert labels.is_state_alignment_label() x = fe.linguistic_features(labels, binary_dict, - continuous_dict, + numeric_dict, add_frame_features=True, subphone_features="full" ) @@ -114,18 +111,18 @@ def test_linguistic_features_for_acoustic_model(): def test_phone_alignment_label(): qs_file_name = join(DATA_DIR, "questions-radio_dnn_416.hed") - binary_dict, continuous_dict = hts.load_question_set(qs_file_name) + binary_dict, numeric_dict = hts.load_question_set(qs_file_name) input_state_label = join(DATA_DIR, "label_phone_align", "arctic_a0001.lab") labels = hts.load(input_state_label) - x = fe.linguistic_features(labels, binary_dict, continuous_dict, + x = fe.linguistic_features(labels, binary_dict, numeric_dict, add_frame_features=False, subphone_features=None) assert not labels.is_state_alignment_label() assert np.all(np.isfinite(x)) for subphone_features in ["coarse_coding", "minimal_phoneme"]: - x = fe.linguistic_features(labels, binary_dict, continuous_dict, + x = fe.linguistic_features(labels, binary_dict, numeric_dict, add_frame_features=True, subphone_features=subphone_features) assert np.all(np.isfinite(x)) diff --git a/tests/test_io.py b/tests/test_io.py index f01aff56..719c7089 100644 --- a/tests/test_io.py +++ b/tests/test_io.py @@ -1,35 +1,34 @@ -# coding: utf-8 -from nnmnkwii.io import hts -from nnmnkwii.frontend import merlin as fe -from os.path import dirname, join import copy -from nnmnkwii.util import example_question_file import re -from nose.tools import raises +from os.path import dirname, join +from nnmnkwii.frontend import merlin as fe +from nnmnkwii.io import hts +from nnmnkwii.util import example_question_file +from nose.tools import raises DATA_DIR = join(dirname(__file__), "data") def test_labels_number_of_frames(): # https://github.com/r9y9/nnmnkwii/issues/85 - binary_dict, continuous_dict = hts.load_question_set( - join(DATA_DIR, "jp.hed")) + binary_dict, numeric_dict = hts.load_question_set(join(DATA_DIR, "jp.hed")) labels = hts.load(join(DATA_DIR, "BASIC5000_0619.lab")) linguistic_features = fe.linguistic_features( - labels, binary_dict, continuous_dict, add_frame_features=True) + labels, binary_dict, numeric_dict, add_frame_features=True + ) assert labels.num_frames() == linguistic_features.shape[0] def test_load_question_set(): - binary_dict, continuous_dict = hts.load_question_set( - example_question_file()) - assert len(binary_dict) + len(continuous_dict) == 416 + binary_dict, numeric_dict = hts.load_question_set(example_question_file()) + assert len(binary_dict) + len(numeric_dict) == 416 def test_htk_style_question_basics(): - binary_dict, continuous_dict = hts.load_question_set( - join(DATA_DIR, "test_question.hed")) + binary_dict, numeric_dict = hts.load_question_set( + join(DATA_DIR, "test_question.hed") + ) # sil k o n i ch i w a sil input_phone_label = join(DATA_DIR, "hts-nit-atr503", "phrase01.lab") labels = hts.load(input_phone_label) @@ -44,12 +43,12 @@ def test_htk_style_question_basics(): QS "R-Phone_o" {*+o=*} QS "RR-Phone_o" {*=o/A:*} """ - LL_muon1 = binary_dict[0][0] - LL_muon2 = binary_dict[1][0] - L_muon1 = binary_dict[2][0] - C_sil = binary_dict[3][0] - R_phone_o = binary_dict[4][0] - RR_phone_o = binary_dict[5][0] + LL_muon1 = binary_dict[0][1][0] + LL_muon2 = binary_dict[1][1][0] + L_muon1 = binary_dict[2][1][0] + C_sil = binary_dict[3][1][0] + R_phone_o = binary_dict[4][1][0] + RR_phone_o = binary_dict[5][1][0] # xx^xx-sil+k=o label = labels[0][-1] @@ -88,25 +87,29 @@ def test_singing_voice_question(): QS "L-Phone_Yuusei_Boin" {*^a-*,*^i-*,*^u-*,*^e-*,*^o-*} CQS "e1" {/E:(\\NOTE)]} """ - binary_dict, continuous_dict = hts.load_question_set( - join(DATA_DIR, "test_jp_svs.hed"), append_hat_for_LL=False, convert_svs_pattern=True) + binary_dict, numeric_dict = hts.load_question_set( + join(DATA_DIR, "test_jp_svs.hed"), + append_hat_for_LL=False, + convert_svs_pattern=True, + ) input_phone_label = join(DATA_DIR, "song070_f00001_063.lab") labels = hts.load(input_phone_label) - feats = fe.linguistic_features(labels, binary_dict, continuous_dict) + feats = fe.linguistic_features(labels, binary_dict, numeric_dict) assert feats.shape == (74, 3) # CQS e1: get the current MIDI number - C_e1 = continuous_dict[0] + C_e1 = numeric_dict[0][1] for idx, lab in enumerate(labels): context = lab[-1] if C_e1.search(context) is not None: from nnmnkwii.frontend import NOTE_MAPPING + assert NOTE_MAPPING[C_e1.findall(context)[0]] == feats[idx, 1] # CQS e57: get pitch diff # In contrast to other continous features, the pitch diff has a prefix "m" or "p" # to indiecate th sign of numbers. - C_e57 = continuous_dict[1] + C_e57 = numeric_dict[1][1] for idx, lab in enumerate(labels): context = lab[-1] if "~p2+" in context: @@ -157,8 +160,9 @@ def test_mono(): sil_regex = re.compile("sil") for indices in [ - labels.silence_label_indices(sil_regex), - labels.silence_phone_indices(sil_regex)]: + labels.silence_label_indices(sil_regex), + labels.silence_phone_indices(sil_regex), + ]: assert len(indices) == 2 assert indices[0] == 0 assert indices[1] == len(labels) - 1 diff --git a/tests/test_metrics.py b/tests/test_metrics.py index 4ac68632..2e052a38 100644 --- a/tests/test_metrics.py +++ b/tests/test_metrics.py @@ -110,7 +110,7 @@ def test_real_metrics(): mgc = X[:, :, :source.mgc_dim // 3] lf0 = X[:, :, source.lf0_start_idx] - vuv = (X[:, :, source.vuv_start_idx] > 0).astype(np.int) + vuv = (X[:, :, source.vuv_start_idx] > 0).astype(int) bap = X[:, :, source.bap_start_idx] mgc_tgt = mgc + 0.01 diff --git a/tests/test_pack_pad_sequence.py b/tests/test_pack_pad_sequence.py index 08520a65..79a97f6c 100644 --- a/tests/test_pack_pad_sequence.py +++ b/tests/test_pack_pad_sequence.py @@ -82,7 +82,7 @@ def test_pack_sequnce(): """ X, Y = _get_small_datasets(padded=False) - lengths = np.array([len(x) for x in X], dtype=np.int)[:, None] + lengths = np.array([len(x) for x in X], dtype=int)[:, None] # We need padded dataset X, Y = _get_small_datasets(padded=True) diff --git a/tests/test_preprocessing.py b/tests/test_preprocessing.py index 8cc880a9..4e83ac09 100644 --- a/tests/test_preprocessing.py +++ b/tests/test_preprocessing.py @@ -1,29 +1,32 @@ -from __future__ import division, print_function, absolute_import +from __future__ import absolute_import, division, print_function -from nnmnkwii.preprocessing.f0 import interp1d -from nnmnkwii.preprocessing import trim_zeros_frames, remove_zeros_frames -from nnmnkwii.preprocessing import adjust_frame_lengths, delta_features -from nnmnkwii.preprocessing import adjust_frame_length -from nnmnkwii.preprocessing import modspec, modphase -from nnmnkwii.preprocessing import preemphasis, inv_preemphasis +import librosa +import numpy as np +import pysptk +import pyworld from nnmnkwii import preprocessing as P -from nnmnkwii.util import example_file_data_sources_for_duration_model -from nnmnkwii.util import example_file_data_sources_for_acoustic_model -from nnmnkwii.util import example_audio_file from nnmnkwii.datasets import FileSourceDataset, PaddedFileSourceDataset - +from nnmnkwii.preprocessing import ( + adjust_frame_length, + adjust_frame_lengths, + delta_features, + inv_preemphasis, + preemphasis, + remove_zeros_frames, + trim_zeros_frames, +) +from nnmnkwii.preprocessing.f0 import interp1d +from nnmnkwii.util import ( + example_audio_file, + example_file_data_sources_for_acoustic_model, + example_file_data_sources_for_duration_model, +) +from nose.plugins.attrib import attr from nose.tools import raises - from scipy.io import wavfile -import numpy as np -import pyworld -import librosa -import pysptk - -from nose.plugins.attrib import attr -def _get_windows_set(): +def _get_windows_set_bandmat(): windows_set = [ # Static [ @@ -44,6 +47,27 @@ def _get_windows_set(): return windows_set +def _get_windows_set(): + windows_set = [ + # Static + [ + [1.0], + ], + # Static + delta + [ + [1.0], + [-0.5, 0.0, 0.5], + ], + # Static + delta + deltadelta + [ + [1.0], + [-0.5, 0.0, 0.5], + [1.0, -2.0, 1.0], + ], + ] + return windows_set + + def test_mulaw(): # Check corner cases assert P.mulaw_quantize(-1.0, 2) == 0 @@ -84,7 +108,9 @@ def test_mulaw(): # torch array input from warnings import warn + import torch + torch.manual_seed(1234) for mu in [128, 256, 512]: x = torch.rand(10) @@ -121,24 +147,22 @@ def test_meanvar_incremental(): assert np.allclose(X_var, X_var_inc) # Split dataset and compute meanvar incrementaly - X_a = X[:N // 2] - X_b = X[N // 2:] - X_mean_a, X_var_a, last_sample_count = P.meanvar( - X_a, return_last_sample_count=True) - assert last_sample_count == np.sum(lengths[:N // 2]) + X_a = X[: N // 2] + X_b = X[N // 2 :] + X_mean_a, X_var_a, last_sample_count = P.meanvar(X_a, return_last_sample_count=True) + assert last_sample_count == np.sum(lengths[: N // 2]) X_mean_b, X_var_b = P.meanvar( - X_b, mean_=X_mean_a, var_=X_var_a, - last_sample_count=last_sample_count) + X_b, mean_=X_mean_a, var_=X_var_a, last_sample_count=last_sample_count + ) assert np.allclose(X_mean, X_mean_b) assert np.allclose(X_var, X_var_b) # meanstd - X_mean_a, X_std_a, last_sample_count = P.meanstd( - X_a, return_last_sample_count=True) - assert last_sample_count == np.sum(lengths[:N // 2]) + X_mean_a, X_std_a, last_sample_count = P.meanstd(X_a, return_last_sample_count=True) + assert last_sample_count == np.sum(lengths[: N // 2]) X_mean_b, X_std_b = P.meanstd( - X_b, mean_=X_mean_a, var_=X_std_a**2, - last_sample_count=last_sample_count) + X_b, mean_=X_mean_a, var_=X_std_a ** 2, last_sample_count=last_sample_count + ) assert np.allclose(X_mean, X_mean_b) assert np.allclose(X_std, X_std_b) @@ -224,7 +248,8 @@ def __test_raise2(x, X_min, X_max): assert np.allclose(x, x_hat) x_hat = P.inv_minmax_scale( - P.minmax_scale(x, scale_=scale_, min_=min_), scale_=scale_, min_=min_) + P.minmax_scale(x, scale_=scale_, min_=min_), scale_=scale_, min_=min_ + ) assert np.allclose(x, x_hat) @@ -279,29 +304,29 @@ def test_trim_zeros_frames(): np.testing.assert_array_equal(actual_default, desired_default) desired_b = np.array(((0, 0), (0, 0), (1, 1), (2, 2))) - actual_b = trim_zeros_frames(arr, trim='b') + actual_b = trim_zeros_frames(arr, trim="b") assert desired_b.shape[1] == actual_b.shape[1] np.testing.assert_array_equal(actual_b, desired_b) desired_f = np.array(((1, 1), (2, 2), (0, 0))) - actual_f = trim_zeros_frames(arr, trim='f') + actual_f = trim_zeros_frames(arr, trim="f") assert desired_f.shape[1] == actual_f.shape[1] np.testing.assert_array_equal(actual_f, desired_f) desired_fb = np.array(((1, 1), (2, 2))) - actual_fb = trim_zeros_frames(arr, trim='fb') + actual_fb = trim_zeros_frames(arr, trim="fb") assert desired_fb.shape[1] == actual_fb.shape[1] np.testing.assert_array_equal(actual_fb, desired_fb) non_zeros = np.array(((1, 1), (2, 2), (3, 3), (4, 4), (5, 5))) desired_b_or_fb_non_zeros = np.array(((1, 1), (2, 2), (3, 3), (4, 4), (5, 5))) - actual_b = trim_zeros_frames(non_zeros, trim='b') + actual_b = trim_zeros_frames(non_zeros, trim="b") np.testing.assert_array_equal(actual_b, desired_b_or_fb_non_zeros) - actual_fb = trim_zeros_frames(non_zeros, trim='fb') + actual_fb = trim_zeros_frames(non_zeros, trim="fb") np.testing.assert_array_equal(actual_fb, desired_b_or_fb_non_zeros) @@ -326,8 +351,12 @@ def test_adjust_frame_length_divisible(): assert (adjust_frame_length(x, pad=True, divisible_by=3)[-1] == 0).all() # make sure we passes extra kwargs to np.pad - assert (adjust_frame_length(x, pad=True, divisible_by=3, - mode="constant", constant_values=1)[-1] == 1).all() + assert ( + adjust_frame_length( + x, pad=True, divisible_by=3, mode="constant", constant_values=1 + )[-1] + == 1 + ).all() # Should preserve dtype for dtype in [np.float32, np.float64]: @@ -342,8 +371,10 @@ def test_adjust_frame_lengths(): D = 5 # 1d and 2d padding - for (x, y) in [(np.random.rand(T1), np.random.rand(T2)), - (np.random.rand(T1, D), np.random.rand(T2, D))]: + for (x, y) in [ + (np.random.rand(T1), np.random.rand(T2)), + (np.random.rand(T1, D), np.random.rand(T2, D)), + ]: x_hat, y_hat = adjust_frame_lengths(x, y, pad=True) assert x_hat.shape == y_hat.shape assert x_hat.shape[0] == 11 @@ -352,24 +383,20 @@ def test_adjust_frame_lengths(): assert x_hat.shape == y_hat.shape assert x_hat.shape[0] == 10 - x_hat, y_hat = adjust_frame_lengths(x, y, pad=True, - divisible_by=2) + x_hat, y_hat = adjust_frame_lengths(x, y, pad=True, divisible_by=2) assert x_hat.shape == y_hat.shape assert x_hat.shape[0] == 12 - x_hat, y_hat = adjust_frame_lengths(x, y, pad=False, - divisible_by=2) + x_hat, y_hat = adjust_frame_lengths(x, y, pad=False, divisible_by=2) assert x_hat.shape == y_hat.shape assert x_hat.shape[0] == 10 # Divisible - x_hat, y_hat = adjust_frame_lengths(x, y, pad=False, - divisible_by=3) + x_hat, y_hat = adjust_frame_lengths(x, y, pad=False, divisible_by=3) assert x_hat.shape == y_hat.shape assert x_hat.shape[0] == 9 - x_hat, y_hat = adjust_frame_lengths(x, y, pad=True, - divisible_by=3) + x_hat, y_hat = adjust_frame_lengths(x, y, pad=True, divisible_by=3) assert x_hat.shape == y_hat.shape assert x_hat.shape[0] == 12 @@ -381,7 +408,8 @@ def test_adjust_frame_lengths(): # make sure we passes extra kwargs to np.pad x_hat, y_hat = adjust_frame_lengths( - x, y, pad=True, divisible_by=3, mode="constant", constant_values=1) + x, y, pad=True, divisible_by=3, mode="constant", constant_values=1 + ) assert x_hat[-1] == 1 and y_hat[-1] == 1 @@ -392,6 +420,9 @@ def test_delta_features(): for windows in _get_windows_set(): y = delta_features(x, windows) assert y.shape == (T, static_dim * len(windows)) + for windows in _get_windows_set_bandmat(): + y = delta_features(x, windows) + assert y.shape == (T, static_dim * len(windows)) def _get_mcep(x, fs, frame_period=5, order=24): @@ -413,11 +444,14 @@ def test_dtw_frame_length_adjustment(): X = FileSourceDataset(X) X_unaligned = X.asarray() # This should trigger frame length adjustment - Y_unaligned = np.pad(X_unaligned, [(0, 0), (5, 0), (0, 0)], - mode="constant", constant_values=0) + Y_unaligned = np.pad( + X_unaligned, [(0, 0), (5, 0), (0, 0)], mode="constant", constant_values=0 + ) Y_unaligned = Y_unaligned[:, :-5, :] - for aligner in [DTWAligner(), IterativeDTWAligner( - n_iter=1, max_iter_gmm=1, n_components_gmm=1)]: + for aligner in [ + DTWAligner(), + IterativeDTWAligner(n_iter=1, max_iter_gmm=1, n_components_gmm=1), + ]: X_aligned, Y_aligned = aligner.transform((X_unaligned, Y_unaligned)) assert X_aligned.shape == Y_aligned.shape @@ -426,7 +460,8 @@ def test_dtw_frame_length_adjustment(): def test_dtw_aligner(): from nnmnkwii.preprocessing.alignment import DTWAligner, IterativeDTWAligner - x, fs = librosa.load(example_audio_file(), sr=None) + fs, x = wavfile.read(example_audio_file()) + x = (x / 32768.0).astype(np.float32) assert fs == 16000 x_fast = librosa.effects.time_stretch(x, 2.0) @@ -447,12 +482,14 @@ def test_dtw_aligner(): assert np.linalg.norm(X_aligned - Y_aligned) < np.linalg.norm(X - Y) X_aligned, Y_aligned = IterativeDTWAligner( - n_iter=2, max_iter_gmm=10, n_components_gmm=2).transform((X, Y)) + n_iter=2, max_iter_gmm=10, n_components_gmm=2 + ).transform((X, Y)) assert X_aligned.shape == Y_aligned.shape assert np.linalg.norm(X_aligned - Y_aligned) < np.linalg.norm(X - Y) # Custom dist function from nnmnkwii.metrics import melcd + X_aligned, Y_aligned = DTWAligner(dist=melcd).transform((X, Y)) assert np.linalg.norm(X_aligned - Y_aligned) < np.linalg.norm(X - Y) @@ -482,15 +519,15 @@ def test_modspec_smoothing(): for norm in [None, "ortho"]: for n in [1024, 2048]: # Nyquist freq - y_hat = P.modspec_smoothing(y, modfs, n=n, norm=norm, - cutoff=modfs // 2, - log_domain=log_domain) + y_hat = P.modspec_smoothing( + y, modfs, n=n, norm=norm, cutoff=modfs // 2, log_domain=log_domain + ) assert np.allclose(y, y_hat) # Smooth - P.modspec_smoothing(y, modfs, n=n, norm=norm, - cutoff=modfs // 4, - log_domain=log_domain) + P.modspec_smoothing( + y, modfs, n=n, norm=norm, cutoff=modfs // 4, log_domain=log_domain + ) # Cutoff frequency larger than modfs//2 @raises(ValueError) diff --git a/tests/test_util.py b/tests/test_util.py index 1b1ce4a6..dd787762 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -55,7 +55,7 @@ def dummmy_func2d(x): def _get_banded_test_mat(win_mats, T): - import bandmat as bm + from nnmnkwii.paramgen import _bandmat as bm sdw = max([win_mat.l + win_mat.u for win_mat in win_mats]) P = bm.zeros(sdw, sdw, T)