diff --git a/README.md b/README.md index 6cb69901..fd5baf63 100644 --- a/README.md +++ b/README.md @@ -71,6 +71,9 @@ running. Install the package by typing the following command in a command termin To install the latest development version via pip, see the [documentation][doc_install_link]. +One thing to point out is that this way, the non-parallel version of GSTools +is installed. In case you want the parallel version, follow these easy +[steps][doc_install_link]. ## Citation diff --git a/docs/source/index.rst b/docs/source/index.rst index ecad0583..0f5e181f 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -79,17 +79,26 @@ If something went wrong during installation, try the :code:`-I` `flag from pip < **Speeding up GSTools by parallelization** -To enable the OpenMP support, you have to provide a C compiler and OpenMP. -Parallel support is controlled by an environment variable ``GSTOOLS_BUILD_PARALLEL``, -that can be ``0`` or ``1`` (interpreted as ``0`` if not present). -GSTools then needs to be installed from source: +We provide two possibilities to run GSTools in parallel, often causing a +massive improvement in runtime. In either case, the number of parallel +threads can be set with the global variable `config.NUM_THREADS`. If not set, +all cores are used. +When using conda, the parallel version of GSTools is installed per default. + +***Parallelizing Cython*** + +To enable the OpenMP support in Cython when using pip, you have to provide a C +compiler and OpenMP. Parallel support is controlled by an environment variable +``GSTOOLS_BUILD_PARALLEL``, that can be ``0`` or ``1`` (interpreted as ``0`` +if not present). GSTools then needs to be installed from source: .. code-block:: none export GSTOOLS_BUILD_PARALLEL=1 pip install --no-binary=gstools gstools -Note, that the ``--no-binary=gstools`` option forces pip to not use a wheel for GSTools. +Note, that the ``--no-binary=gstools`` option forces pip to not use a wheel +for GSTools. For the development version, you can do almost the same: @@ -98,19 +107,18 @@ For the development version, you can do almost the same: export GSTOOLS_BUILD_PARALLEL=1 pip install git+git://github.com/GeoStat-Framework/GSTools.git@main -The number of parallel threads can be set with the global variable `config.NUM_THREADS`. -**Using experimental GSTools-Core for even more speed** +***Using GSTools-Core for parallelization and even more speed*** You can install the optional dependency `GSTools-Core `_, -which is a re-implementation of the main algorithms used in GSTools. The new +which is a re-implementation of the algorithms used in GSTools. The new package uses the language Rust and it should be faster (in some cases by orders of magnitude), safer, and it will potentially completely replace the current standard implementation in Cython. Once the package GSTools-Core is available on your machine, it will be used by default. In case you want to switch back to -the Cython implementation, you can set :code:`gstools.config.USE_RUST=False` in -your code. This also works at runtime. You can install the optional dependency -e.g. by +the Cython implementation, you can set +:code:`gstools.config.USE_GSTOOLS_CORE=False` in your code. This also works at +runtime. You can install the optional dependency e.g. by .. code-block:: none @@ -122,10 +130,8 @@ or by manually installing the package pip install gstools-core -GSTools-Core will automatically use all your cores in parallel, without having -to use OpenMP or a local C compiler. -In case you want to restrict the number of threads used, you can use the -global variable `config.NUM_THREADS` to the desired number. +GSTools-Core will automatically run in parallel, without having to use provide +OpenMP or a local C compiler. Citation diff --git a/src/gstools/config.py b/src/gstools/config.py index 24ce20c7..9c399f68 100644 --- a/src/gstools/config.py +++ b/src/gstools/config.py @@ -11,6 +11,8 @@ try: # pragma: no cover import gstools_core - USE_RUST = True + _GSTOOLS_CORE_AVAIL = True + USE_GSTOOLS_CORE = True except ImportError: - USE_RUST = False + _GSTOOLS_CORE_AVAIL = False + USE_GSTOOLS_CORE = False diff --git a/src/gstools/field/generator.py b/src/gstools/field/generator.py index 5beab10d..d19bd6d3 100644 --- a/src/gstools/field/generator.py +++ b/src/gstools/field/generator.py @@ -22,13 +22,14 @@ from gstools import config from gstools.covmodel.base import CovModel +from gstools.field.summator import summate as summate_c +from gstools.field.summator import summate_incompr as summate_incompr_c from gstools.random.rng import RNG -if config.USE_RUST: # pragma: no cover +if config._GSTOOLS_CORE_AVAIL: # pylint: disable=W0212; # pragma: no cover # pylint: disable=E0401 - from gstools_core import summate, summate_incompr -else: - from gstools.field.summator import summate, summate_incompr + from gstools_core import summate as summate_gsc + from gstools_core import summate_incompr as summate_incompr_gsc __all__ = ["Generator", "RandMeth", "IncomprRandMeth"] @@ -36,6 +37,37 @@ SAMPLING = ["auto", "inversion", "mcmc"] +def _summate(cov_samples, z_1, z_2, pos, num_threads=None): + """A wrapper function for calling the randomization algorithms.""" + if ( + config.USE_GSTOOLS_CORE + and config._GSTOOLS_CORE_AVAIL # pylint: disable=W0212 + ): + summate_fct = summate_gsc # pylint: disable=E0606 + else: + summate_fct = summate_c + return summate_fct(cov_samples, z_1, z_2, pos, num_threads) + + +def _summate_incompr( + cov_samples, + z_1, + z_2, + pos, + num_threads=None, +): + """A wrapper function for calling the incompr. randomization algorithms.""" + + if ( + config.USE_GSTOOLS_CORE + and config._GSTOOLS_CORE_AVAIL # pylint: disable=W0212 + ): + summate_incompr_fct = summate_incompr_gsc # pylint: disable=E0606 + else: + summate_incompr_fct = summate_incompr_c + return summate_incompr_fct(cov_samples, z_1, z_2, pos, num_threads) + + class Generator(ABC): """ Abstract generator class. @@ -194,8 +226,8 @@ def __init__( def __call__(self, pos, add_nugget=True): """Calculate the random modes for the randomization method. - This method calls the `summate_*` Cython methods, which are the - heart of the randomization method. + This method calls the `summate_*` Rust or Cython methods, which are + the heart of the randomization method. Parameters ---------- @@ -210,7 +242,7 @@ def __call__(self, pos, add_nugget=True): the random modes """ pos = np.asarray(pos, dtype=np.double) - summed_modes = summate( + summed_modes = _summate( self._cov_sample, self._z_1, self._z_2, pos, config.NUM_THREADS ) nugget = self.get_nugget(summed_modes.shape) if add_nugget else 0.0 @@ -473,7 +505,7 @@ def __init__( def __call__(self, pos, add_nugget=True): """Calculate the random modes for the randomization method. - This method calls the `summate_incompr_*` Cython methods, + This method calls the `summate_incompr_*` Rust or Cython methods, which are the heart of the randomization method. In this class the method contains a projector to ensure the incompressibility of the vector field. @@ -491,7 +523,7 @@ def __call__(self, pos, add_nugget=True): the random modes """ pos = np.asarray(pos, dtype=np.double) - summed_modes = summate_incompr( + summed_modes = _summate_incompr( self._cov_sample, self._z_1, self._z_2, diff --git a/src/gstools/field/summator.pyx b/src/gstools/field/summator.pyx index 8f6c6f7f..5af23b91 100644 --- a/src/gstools/field/summator.pyx +++ b/src/gstools/field/summator.pyx @@ -6,7 +6,7 @@ This is the randomization method summator, implemented in cython. import numpy as np from cython.parallel import prange -IF OPENMP: +if OPENMP: cimport openmp cimport numpy as np @@ -17,9 +17,9 @@ def set_num_threads(num_threads): cdef int num_threads_c = 1 if num_threads is None: # OPENMP set during setup - IF OPENMP: + if OPENMP: num_threads_c = openmp.omp_get_num_procs() - ELSE: + else: ... else: num_threads_c = num_threads diff --git a/src/gstools/krige/base.py b/src/gstools/krige/base.py index 78aa2a9f..b30190c2 100755 --- a/src/gstools/krige/base.py +++ b/src/gstools/krige/base.py @@ -18,18 +18,22 @@ from gstools import config from gstools.field.base import Field +from gstools.krige.krigesum import calc_field_krige as calc_field_krige_c +from gstools.krige.krigesum import ( + calc_field_krige_and_variance as calc_field_krige_and_variance_c, +) from gstools.krige.tools import get_drift_functions, set_condition from gstools.tools.geometric import rotated_main_axes from gstools.tools.misc import eval_func from gstools.variogram import vario_estimate -if config.USE_RUST: # pragma: no cover +if config._GSTOOLS_CORE_AVAIL: # pylint: disable=W0212; # pragma: no cover # pylint: disable=E0401 - from gstools_core import calc_field_krige, calc_field_krige_and_variance -else: - from gstools.krige.krigesum import ( - calc_field_krige, - calc_field_krige_and_variance, + from gstools_core import ( + calc_field_krige as calc_field_krige_gsc, # pylint: disable=E0606 + ) + from gstools_core import ( + calc_field_krige_and_variance as calc_field_krige_and_variance_gsc, # pylint: disable=E0606 ) __all__ = ["Krige"] @@ -39,6 +43,40 @@ """dict: Standard pseudo-inverse routines""" +def _calc_field_krige(krig_mat, krig_vecs, cond, num_threads=None): + """A wrapper function for calling the krige algorithms.""" + if ( + config.USE_GSTOOLS_CORE + and config._GSTOOLS_CORE_AVAIL # pylint: disable=W0212 + ): # pylint: disable=W0212 + calc_field_krige_fct = ( # pylint: disable=W0201 + calc_field_krige_gsc # pylint: disable=E0606 + ) + else: + calc_field_krige_fct = calc_field_krige_c # pylint: disable=W0201 + return calc_field_krige_fct(krig_mat, krig_vecs, cond, num_threads) + + +def _calc_field_krige_and_variance( + krig_mat, krig_vecs, cond, num_threads=None +): + """A wrapper function for calling the krige algorithms.""" + if ( + config.USE_GSTOOLS_CORE + and config._GSTOOLS_CORE_AVAIL # pylint: disable=W0212 + ): + calc_field_krige_and_variance_fct = ( # pylint: disable=W0201 + calc_field_krige_and_variance_gsc # pylint: disable=E0606 + ) + else: + calc_field_krige_and_variance_fct = ( # pylint: disable=W0201 + calc_field_krige_and_variance_c + ) + return calc_field_krige_and_variance_fct( + krig_mat, krig_vecs, cond, num_threads + ) + + class Krige(Field): """ A Swiss Army knife for kriging. @@ -284,11 +322,13 @@ def __call__( def _summate(self, field, krige_var, c_slice, k_vec, return_var): if return_var: # estimate error variance - field[c_slice], krige_var[c_slice] = calc_field_krige_and_variance( - self._krige_mat, k_vec, self._krige_cond + field[c_slice], krige_var[c_slice] = ( + _calc_field_krige_and_variance( + self._krige_mat, k_vec, self._krige_cond + ) ) else: # solely calculate the interpolated field - field[c_slice] = calc_field_krige( + field[c_slice] = _calc_field_krige( self._krige_mat, k_vec, self._krige_cond ) diff --git a/src/gstools/krige/krigesum.pyx b/src/gstools/krige/krigesum.pyx index 7611f4a0..b32c70a7 100644 --- a/src/gstools/krige/krigesum.pyx +++ b/src/gstools/krige/krigesum.pyx @@ -6,7 +6,7 @@ This is a summator for the kriging routines import numpy as np from cython.parallel import prange -IF OPENMP: +if OPENMP: cimport openmp cimport numpy as np @@ -16,9 +16,9 @@ def set_num_threads(num_threads): cdef int num_threads_c = 1 if num_threads is None: # OPENMP set during setup - IF OPENMP: + if OPENMP: num_threads_c = openmp.omp_get_num_procs() - ELSE: + else: ... else: num_threads_c = num_threads @@ -60,7 +60,7 @@ def calc_field_krige( const double[:, :] krig_mat, const double[:, :] krig_vecs, const double[:] cond, - const int num_threads=1, + num_threads=None, ): cdef int mat_i = krig_mat.shape[0] diff --git a/src/gstools/variogram/estimator.pyx b/src/gstools/variogram/estimator.pyx index e00824be..9387aa82 100644 --- a/src/gstools/variogram/estimator.pyx +++ b/src/gstools/variogram/estimator.pyx @@ -7,20 +7,23 @@ This is the variogram estimater, implemented in cython. import numpy as np from cython.parallel import parallel, prange -IF OPENMP: +if OPENMP: cimport openmp cimport numpy as np from libc.math cimport M_PI, acos, atan2, cos, fabs, isnan, pow, sin, sqrt +# numpy's "bool" +ctypedef unsigned char uint8 + def set_num_threads(num_threads): cdef int num_threads_c = 1 if num_threads is None: # OPENMP set during setup - IF OPENMP: + if OPENMP: num_threads_c = openmp.omp_get_num_procs() - ELSE: + else: ... else: num_threads_c = num_threads @@ -342,7 +345,7 @@ def structured( def ma_structured( const double[:, :] f, - const bint[:, :] mask, + uint8[:, :] mask, # numpy's bools are 8bit vars str estimator_type='m', num_threads=None, ): @@ -365,7 +368,7 @@ def ma_structured( for i in range(i_max): for j in range(j_max): for k in prange(1, k_max-i): - if not mask[i, j] and not mask[i+k, j]: + if mask[i, j] == 0 and mask[i+k, j] == 0: counts[k] += 1 variogram[k] += estimator_func(f[i, j] - f[i+k, j]) diff --git a/src/gstools/variogram/variogram.py b/src/gstools/variogram/variogram.py index afcf336f..6c51f172 100644 --- a/src/gstools/variogram/variogram.py +++ b/src/gstools/variogram/variogram.py @@ -23,20 +23,17 @@ generate_grid, ) from gstools.variogram.binning import standard_bins +from gstools.variogram.estimator import directional as directional_c +from gstools.variogram.estimator import ma_structured as ma_structured_c +from gstools.variogram.estimator import structured as structured_c +from gstools.variogram.estimator import unstructured as unstructured_c -if config.USE_RUST: # pragma: no cover +if config._GSTOOLS_CORE_AVAIL: # pylint: disable=W0212; # pragma: no cover # pylint: disable=E0401 - from gstools_core import variogram_directional as directional - from gstools_core import variogram_ma_structured as ma_structured - from gstools_core import variogram_structured as structured - from gstools_core import variogram_unstructured as unstructured -else: - from gstools.variogram.estimator import ( - directional, - ma_structured, - structured, - unstructured, - ) + from gstools_core import variogram_directional as directional_gsc + from gstools_core import variogram_ma_structured as ma_structured_gsc + from gstools_core import variogram_structured as structured_gsc + from gstools_core import variogram_unstructured as unstructured_gsc __all__ = [ "vario_estimate", @@ -50,6 +47,97 @@ AXIS_DIR = {"x": 0, "y": 1, "z": 2} +def _directional( + field, + bin_edges, + pos, + direction, + angles_tol=np.pi / 8.0, + bandwidth=-1.0, + separate_dirs=False, + estimator_type="m", + num_threads=None, +): + """A wrapper function for calling the directional variogram algorithms.""" + if ( + config.USE_GSTOOLS_CORE + and config._GSTOOLS_CORE_AVAIL # pylint: disable=W0212 + ): # pylint: disable=W0212 + directional_fct = directional_gsc # pylint: disable=E0606 + else: + directional_fct = directional_c + return directional_fct( + field, + bin_edges, + pos, + direction, + angles_tol, + bandwidth, + separate_dirs, + estimator_type, + num_threads, + ) + + +def _unstructured( + field, + bin_edges, + pos, + estimator_type="m", + distance_type="e", + num_threads=None, +): + """A wrapper function for calling the unstructured variogram algorithms.""" + if ( + config.USE_GSTOOLS_CORE + and config._GSTOOLS_CORE_AVAIL # pylint: disable=W0212 + ): # pylint: disable=W0212 + unstructured_fct = unstructured_gsc # pylint: disable=E0606 + else: + unstructured_fct = unstructured_c + return unstructured_fct( + field, + bin_edges, + pos, + estimator_type, + distance_type, + num_threads, + ) + + +def _structured( + field, + estimator_type="m", + num_threads=None, +): + """A wrapper function for calling the structured variogram algorithms.""" + if ( + config.USE_GSTOOLS_CORE + and config._GSTOOLS_CORE_AVAIL # pylint: disable=W0212 + ): # pylint: disable=W0212 + structured_fct = structured_gsc # pylint: disable=E0606 + else: + structured_fct = structured_c + return structured_fct(field, estimator_type, num_threads) + + +def _ma_structured( + field, + mask, + estimator_type="m", + num_threads=None, +): + """A wrapper function for calling the masked struct. variogram algorithms.""" + if ( + config.USE_GSTOOLS_CORE + and config._GSTOOLS_CORE_AVAIL # pylint: disable=W0212 + ): # pylint: disable=W0212 + ma_structured_fct = ma_structured_gsc # pylint: disable=E0606 + else: + ma_structured_fct = ma_structured_c + return ma_structured_fct(field, mask, estimator_type, num_threads) + + def _set_estimator(estimator): """Translate the verbose Python estimator identifier to single char.""" if estimator.lower() == "matheron": @@ -369,7 +457,7 @@ def vario_estimate( if dir_no == 0: # "h"aversine or "e"uclidean distance type distance_type = "h" if latlon else "e" - estimates, counts = unstructured( + estimates, counts = _unstructured( field, bin_edges, pos, @@ -378,7 +466,7 @@ def vario_estimate( num_threads=config.NUM_THREADS, ) else: - estimates, counts = directional( + estimates, counts = _directional( field, bin_edges, pos, @@ -471,8 +559,6 @@ def vario_estimate_axis( if missing: field.mask = np.logical_or(field.mask, missing_mask) mask = np.ma.getmaskarray(field) - if not config.USE_RUST: - mask = np.asarray(mask, dtype=np.int32) else: field = np.atleast_1d(np.asarray(field, dtype=np.double)) missing_mask = None # free space @@ -488,10 +574,10 @@ def vario_estimate_axis( cython_estimator = _set_estimator(estimator) if masked: - return ma_structured( + return _ma_structured( field, mask, cython_estimator, num_threads=config.NUM_THREADS ) - return structured(field, cython_estimator, num_threads=config.NUM_THREADS) + return _structured(field, cython_estimator, num_threads=config.NUM_THREADS) # for backward compatibility