Skip to content

Commit

Permalink
Merge pull request #557 from xylar/add-cull-mpas-dataset
Browse files Browse the repository at this point in the history
Add functions for culling an MPAS dataset
  • Loading branch information
xylar authored Mar 14, 2024
2 parents 2070522 + e147e4b commit b83f9d1
Show file tree
Hide file tree
Showing 3 changed files with 300 additions and 0 deletions.
10 changes: 10 additions & 0 deletions conda_package/docs/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,16 @@ Mesh conversion
cull
mask

.. currentmodule:: mpas_tools.mesh.cull

.. autosummary::
:toctree: generated/

write_map_culled_to_base
map_culled_to_base
write_culled_dataset
cull_dataset

.. currentmodule:: mpas_tools.mesh.mask

.. autosummary::
Expand Down
73 changes: 73 additions & 0 deletions conda_package/docs/mesh_conversion.rst
Original file line number Diff line number Diff line change
Expand Up @@ -494,6 +494,79 @@ The command-line tool takes the following arguments:
creation ('fork', 'spawn' or 'forkserver')
.. _cull_mpas_dataset:

Culling MPAS Datasets
=====================

The tools described in :ref:`cell_culler` can be used to create a culled
horizontal MPAS mesh. Once a culled MPAS mesh has been created, an MPAS
dataset on the unculled mesh can be cropped to the culled mesh using the
the :py:func:`mpas_tools.mesh.cull.cull_dataset()` or
:py:func:`mpas_tools.mesh.cull.write_culled_dataset()` functions. These
functions take a dataset (or filename) to crop as well as datasets (or
filenames) for the unculled and culled horizontal MPAS meshes. They return
(or write out) the culled version of the data set. Fields that exist in
the culled horizonal mesh are copied from the culled mesh, rather than cropped
from the dataset. This because we wish to keep the cropped horizontal mesh
exactly as it was produced by the culling tool, which may not correspond to
a cropped version of the field from the original mesh. For example, fields
are reindexed during culling and coordinates are recomputed.

It may be useful to compute and store the maps from cells, edges and vertices
on the culled mesh back to the unculled mesh for reuse. This can be
accomplished by calling the :py:func:`mpas_tools.mesh.cull.map_culled_to_base()`
or :py:func:`mpas_tools.mesh.cull.write_map_culled_to_base()` functions.

An example workflow that culls out ice-shelf cavities from an MPAS-Ocean
initial condition might look like the following. In this case the file
``culled_mesh.nc`` is a mesh where land (and the grounded portion of the
ice sheet) has been removed but where ice-shelf cavities are still present.
It serves as the "base" mesh for the purposes of this example.
``culled_mesh_no_isc.nc`` is created (if it doesn't already exist) with the
ice-shelf cavities removed as well, so it is the "culled" mesh in this example.
We store the mapping betwen the two horizontal meshes in
``no_isc_to_culled_map.nc`` in case we want to resue it later. The initial
condition is read from ``initial_state.nc`` and the culled version is written
to ``initial_state_no_isc.nc``:

.. code-block:: python
import os
import xarray as xr
from mpas_tools.io import write_netcdf
from mpas_tools.mesh.conversion import cull
from mpas_tools.mesh.cull import write_map_culled_to_base, write_culled_dataset
from mpas_tools.logging import LoggingContext
in_filename = 'initial_state.nc'
out_filename = 'initial_state_no_isc.nc'
base_mesh_filename = 'culled_mesh.nc'
culled_mesh_filename = 'culled_mesh_no_isc.nc'
map_filename = 'no_isc_to_culled_map.nc'
if not os.path.exists(culled_mesh_filename):
ds_culled_mesh = xr.open_dataset(base_mesh_filename)
ds_init = xr.open_dataset(in_filename)
ds_culled_mesh['cullCell'] = ds_init.landIceMask
ds_culled_mesh_no_isc = cull(ds_culled_mesh)
write_netcdf(ds_culled_mesh_no_isc, culled_mesh_filename)
if not os.path.exists(map_filename):
write_map_culled_to_base(base_mesh_filename=base_mesh_filename,
culled_mesh_filename=culled_mesh_filename,
out_filename=map_filename)
with LoggingContext('test') as logger:
write_culled_dataset(in_filename=in_filename, out_filename=out_filename,
base_mesh_filename=base_mesh_filename,
culled_mesh_filename=culled_mesh_filename,
map_culled_to_base_filename=map_filename,
logger=logger)
.. _merge_split:

Merging and Splitting
Expand Down
217 changes: 217 additions & 0 deletions conda_package/mpas_tools/mesh/cull.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,217 @@
import numpy as np
import xarray as xr
from scipy.spatial import KDTree

from mpas_tools.io import write_netcdf


def write_map_culled_to_base(base_mesh_filename, culled_mesh_filename,
out_filename, workers=-1):
"""
Write out a file with maps from cells, edges and vertices on a culled mesh
to the same elements on a base mesh. All elements in the culled mesh must
be in the base mesh.
Parameters
----------
base_mesh_filename : str
A file with the horizontal MPAS mesh before culling
culled_mesh_filename : str
A file with the culled horizonal MPAS mesh
out_filename : str
A file to which the maps should be written. The dataset will include
``mapCulledToBaseCell``, ``mapCulledToBaseEdge`` and
``mapCulledToBaseVertex``, each of which contains the base mesh index
corresponding to the same element from the culled mesh.
workers : int, optional
The number of threads to use to query base mesh elements. The default
is all available threads (``workers=-1``)
"""
ds_base = xr.open_dataset(base_mesh_filename)
ds_culled = xr.open_dataset(culled_mesh_filename)
ds_map_culled_to_base = map_culled_to_base(ds_base, ds_culled,
workers=workers)
write_netcdf(ds_map_culled_to_base, out_filename)


def map_culled_to_base(ds_base, ds_culled, workers=-1):
"""
Create maps from cells, edges and vertices on a culled mesh to the same
elements on a base mesh. All elements in the culled mesh must be in the
base mesh.
Parameters
----------
ds_base : xarray.Dataset
The horizontal MPAS mesh before culling
ds_culled : xarray.Dataset
The culled horizonal MPAS mesh
workers : int, optional
The number of threads to use to query base mesh elements. The default
is all available threads (``workers=-1``)
Returns
-------
ds_map_culled_to_base : xarray.Dataset
A dataset with ``mapCulledToBaseCell``, ``mapCulledToBaseEdge`` and
``mapCulledToBaseVertex``, each of which contains the base mesh index
corresponding to the same element from the culled mesh.
"""
ds_map_culled_to_base = xr.Dataset()
for dim, suffix in [('nCells', 'Cell'),
('nEdges', 'Edge'),
('nVertices', 'Vertex')]:
_map_culled_to_base_grid_type(ds_base, ds_culled,
ds_map_culled_to_base, dim, suffix,
workers)

return ds_map_culled_to_base


def write_culled_dataset(in_filename, out_filename, base_mesh_filename,
culled_mesh_filename,
map_culled_to_base_filename=None, workers=-1,
logger=None):
"""
Create a new dataset in which all fields from ``ds`` have been culled
from the base mesh to the culled mesh. Fields present in
``ds_culled_mesh`` are copied over rather than culled from ``ds``.
Parameters
----------
in_filename : str
A file containing an MPAS dataset to cull
output_filename : str
A file to write the culled MPAS dataset to
base_mesh_filename : str
A file with the horizontal MPAS mesh before culling
culled_mesh_filename : str
A file with the culled horizonal MPAS mesh
map_culled_to_base_filename : str, optional
A file with an existing map from the base to the culled mesh created
with ``write_map_culled_to_base()`` or ``map_culled_to_base()``. The
dataset will be created (but not returned or saved to disk) if it is
not passed as an argument.
workers : int, optional
The number of threads to use to query base mesh elements. The default
is all available threads (``workers=-1``)
logger : logging.Logger, optional
A logger for the output
"""
ds = xr.open_dataset(in_filename)
ds_base_mesh = xr.open_dataset(base_mesh_filename)
ds_culled_mesh = xr.open_dataset(culled_mesh_filename)
if map_culled_to_base_filename is None:
ds_map_culled_to_base = None
else:
ds_map_culled_to_base = xr.open_dataset(map_culled_to_base_filename)

ds_culled = cull_dataset(
ds=ds, ds_base_mesh=ds_base_mesh, ds_culled_mesh=ds_culled_mesh,
ds_map_culled_to_base=ds_map_culled_to_base,
workers=workers, logger=logger)
write_netcdf(ds_culled, out_filename)


def cull_dataset(ds, ds_base_mesh, ds_culled_mesh, ds_map_culled_to_base=None,
workers=-1, logger=None):
"""
Create a new dataset in which all fields from ``ds`` have been culled
from the base mesh to the culled mesh. Fields present in
``ds_culled_mesh`` are copied over rather than culled from ``ds``.
Parameters
----------
ds : xarray.Dataset
An MPAS dataset to cull
ds_base_mesh : xarray.Dataset
The horizontal MPAS mesh before culling
ds_culled_mesh : xarray.Dataset
The culled horizonal MPAS mesh
ds_map_culled_to_base : xarray.Dataset, optional
An existing map from the base to the culled mesh created with
``write_map_culled_to_base()`` or ``map_culled_to_base()``. The dataset
will be created (but not returned or saved to disk) if it is not passed
as an argument.
workers : int, optional
The number of threads to use to query base mesh elements. The default
is all available threads (``workers=-1``)
logger : logging.Logger, optional
A logger for the output
Returns
-------
ds_culled : xarray.Dataset
An culled MPAS dataset
"""
if ds_map_culled_to_base is None:
if logger is not None:
logger.info('Creating culled-to-base mapping')
ds_map_culled_to_base = map_culled_to_base(
ds_base=ds_base_mesh, ds_culled=ds_culled_mesh, workers=workers)

if logger is not None:
logger.info('Culling dataset')
ds_culled = ds
if 'nCells' in ds_culled.dims:
ds_culled = ds_culled.isel(
nCells=ds_map_culled_to_base['mapCulledToBaseCell'].values)
if 'nEdges' in ds_culled.dims:
ds_culled = ds_culled.isel(
nEdges=ds_map_culled_to_base['mapCulledToBaseEdge'].values)
if 'nVertices' in ds_culled.dims:
ds_culled = ds_culled.isel(
nVertices=ds_map_culled_to_base['mapCulledToBaseVertex'].values)

if logger is not None:
logger.info('Replacing variables from culled mesh')
for var in ds.data_vars:
if var in ds_culled_mesh:
if logger is not None:
logger.info(f' replacing: {var}')
# replace this field with the version from the culled mesh
ds_culled[var] = ds_culled_mesh[var]
else:
if logger is not None:
logger.info(f' keeping: {var}')

return ds_culled


def _map_culled_to_base_grid_type(ds_base, ds_culled, ds_map_culled_to_base,
dim, suffix, workers):
x_base = ds_base[f'x{suffix}'].values
y_base = ds_base[f'y{suffix}'].values
z_base = ds_base[f'z{suffix}'].values

x_culled = ds_culled[f'x{suffix}'].values
y_culled = ds_culled[f'y{suffix}'].values
z_culled = ds_culled[f'z{suffix}'].values

# create a map from lat-lon pairs to base-mesh cell indices
points = np.vstack((x_base, y_base, z_base)).T

tree = KDTree(points)

points = np.vstack((x_culled, y_culled, z_culled)).T

_, culled_to_base_map = tree.query(points, workers=workers)

ds_map_culled_to_base[f'mapCulledToBase{suffix}'] = \
((dim,), culled_to_base_map)

0 comments on commit b83f9d1

Please sign in to comment.