diff --git a/Changelog.rst b/Changelog.rst index b8de197a02..8eacae8af3 100644 --- a/Changelog.rst +++ b/Changelog.rst @@ -5,8 +5,13 @@ Version NEXTVERSION * Python 3.9 support removed (https://github.com/NCAS-CMS/cf-python/issues/896) +* Allow regridding for very large grids. New keyword parameter to + `cf.Field.regrids` and `cf.Field.regridc`: ``dst_grid_partitions`` + (https://github.com/NCAS-CMS/cf-python/issues/878) * Changed dependency: ``Python>=3.10.0`` +---- + Version 3.18.1 -------------- diff --git a/README.md b/README.md index ce752fd0af..d422dc0a07 100644 --- a/README.md +++ b/README.md @@ -111,7 +111,7 @@ of its array manipulation and can: * regrid structured grid, mesh and DSG field constructs with (multi-)linear, nearest neighbour, first- and second-order conservative and higher order patch recovery methods, including 3-d - regridding, + regridding, and large-grid support, * apply convolution filters to field constructs, * create running means from field constructs, * apply differential operators to field constructs, @@ -125,12 +125,8 @@ Visualization Powerful and flexible visualizations of `cf` field constructs, designed to be produced and configured in as few lines of code as possible, are available with the [cf-plot -package](https://ncas-cms.github.io/cf-plot/build/index.html), which -needs to be installed separately to the `cf` package. - -See the [cf-plot -gallery](https://ncas-cms.github.io/cf-plot/build/gallery.html) for a -range of plotting possibilities with example code. +package](https://ncas-cms.github.io/cf-plot), which needs to be +installed separately to the `cf` package. 
![Example outputs of cf-plot displaying selected aspects of `cf` field constructs](https://raw.githubusercontent.com/NCAS-CMS/cf-plot/master/docs/source/images/cf_gallery_image.png) diff --git a/cf/data/dask_regrid.py b/cf/data/dask_regrid.py index 99b546400f..2536c4c606 100644 --- a/cf/data/dask_regrid.py +++ b/cf/data/dask_regrid.py @@ -507,10 +507,10 @@ def _regrid( # 'weights.indptr', 'weights.indices', and # 'weights.data' directly, rather than iterating # over rows of 'weights' and using - # 'weights.getrow'. Also, 'np.count_nonzero' is much - # faster than 'np.any' and 'np.all'. + # 'weights.getrow'. Also, `np.count_nonzero` is much + # faster than `np.any` and `np.all`. count_nonzero = np.count_nonzero - indptr = weights.indptr.tolist() + indptr = weights.indptr indices = weights.indices data = weights.data for j, (i0, i1) in enumerate(zip(indptr[:-1], indptr[1:])): @@ -529,8 +529,6 @@ def _regrid( w[mask] = 0 data[i0:i1] = w - del indptr - elif method in ("linear", "bilinear"): # 2) Linear methods: # @@ -549,23 +547,21 @@ def _regrid( # 'weights.indptr', 'weights.indices', and # 'weights.data' directly, rather than iterating # over rows of 'weights' and using - # 'weights.getrow'. Also, 'np.count_nonzero' is much - # faster than 'np.any' and 'np.all'. + # 'weights.getrow'. Also, `np.count_nonzero` is much + # faster than `np.any` and `np.all`. 
count_nonzero = np.count_nonzero where = np.where - indptr = weights.indptr.tolist() + indptr = weights.indptr indices = weights.indices - pos_data = weights.data >= min_weight + data = weights.data for j, (i0, i1) in enumerate(zip(indptr[:-1], indptr[1:])): mask = src_mask[indices[i0:i1]] if not count_nonzero(mask): continue - if where((mask) & (pos_data[i0:i1]))[0].size: + if where(data[i0:i1][mask] >= min_weight)[0].size: dst_mask[j] = True - del indptr, pos_data - elif method == "nearest_dtos": # 3) Nearest neighbour dtos method: # @@ -584,10 +580,10 @@ def _regrid( # 'weights.indptr', 'weights.indices', and # 'weights.data' directly, rather than iterating # over rows of 'weights' and using - # 'weights.getrow'. Also, 'np.count_nonzero' is much - # faster than 'np.any' and 'np.all'. + # 'weights.getrow'. Also, `np.count_nonzero` is much + # faster than `np.any` and `np.all`. count_nonzero = np.count_nonzero - indptr = weights.indptr.tolist() + indptr = weights.indptr indices = weights.indices for j, (i0, i1) in enumerate(zip(indptr[:-1], indptr[1:])): mask = src_mask[indices[i0:i1]] @@ -597,8 +593,6 @@ def _regrid( elif n_masked: weights.data[np.arange(i0, i1)[mask]] = 0 - del indptr - elif method in ( "patch", "conservative_2nd", diff --git a/cf/docstring/docstring.py b/cf/docstring/docstring.py index 9006299d75..dfd65e28d1 100644 --- a/cf/docstring/docstring.py +++ b/cf/docstring/docstring.py @@ -74,7 +74,11 @@ weights with the source data. (Note that whilst the `esmpy` package is also able to create the regridded data from its weights, this feature can't be integrated with the `dask` - framework that underpins the field's data.)""", + framework that underpins the field's data.) 
+ + The calculation of weights for large grids can have a very + high memory requirement, but this can be reduced by setting + the *dst_grid_partitions* parameter.""", # regrid Logging "{{regrid Logging}}": """**Logging** @@ -436,9 +440,10 @@ **Performance** - The computation of the weights can be much more costly - than the regridding itself, in which case reading - pre-calculated weights can improve performance. + The computation of the weights can take much longer, + and take much more memory, than the regridding itself, + in which case reading pre-calculated weights can + improve performance. Ignored if *dst* is a `RegridOperator`.""", # aggregated_units @@ -564,6 +569,43 @@ If True then do not perform the regridding, rather return the `esmpy.Regrid` instance that defines the regridding operation.""", + # dst_grid_partitions + "{{dst_grid_partitions: `int` or `str`, optional}}": """dst_grid_partitions: `int` or `str`, optional + Calculating the weights matrix for grids with a very large + number of source and/or destination grid points can + potentially require more memory than is + available. However, the memory requirement can be greatly + reduced by calculating weights separately for + non-overlapping partitions of the destination grid, and + then combining the weights from each partition to create + the final weights matrix. The more partitions there are, + the smaller the memory requirement for the weights + calculations, at the expense of the weights calculations + taking longer. + + The *dst_grid_partitions* parameter sets the number of + destination grid partitions for the weights + calculations. The default value is ``1``, i.e. one + partition for the entire destination grid, maximising + memory usage and minimising the calculation time. If the + string ``'maximum'`` is given then the largest possible + number of partitions of the destination grid will be used, + minimising memory usage and maximising the calculation + time. 
A positive integer specifies the exact number of + partitions, capped by the maximum allowed, allowing the + balance between memory usage and calculation time to be + adjusted. + + The actual number of destination grid partitions and each + partition's shape, and weights calculation time and memory + requirement are displayed when ``'DEBUG'`` logging is + activated. See *verbose* for details. + + .. note:: If setting *dst_grid_partitions* is required for + the regridding to work, then it is worth + considering storing the weights in a file for + fast future access, via the *weights_file* + parameter.""", # ---------------------------------------------------------------- # Method description substitutions (4 levels of indentation) # ---------------------------------------------------------------- diff --git a/cf/field.py b/cf/field.py index 937b4c383a..215a656734 100644 --- a/cf/field.py +++ b/cf/field.py @@ -382,14 +382,6 @@ def __getitem__(self, indices): (6, 4, 3) """ - debug = is_log_level_debug(logger) - - if debug: - logger.debug( - self.__class__.__name__ + ".__getitem__" - ) # pragma: no cover - logger.debug(f" input indices = {indices}") # pragma: no cover - if indices is Ellipsis: return self.copy() @@ -437,12 +429,6 @@ def __getitem__(self, indices): else: findices = indices - if debug: - logger.debug(f" shape = {shape}") # pragma: no cover - logger.debug(f" indices = {indices}") # pragma: no cover - logger.debug(f" indices2 = {indices2}") # pragma: no cover - logger.debug(f" findices = {findices}") # pragma: no cover - new_data = data[tuple(findices)] if 0 in new_data.shape: @@ -496,11 +482,6 @@ def __getitem__(self, indices): else: dice.append(slice(None)) - if debug: - logger.debug( - f" dice = {tuple(dice)}" - ) # pragma: no cover - # Generally we do not apply an ancillary mask to the # metadata items, but for DSGs we do. 
if ancillary_mask and new.DSG: @@ -12985,6 +12966,7 @@ def regrids( ln_z=None, verbose=None, return_esmpy_regrid_operator=False, + dst_grid_partitions=1, inplace=False, i=False, _compute_field_mass=None, @@ -13229,6 +13211,17 @@ def regrids( .. versionadded:: 3.16.2 + {{dst_grid_partitions: `int` or `str`, optional}} + + The maximum number of partitions, Nmax, depends on the + nature of the destination grid: If the Z axis is being + regridded, Nmax = the size of the Z axis. For a 2-d + structured grid, Nmax = the size of the Y axis. For a + UGRID, HEALPix, or DSG grid, Nmax = the size of the + horizontal discrete axis. + + .. versionadded:: NEXTVERSION + axis_order: sequence, optional Deprecated at version 3.14.0. @@ -13322,11 +13315,13 @@ def regrids( z=z, ln_z=ln_z, return_esmpy_regrid_operator=return_esmpy_regrid_operator, + dst_grid_partitions=dst_grid_partitions, inplace=inplace, ) @_deprecated_kwarg_check("i", version="3.0.0", removed_at="4.0.0") @_inplace_enabled(default=False) + @_manage_log_level_via_verbosity def regridc( self, dst, @@ -13346,6 +13341,8 @@ def regridc( z=None, ln_z=None, return_esmpy_regrid_operator=False, + dst_grid_partitions=1, + verbose=None, inplace=False, i=False, _compute_field_mass=None, @@ -13525,6 +13522,19 @@ def regridc( .. versionadded:: 3.16.2 + {{dst_grid_partitions: `int` or `str`, optional}} + + Partitioning is only available for 2-d or 3-d + regridding. The maximum number of partitions is the + size of the first of the destination grid axes + specified by the *axes* parameter. + + .. versionadded:: NEXTVERSION + + {{verbose: `int` or `str` or `None`, optional}} + + .. versionadded:: NEXTVERSION + axis_order: sequence, optional Deprecated at version 3.14.0. 
@@ -13617,6 +13627,7 @@ def regridc( z=z, ln_z=ln_z, return_esmpy_regrid_operator=return_esmpy_regrid_operator, + dst_grid_partitions=dst_grid_partitions, inplace=inplace, ) diff --git a/cf/mixin/propertiesdatabounds.py b/cf/mixin/propertiesdatabounds.py index c6dc1cba07..8e103173bf 100644 --- a/cf/mixin/propertiesdatabounds.py +++ b/cf/mixin/propertiesdatabounds.py @@ -1,7 +1,7 @@ import logging import numpy as np -from cfdm import is_log_level_debug, is_log_level_info +from cfdm import is_log_level_info from ..data import Data from ..decorators import ( @@ -81,15 +81,6 @@ def __getitem__(self, indices): else: findices = tuple(indices) - cname = self.__class__.__name__ - if is_log_level_debug(logger): - logger.debug( - f"{cname}.__getitem__: shape = {self.shape}\n" - f"{cname}.__getitem__: indices2 = {indices2}\n" - f"{cname}.__getitem__: indices = {indices}\n" - f"{cname}.__getitem__: findices = {findices}" - ) # pragma: no cover - data = self.get_data(None, _fill_value=False) if data is not None: new_data = data[findices] @@ -133,12 +124,6 @@ def __getitem__(self, indices): mask.insert_dimension(-1) for mask in findices[1] ] - if is_log_level_debug(logger): - logger.debug( - f"{self.__class__.__name__}.__getitem__: findices for " - f"bounds = {tuple(findices)}" - ) # pragma: no cover - new.bounds.set_data(bounds_data[tuple(findices)], copy=False) # Remove the direction, as it may now be wrong diff --git a/cf/regrid/regrid.py b/cf/regrid/regrid.py index aa696b95a1..3feae9687c 100644 --- a/cf/regrid/regrid.py +++ b/cf/regrid/regrid.py @@ -9,25 +9,15 @@ import numpy as np from cfdm import is_log_level_debug -from ..functions import DeprecationError, regrid_logging +from ..functions import DeprecationError, free_memory, regrid_logging from ..units import Units from .regridoperator import RegridOperator -# ESMF renamed its Python module to `esmpy` at ESMF version 8.4.0. Allow -# either for now for backwards compatibility. 
-esmpy_imported = False +esmpy_imported = True try: import esmpy - - esmpy_imported = True except ImportError: - try: - # Take the new name to use in preference to the old one. - import ESMF as esmpy - - esmpy_imported = True - except ImportError: - pass + esmpy_imported = False logger = logging.getLogger(__name__) @@ -79,6 +69,9 @@ class Grid: # The shape of the regridding axes, in the same order as the # 'axis_keys' attribute. E.g. (96, 73) or (1243,) shape: tuple = None + # The shape of the regridding axes in `esmpy` order. E.g. (73, 96) + # or (1243,) + esmpy_shape: tuple = None # The regrid axis coordinates, in the order expected by # `esmpy`. If the coordinates are 2-d (or more) then the axis # order of each coordinate object must be as expected by `esmpy`. @@ -92,9 +85,13 @@ class Grid: cyclic: Any = None # The regridding method. method: str = "" - # If True then, for 1-d regridding, the esmpy weights are generated - # for a 2-d grid for which one of the dimensions is a size 2 dummy - # dimension. + # If True then, for Cartesian 1-d conservative regridding, the + # esmpy weights are generated for a 2-d grid for which one of the + # dimensions is a size 1 dummy dimension. + dummy_size_1_dimension: bool = False + # If True then, for Cartesian 1-d non-conservative regridding, the + # esmpy weights are generated for a 2-d grid for which one of the + # dimensions is a size 2 dummy dimension. dummy_size_2_dimension: bool = False # Whether or not the grid is a structured grid. is_grid: bool = False @@ -154,6 +151,7 @@ def regrid( min_weight=None, weights_file=None, return_esmpy_regrid_operator=False, + dst_grid_partitions=1, inplace=False, ): """Regrid a field to a new spherical or Cartesian grid. @@ -323,6 +321,19 @@ def regrid( .. versionadded:: 3.16.2 + dst_grid_partitions: `int` or `str`, optional + The number of destination grid partitions for the weights + calculations. 
If the string ``'maximum'`` is given then + the largest possible number of partitions of the + destination grid will be used. A positive integer + specifies the exact number of partitions, capped by the + maximum allowed. + + See `cf.Field.regrids` (for spherical regridding) or + `cf.Field.regridc` (for Cartesian regridding) for details. + + .. versionadded:: NEXTVERSION + :Returns: `Field` or `None` or `RegridOperator` or `esmpy.Regrid` @@ -335,6 +346,8 @@ def regrid( operation is returned instead. """ + debug = is_log_level_debug(logger) + if not inplace: src = src.copy() @@ -404,6 +417,20 @@ def regrid( f"using the {method!r} regridding method." ) + if dst_grid_partitions != 1: + if method in ("conservative_2nd", "nearest_dtos"): + raise ValueError( + f"Can't use destination grid partitioning for {method!r} " + f"regridding. Got dst_grid_partitions={dst_grid_partitions!r}" + ) + + if return_esmpy_regrid_operator: + raise ValueError( + "Can't use destination grid partitioning when " + "return_esmpy_regrid_operator=True. 
" + f"Got dst_grid_partitions={dst_grid_partitions!r}" + ) + if cartesian: if isinstance(axes, str): axes = (axes,) @@ -494,7 +521,7 @@ def regrid( ln_z=ln_z, ) - if is_log_level_debug(logger): + if debug: logger.debug( f"Source Grid:\n{src_grid}\n\nDestination Grid:\n{dst_grid}\n" ) # pragma: no cover @@ -566,7 +593,19 @@ def regrid( dst_mask = None # Create the destination esmpy.Grid - dst_esmpy_grid = create_esmpy_grid(dst_grid, grid_dst_mask) + if dst_grid.is_mesh: + dst_esmpy_grids = create_esmpy_mesh( + dst_grid, grid_dst_mask, dst_grid_partitions + ) + elif dst_grid.is_locstream: + dst_esmpy_grids = create_esmpy_locstream( + dst_grid, grid_dst_mask, dst_grid_partitions + ) + else: + dst_esmpy_grids = create_esmpy_grid( + dst_grid, grid_dst_mask, dst_grid_partitions + ) + del grid_dst_mask # Create a mask for the source grid @@ -591,27 +630,36 @@ def regrid( grid_src_mask = src_mask # Create the source esmpy.Grid - src_esmpy_grid = create_esmpy_grid(src_grid, grid_src_mask) - del grid_src_mask + if src_grid.is_mesh: + src_esmpy_grids = create_esmpy_mesh(src_grid, grid_src_mask) + elif src_grid.is_locstream: + src_esmpy_grids = create_esmpy_locstream(src_grid, grid_src_mask) + else: + src_esmpy_grids = create_esmpy_grid(src_grid, grid_src_mask) - if is_log_level_debug(logger): - logger.debug( - f"Source ESMF Grid:\n{src_esmpy_grid}\n\nDestination ESMF Grid:\n{dst_esmpy_grid}\n" - ) # pragma: no cover + del grid_src_mask esmpy_regrid_operator = [] if return_esmpy_regrid_operator else None + # Find the actual number of partitions + requested_dst_grid_partitions = dst_grid_partitions + dst_grid_partitions = partitions( + dst_grid, requested_dst_grid_partitions, return_n=True + ) + # Create regrid weights weights, row, col, start_index, from_file = create_esmpy_weights( method, - src_esmpy_grid, - dst_esmpy_grid, + src_esmpy_grids, + dst_esmpy_grids, src_grid=src_grid, dst_grid=dst_grid, ignore_degenerate=ignore_degenerate, quarter=src_grid.dummy_size_2_dimension, 
esmpy_regrid_operator=esmpy_regrid_operator, weights_file=weights_file, + dst_grid_partitions=dst_grid_partitions, + requested_dst_grid_partitions=requested_dst_grid_partitions, ) if return_esmpy_regrid_operator: @@ -655,6 +703,7 @@ def regrid( dst=dst, weights_file=weights_file if from_file else None, src_mesh_location=src_grid.mesh_location, + dst_mesh_location=dst_grid.mesh_location, src_featureType=src_grid.featureType, dst_featureType=dst_grid.featureType, src_z=src_grid.z, @@ -674,15 +723,29 @@ def regrid( ) if return_operator: - # Note: The `RegridOperator.tosparse` method will also set - # 'dst_mask' to False for destination points with all - # zero weights. - regrid_operator.tosparse() + if regrid_operator.weights is not None: + regrid_operator.tosparse() + + if debug: + logger.debug( + "Sparse weights array for all partitions:\n" + f"{regrid_operator.weights!r}\n" + f"{regrid_operator.weights.__dict__}" + ) # pragma: no cover + return regrid_operator # ---------------------------------------------------------------- # Still here? Then do the regridding # ---------------------------------------------------------------- + from scipy.sparse import issparse + + if debug and issparse(regrid_operator.weights): + logger.debug( + f"Sparse weights: {regrid_operator.weights!r}\n" + f" {regrid_operator.weights.__dict__}" + ) # pragma: no cover + if src_grid.n_regrid_axes == dst_grid.n_regrid_axes: regridded_axis_sizes = { src_iaxis: (dst_size,) @@ -690,6 +753,7 @@ def regrid( src_grid.axis_indices, dst_grid.shape ) } + elif src_grid.n_regrid_axes == 1: # Fewer source grid axes than destination grid axes (e.g. mesh # regridded to lat/lon). 
@@ -1414,6 +1478,7 @@ def spherical_grid( n_regrid_axes=n_regrid_axes, dimensionality=regridding_dimensionality, shape=shape, + esmpy_shape=shape[::-1], coords=coords, bounds=get_bounds(method, coords, mesh_location), cyclic=cyclic, @@ -1607,6 +1672,7 @@ def Cartesian_grid(f, name=None, method=None, axes=None, z=None, ln_z=None): bounds = get_bounds(method, coords, mesh_location) + dummy_size_1_dimension = False dummy_size_2_dimension = False if not (mesh_location or featureType) and len(coords) == 1: # Create a dummy axis because esmpy doesn't like creating @@ -1617,6 +1683,7 @@ def Cartesian_grid(f, name=None, method=None, axes=None, z=None, ln_z=None): # size 1 coords.append(np.array([0.0])) bounds.append(np.array([data])) + dummy_size_1_dimension = True else: # For linear regridding the extra dimension must be size 2 coords.append(data) @@ -1631,6 +1698,8 @@ def Cartesian_grid(f, name=None, method=None, axes=None, z=None, ln_z=None): is_locstream = bool(featureType) is_grid = not is_mesh and not is_locstream + shape = tuple(axis_sizes) + grid = Grid( name=name, coord_sys="Cartesian", @@ -1640,10 +1709,12 @@ def Cartesian_grid(f, name=None, method=None, axes=None, z=None, ln_z=None): axes=axis_keys, n_regrid_axes=n_regrid_axes, dimensionality=regridding_dimensionality, - shape=tuple(axis_sizes), + shape=shape, + esmpy_shape=shape[::-1], coords=coords, bounds=bounds, cyclic=cyclic, + dummy_size_1_dimension=dummy_size_1_dimension, dummy_size_2_dimension=dummy_size_2_dimension, is_mesh=is_mesh, is_locstream=is_locstream, @@ -1868,8 +1939,8 @@ def esmpy_initialise(): return esmpy.Manager(debug=bool(regrid_logging())) -def create_esmpy_grid(grid, mask=None): - """Create an `esmpy.Grid` or `esmpy.Mesh`. +def create_esmpy_grid(grid, mask=None, grid_partitions=1): + """Create an `esmpy.Grid`. .. 
versionadded:: 3.14.0 @@ -1886,21 +1957,21 @@ def create_esmpy_grid(grid, mask=None): :Returns: - `esmpy.Grid` or `esmpy.Mesh` - The `esmpy.Grid` or `esmpy.Mesh` derived from *grid*. + generator + An iterator through the `esmpy.Grid` instances for each + grid partition. """ - if grid.is_mesh: - # Create an `esmpy.Mesh` - return create_esmpy_mesh(grid, mask) + debug = is_log_level_debug(logger) - if grid.is_locstream: - # Create an `esmpy.LocStream` - return create_esmpy_locstream(grid, mask) + if mask is not None: + if mask.dtype != bool: + raise ValueError( + "'mask' must be None or a Boolean array. Got: " + f"dtype={mask.dtype}" + ) # Create an `esmpy.Grid` - coords = grid.coords - bounds = grid.bounds cyclic = grid.cyclic num_peri_dims = 0 @@ -1917,209 +1988,242 @@ def create_esmpy_grid(grid, mask=None): spherical = False coord_sys = esmpy.CoordSys.CART - # Parse coordinates for the esmpy.Grid, and get its shape. - n_axes = len(coords) - coords = [np.asanyarray(c) for c in coords] - shape = [None] * n_axes - for dim, c in enumerate(coords[:]): - ndim = c.ndim - if ndim == 1: - # 1-d - shape[dim] = c.size - c = c.reshape([c.size if i == dim else 1 for i in range(n_axes)]) - elif ndim == 2: - # 2-d lat or lon - shape[:ndim] = c.shape - if n_axes == 3: - c = c.reshape(c.shape + (1,)) - elif ndim == 3: - # 3-d Z - shape[:ndim] = c.shape - else: - raise ValueError( - f"Can't create an esmpy.Grid from coordinates with {ndim} " - f"dimensions: {c!r}" - ) + # Create the esmpy.Grid for each partition + for i, partition in enumerate(partitions(grid, grid_partitions)): + coords = grid.coords[:] + bounds = grid.bounds[:] + + if partition is not None: + # Subspace the coordinates for this partition + if debug: + logger.debug( + f"Partition {i}: Index: {partition}" + ) # pragma: no cover + + ndim = coords[-1].ndim + if ndim == 1: + # Each coordinate spans a different axis, and we're + # only partitioning the axis of the last coordinate in + # the list. 
+ coords[-1] = coords[-1][partition] + if bounds: + bounds[-1] = bounds[-1][partition] + else: + # All coordinates span the same axes + coords = [c[partition] for c in coords] + if bounds: + bounds = [b[partition] for b in bounds] + + # Parse coordinates for the esmpy.Grid, and get its shape. + coords = [np.asanyarray(c) for c in coords] + n_axes = len(coords) + shape = [None] * n_axes + for dim, c in enumerate(coords[:]): + ndim = c.ndim + if ndim == 1: + # 1-d + shape[dim] = c.size + c = c.reshape( + [c.size if i == dim else 1 for i in range(n_axes)] + ) + elif ndim == 2: + # 2-d lat or lon + shape[:ndim] = c.shape + if n_axes == 3: + c = c.reshape(c.shape + (1,)) + elif ndim == 3: + # 3-d Z + shape[:ndim] = c.shape + else: + raise ValueError( + "Can't create an esmpy.Grid from coordinates with " + f"{ndim} dimensions: {c!r}" + ) - coords[dim] = c + coords[dim] = c - # Parse bounds for the esmpy.Grid - if bounds: - bounds = [np.asanyarray(b) for b in bounds] + # Parse bounds for the esmpy.Grid + if bounds: + bounds = [np.asanyarray(b) for b in bounds] - if spherical: - bounds[lat] = np.clip(bounds[lat], -90, 90) - if not contiguous_bounds(bounds[lat]): - raise ValueError( - f"The {grid.name} latitude coordinates must have " - f"contiguous, non-overlapping bounds for {grid.method} " - "regridding." - ) + if spherical: + bounds[lat] = np.clip(bounds[lat], -90, 90) + if not contiguous_bounds(bounds[lat]): + raise ValueError( + f"The {grid.name} latitude coordinates must have " + "contiguous, non-overlapping bounds for " + f"{grid.method} regridding." + ) - if not contiguous_bounds(bounds[lon], cyclic=cyclic, period=360): - raise ValueError( - f"The {grid.name} longitude coordinates must have " - f"contiguous, non-overlapping bounds for {grid.method} " - "regridding." 
- ) - else: - # Cartesian - for b in bounds: - if not contiguous_bounds(b): + if not contiguous_bounds( + bounds[lon], cyclic=cyclic, period=360 + ): raise ValueError( - f"The {grid.name} coordinates must have contiguous, " - "non-overlapping bounds for " + f"The {grid.name} longitude coordinates must have " + "contiguous, non-overlapping bounds for " f"{grid.method} regridding." ) + else: + # Cartesian + for b in bounds: + if not contiguous_bounds(b): + raise ValueError( + f"The {grid.name} coordinates must have " + "contiguous, non-overlapping bounds for " + f"{grid.method} regridding." + ) + + # Convert each bounds to a grid with no repeated values + for dim, b in enumerate(bounds[:]): + ndim = b.ndim + if ndim == 2: + # Bounds for 1-d coordinates. + # + # E.g. if the esmpy.Grid is (X, Y) then for + # non-cyclic bounds we create a new bounds + # array with shape (97, 1); and for + # non-cyclic bounds we create a new bounds + # array with shape (1, 74). When multiplied, + # these arrays would create the 2-d (97, 74) + # bounds grid expected by esmpy.Grid. + # + # Note that if the X axis were cyclic, then its + # new bounds array would have shape (96, 1). + if spherical and cyclic and dim == lon: + tmp = b[:, 0] + else: + n = b.shape[0] + tmp = np.empty((n + 1,), dtype=b.dtype) + tmp[:n] = b[:, 0] + tmp[n] = b[-1, 1] - # Convert each bounds to a grid with no repeated values - for dim, b in enumerate(bounds[:]): - ndim = b.ndim - if ndim == 2: - # Bounds for 1-d coordinates. - # - # E.g. if the esmpy.Grid is (X, Y) then for non-cyclic - # bounds we create a new bounds array with - # shape (97, 1); and for non-cyclic bounds we - # create a new bounds array with shape (1, - # 74). When multiplied, these arrays would create - # the 2-d (97, 74) bounds grid expected by - # esmpy.Grid. - # - # Note that if the X axis were cyclic, then its - # new bounds array would have shape (96, 1). 
- if spherical and cyclic and dim == lon: - tmp = b[:, 0] - else: - n = b.shape[0] - tmp = np.empty((n + 1,), dtype=b.dtype) - tmp[:n] = b[:, 0] - tmp[n] = b[-1, 1] - - tmp = tmp.reshape( - [tmp.size if i == dim else 1 for i in range(n_axes)] - ) - elif ndim == 3: - # Bounds for 2-d coordinates - # - # E.g. if the esmpy.Grid is (X, Y) then for bounds with - # a non-cyclic X axis, we create a new bounds - # array with shape (97, 74). - # - # Note that if the X axis were cyclic, then the - # new bounds array would have shape (96, 74). - n, m = b.shape[:2] - if spherical and cyclic: - tmp = np.empty((n, m + 1), dtype=b.dtype) - tmp[:, :m] = b[:, :, 0] - if dim == lon: - tmp[:, m] = b[:, -1, 0] + tmp = tmp.reshape( + [tmp.size if i == dim else 1 for i in range(n_axes)] + ) + elif ndim == 3: + # Bounds for 2-d coordinates + # + # E.g. if the esmpy.Grid is (X, Y) then for bounds + # with a non-cyclic X axis, we + # create a new bounds array with shape (97, + # 74). + # + # Note that if the X axis were cyclic, then + # the new bounds array would have shape (96, + # 74). + n, m = b.shape[:2] + if spherical and cyclic: + tmp = np.empty((n, m + 1), dtype=b.dtype) + tmp[:, :m] = b[:, :, 0] + if dim == lon: + tmp[:, m] = b[:, -1, 0] + else: + tmp[:, m] = b[:, -1, 1] else: - tmp[:, m] = b[:, -1, 1] - else: - tmp = np.empty((n + 1, m + 1), dtype=b.dtype) - tmp[:n, :m] = b[:, :, 0] - tmp[:n, m] = b[:, -1, 1] - tmp[n, :m] = b[-1, :, 3] - tmp[n, m] = b[-1, -1, 2] + tmp = np.empty((n + 1, m + 1), dtype=b.dtype) + tmp[:n, :m] = b[:, :, 0] + tmp[:n, m] = b[:, -1, 1] + tmp[n, :m] = b[-1, :, 3] + tmp[n, m] = b[-1, -1, 2] - if n_axes == 3: - tmp = tmp.reshape(tmp.shape + (1,)) + if n_axes == 3: + tmp = tmp.reshape(tmp.shape + (1,)) - elif ndim == 4: - # Bounds for 3-d coordinates - raise ValueError( - f"Can't do {grid.method} 3-d {grid.coord_sys} regridding " - f"with {grid.coord_sys} 3-d coordinates " - f"{coords[z].identity!r}." 
- ) + elif ndim == 4: + # Bounds for 3-d coordinates + raise ValueError( + f"Can't do {grid.method} 3-d {grid.coord_sys} " + f"regridding with {grid.coord_sys} 3-d coordinates " + f"{coords[z].identity!r}." + ) - bounds[dim] = tmp - - # Define the esmpy.Grid stagger locations. For details see - # - # 2-d: - # https://earthsystemmodeling.org/docs/release/latest/ESMF_refdoc/node5.html#fig:gridstaggerloc2d - # - # 3-d: - # https://earthsystemmodeling.org/docs/release/latest/ESMF_refdoc/node5.html#fig:gridstaggerloc3d - if bounds: - if n_axes == 3: - staggerlocs = [ - esmpy.StaggerLoc.CENTER_VCENTER, - esmpy.StaggerLoc.CORNER_VFACE, - ] - else: - staggerlocs = [esmpy.StaggerLoc.CORNER, esmpy.StaggerLoc.CENTER] - else: - if n_axes == 3: - staggerlocs = [esmpy.StaggerLoc.CENTER_VCENTER] - else: - staggerlocs = [esmpy.StaggerLoc.CENTER] - - # Create an empty esmpy.Grid - esmpy_grid = esmpy.Grid( - max_index=np.array(shape, dtype="int32"), - coord_sys=coord_sys, - num_peri_dims=num_peri_dims, - periodic_dim=periodic_dim, - staggerloc=staggerlocs, - ) + bounds[dim] = tmp - # Populate the esmpy.Grid centres - for dim, c in enumerate(coords): - if n_axes == 3: - grid_centre = esmpy_grid.get_coords( - dim, staggerloc=esmpy.StaggerLoc.CENTER_VCENTER - ) + # Define the esmpy.Grid stagger locations. For details see + # + # 2-d: + # https://earthsystemmodeling.org/docs/release/latest/ESMF_refdoc/node5.html#fig:gridstaggerloc2d + # + # 3-d: + # https://earthsystemmodeling.org/docs/release/latest/ESMF_refdoc/node5.html#fig:gridstaggerloc3d + if bounds: + if n_axes == 3: + staggerlocs = [ + esmpy.StaggerLoc.CENTER_VCENTER, + esmpy.StaggerLoc.CORNER_VFACE, + ] + else: + staggerlocs = [ + esmpy.StaggerLoc.CORNER, + esmpy.StaggerLoc.CENTER, + ] else: - grid_centre = esmpy_grid.get_coords( - dim, staggerloc=esmpy.StaggerLoc.CENTER - ) + if n_axes == 3: + staggerlocs = [esmpy.StaggerLoc.CENTER_VCENTER] + else: + staggerlocs = [esmpy.StaggerLoc.CENTER] - grid_centre[...] 
= c + # Create an empty esmpy.Grid + esmpy_grid = esmpy.Grid( + max_index=np.array(shape, dtype="int32"), + coord_sys=coord_sys, + num_peri_dims=num_peri_dims, + periodic_dim=periodic_dim, + staggerloc=staggerlocs, + ) - # Populate the esmpy.Grid corners - if bounds: - if n_axes == 3: - staggerloc = esmpy.StaggerLoc.CORNER_VFACE - else: - staggerloc = esmpy.StaggerLoc.CORNER + # Populate the esmpy.Grid centres + for dim, c in enumerate(coords): + if n_axes == 3: + grid_centre = esmpy_grid.get_coords( + dim, staggerloc=esmpy.StaggerLoc.CENTER_VCENTER + ) + else: + grid_centre = esmpy_grid.get_coords( + dim, staggerloc=esmpy.StaggerLoc.CENTER + ) - for dim, b in enumerate(bounds): - grid_corner = esmpy_grid.get_coords(dim, staggerloc=staggerloc) - grid_corner[...] = b + grid_centre[...] = c - # Add an esmpy.Grid mask - if mask is not None: - if mask.dtype != bool: - raise ValueError( - "'mask' must be None or a Boolean array. Got: " - f"dtype={mask.dtype}" - ) + # Populate the esmpy.Grid corners + if bounds: + if n_axes == 3: + staggerloc = esmpy.StaggerLoc.CORNER_VFACE + else: + staggerloc = esmpy.StaggerLoc.CORNER - if not mask.any(): - mask = None + for dim, b in enumerate(bounds): + grid_corner = esmpy_grid.get_coords(dim, staggerloc=staggerloc) + grid_corner[...] = b - if mask is not None: - grid_mask = esmpy_grid.add_item(esmpy.GridItem.MASK) - if len(grid.coords) == 2 and mask.ndim == 1: - # esmpy grid has a dummy size 1 dimension, so we need to - # include this in the mask as well. - mask = np.expand_dims(mask, 1) + # Add an esmpy.Grid mask + mask1 = mask + if mask1 is not None: + if partition is not None: + mask1 = mask1[..., *partition] - # Note: 'mask' has True/False for masked/unmasked - # elements, but the esmpy mask requires 0/1 for - # masked/unmasked elements. - grid_mask[...] 
= np.invert(mask).astype("int32") + if not mask1.any(): + mask1 = None - return esmpy_grid + if mask1 is not None: + grid_mask = esmpy_grid.add_item(esmpy.GridItem.MASK) + if len(grid.coords) == 2 and mask1.ndim == 1: + # esmpy grid has a dummy size 1 dimension, so we + # need to include this in the mask as well. + mask1 = np.expand_dims(mask1, 1) + # Note: 'mask1' has True/False for masked/unmasked + # elements, but the esmpy mask requires 0/1 for + # masked/unmasked elements. + grid_mask[...] = np.invert(mask1).astype("int32") -def create_esmpy_mesh(grid, mask=None): + yield esmpy_grid + + +def create_esmpy_mesh(grid, mask=None, grid_partitions=1): """Create an `esmpy.Mesh`. .. versionadded:: 3.16.0 @@ -2138,98 +2242,126 @@ def create_esmpy_mesh(grid, mask=None): :Returns: - `esmpy.Mesh` - The `esmpy.Mesh` derived from *grid*. + generator + An iterator through the `esmpy.Mesh` instances for each + grid partition. """ + debug = is_log_level_debug(logger) + if grid.mesh_location != "face": raise ValueError( f"Can't regrid {'from' if grid.name == 'source' else 'to'} " f"a {grid.name} grid of UGRID {grid.mesh_location!r} cells" ) + if mask is not None: + if mask.dtype != bool: + raise ValueError( + "'mask' must be None or a Boolean array. 
" + f"Got: dtype={mask.dtype}" + ) + if grid.coord_sys == "spherical": coord_sys = esmpy.CoordSys.SPH_DEG else: # Cartesian coord_sys = esmpy.CoordSys.CART - # Create an empty esmpy.Mesh - esmpy_mesh = esmpy.Mesh( - parametric_dim=2, spatial_dim=2, coord_sys=coord_sys - ) - - element_conn = grid.domain_topology.normalise().array - element_count = element_conn.shape[0] - element_types = np.ma.count(element_conn, axis=1) - element_conn = np.ma.compressed(element_conn) + # Create the esmpy.Mesh for each partition + for i, partition in enumerate(partitions(grid, grid_partitions)): + # Initialise the esmpy.Mesh + esmpy_mesh = esmpy.Mesh( + parametric_dim=2, spatial_dim=2, coord_sys=coord_sys + ) - # Element coordinates - if grid.coords: - try: - element_coords = [c.array for c in grid.coords] - except AttributeError: - # The coordinate constructs have no data - element_coords = None + grid_coords = grid.coords + grid_bounds = grid.bounds + domain_topology = grid.domain_topology + if partition is not None: + # Subspace the coordinates and domain topology for this + # partition + if debug: + logger.debug( + f"Partition {i} index: {partition}" + ) # pragma: no cover + + # All coordinates and the domain topology span the same + # discrete axis + grid_coords = [c[partition] for c in grid_coords] + grid_bounds = [b[partition] for b in grid_bounds] + domain_topology = domain_topology[partition] + + element_conn = domain_topology.normalise(start_index=0).array + element_count = element_conn.shape[0] + element_types = np.ma.count(element_conn, axis=1) + element_conn = np.ma.compressed(element_conn) + + # Element coordinates + if grid_coords: + try: + element_coords = [c.array for c in grid_coords] + except AttributeError: + # The coordinate constructs have no data + element_coords = None + else: + element_coords = np.stack(element_coords, axis=-1) else: - element_coords = np.stack(element_coords, axis=-1) - else: - element_coords = None - - node_ids, index = 
np.unique(element_conn, return_index=True) - node_coords = [b.data.compressed().array[index] for b in grid.bounds] - node_coords = np.stack(node_coords, axis=-1) - node_count = node_ids.size - node_owners = np.zeros(node_count) - - # Make sure that node IDs are >= 1, as needed by newer versions of - # esmpy. - min_id = node_ids.min() - if min_id < 1: - node_ids = node_ids + min_id + 1 - - # Add nodes. This must be done before `add_elements`. - esmpy_mesh.add_nodes( - node_count=node_count, - node_ids=node_ids, - node_coords=node_coords, - node_owners=node_owners, - ) + element_coords = None - # Mask - if mask is not None: - if mask.dtype != bool: - raise ValueError( - "'mask' must be None or a Boolean array. " - f"Got: dtype={mask.dtype}" - ) + node_ids, index = np.unique(element_conn, return_index=True) + node_coords = [b.data.compressed().array[index] for b in grid_bounds] + node_coords = np.stack(node_coords, axis=-1) + node_count = node_ids.size + node_owners = np.zeros(node_count, "int32") + + # esmpy requires that node IDs are >= 1 (they're currently >= + # 0 due to using 'start_index=0' above) + node_ids += 1 + + # Add nodes. This must be done before `add_elements`. + esmpy_mesh.add_nodes( + node_count=node_count, + node_ids=node_ids, + node_coords=node_coords, + node_owners=node_owners, + ) - # Note: 'mask' has True/False for masked/unmasked elements, - # but the esmpy mask requires 0/1 for masked/unmasked - # elements. - mask = np.invert(mask).astype("int32") - if mask.all(): - # There are no masked elements - mask = None - - # Add elements. This must be done after `add_nodes`. - # - # Note: The element_ids should start at 1, since when writing the - # weights to a file, these ids are used for the column - # indices. 
- esmpy_mesh.add_elements( - element_count=element_count, - element_ids=np.arange(1, element_count + 1), - element_types=element_types, - element_conn=element_conn, - element_mask=mask, - element_area=None, - element_coords=element_coords, - ) - return esmpy_mesh + # Mask + mask1 = mask + if mask1 is not None: + if partition is not None: + # The mask spans the same discrete axis as the + # coordinates + mask1 = mask1[partition] + + # Note: 'mask1' has True/False for masked/unmasked + # elements, but the esmpy mask requires 0/1 for + # masked/unmasked elements. + mask1 = np.invert(mask1).astype("int32") + if mask1.all(): + # There are no masked elements + mask1 = None + + # Add elements. This must be done after `add_nodes`. + # + # Note: The element_ids should start at 1, since when writing the + # weights to a file, these ids are used for the column + # indices. + esmpy_mesh.add_elements( + element_count=element_count, + element_ids=np.arange(1, element_count + 1), + element_types=element_types, + element_conn=element_conn, + element_mask=mask1, + element_area=None, + element_coords=element_coords, + ) + + yield esmpy_mesh -def create_esmpy_locstream(grid, mask=None): +def create_esmpy_locstream(grid, mask=None, grid_partitions=1): """Create an `esmpy.LocStream`. .. versionadded:: 3.16.2 @@ -2248,10 +2380,20 @@ def create_esmpy_locstream(grid, mask=None): :Returns: - `esmpy.LocStream` - The `esmpy.LocStream` derived from *grid*. + generator + An iterator through the `esmpy.LocStream` instances for + each grid partition. """ + debug = is_log_level_debug(logger) + + if mask is not None: + if mask.dtype != bool: + raise ValueError( + "'mask' must be None or a Boolean array. 
" + f"Got: dtype={mask.dtype}" + ) + if grid.coord_sys == "spherical": coord_sys = esmpy.CoordSys.SPH_DEG keys = ("ESMF:Lon", "ESMF:Lat", "ESMF:Radius") @@ -2260,54 +2402,69 @@ def create_esmpy_locstream(grid, mask=None): coord_sys = esmpy.CoordSys.CART keys = ("ESMF:X", "ESMF:Y", "ESMF:Z") - # Create an empty esmpy.LocStream - location_count = grid.shape[0] - esmpy_locstream = esmpy.LocStream( - location_count=location_count, - coord_sys=coord_sys, - name=grid.featureType, - ) + # Create the esmpy.LocStream for each partition + for i, partition in enumerate(partitions(grid, grid_partitions)): + coords = grid.coords + if partition is not None: + # Subspace the coordinates for this partition + if debug: + logger.debug( + f"Partition {i} index: {partition}" + ) # pragma: no cover + + # All coordinates span the same discrete axis + coords = [c[partition] for c in grid.coords] + + # Create an empty esmpy.LocStream + location_count = coords[0].size + esmpy_locstream = esmpy.LocStream( + location_count=location_count, + coord_sys=coord_sys, + name=grid.featureType, + ) - # Add coordinates (must be of type float64) - for coord, key in zip(grid.coords, keys): - esmpy_locstream[key] = coord.array.astype(float) + # Add coordinates (must be of type float64) + for coord, key in zip(coords, keys): + esmpy_locstream[key] = coord.array.astype(float) - # Add mask (always required, and must be of type int32) - if mask is not None: - if mask.dtype != bool: - raise ValueError( - "'mask' must be None or a Boolean array. " - f"Got: dtype={mask.dtype}" - ) + # Add mask (always required, and must be of type int32) + mask1 = mask + if mask1 is not None: + if partition is not None: + # The mask spans the same discrete axis as the + # coordinates + mask1 = mask1[partition] - # Note: 'mask' has True/False for masked/unmasked elements, - # but the esmpy mask requires 0/1 for masked/unmasked - # elements. 
+    dst_esmpy_grids: sequence of `esmpy.Grid`
versionadded:: NEXTVERSION + :Returns: 5-`tuple` + * weights: Either the 1-d `numpy` array of the regridding - weights. Or `None` if the regridding weights are to - be read from a file. - * row: The 1-d `numpy` array of the row indices of the - regridding weights in the dense weights matrix, - which has J rows and I columns, where J and I are - the total number of cells in the destination and - source grids respectively. The start index is 1. Or - `None` if the indices are to be read from a file. - * col: The 1-d `numpy` array of column indices of the - regridding weights in the dense weights matrix, + weights. Or a CSR sparse array of the regridding + weights. Or `None` if the weights are to be read + from, or written to, a file. + + * row: The 1-d `numpy` array of the destination/row + indices of the weights in the dense weights matrix, which has J rows and I columns, where J and I are the total number of cells in the destination and - source grids respectively. The start index is 1. Or - `None` if the indices are to be read from a file. + source grids respectively. Or `None` if *weights* + is a CSR sparse array, or the weights are to be + read from a file. + + * col: The 1-d `numpy` array of source/column indices of + the weights in the dense weights matrix, which has + J rows and I columns, where J and I are the total + number of cells in the destination and source grids + respectively. Or `None` if *weights* is a CSR + sparse array, or the weights are to be read from a + file. + * start_index: The non-negative integer start index of the row and column indices. - * from_file: `True` if the weights were read from a file, - otherwise `False`. + + * from_file: `True` if the weights were read from, or + written to, a file. Otherwise `False`. 
""" + from cfdm import integer_dtype + + debug = is_log_level_debug(logger) + start_index = 1 compute_weights = True @@ -2400,8 +2581,31 @@ def create_esmpy_weights( row = None col = None + if esmpy_regrid_operator is not None: + # If we're returning the esmpy regrid operator, we need to + # create it by computing the weights. + compute_weights = True + from_file = True - if compute_weights or esmpy_regrid_operator is not None: + if compute_weights: + partitioned_dst_grid = dst_grid_partitions > 1 + if debug: + from resource import RUSAGE_SELF, getrusage + from time import time + + maxrss = getrusage(RUSAGE_SELF).ru_maxrss # pragma: no cover + start_time0 = time() # pragma: no cover + logger.debug( + "Calculating weights ...\n\n" + "Number of destination grid partitions: " + f"{dst_grid_partitions} " + f"({requested_dst_grid_partitions!r} requested)\n" + "Free memory before weights creation: " + f"{free_memory() / 2**30} GiB\n" + "Maximum RSS before weights creation: " + f"{maxrss * 1000 / 2**30} GiB\n" + ) # pragma: no cover + # Create the weights using ESMF from_file = False @@ -2421,79 +2625,203 @@ def create_esmpy_weights( elif not dst_mesh_location: dst_meshloc = None + src_esmpy_grids = tuple(src_esmpy_grids) + if len(src_esmpy_grids) != 1: + raise ValueError( + "There must be exactly one source grid partition. 
" + f"Got {len(src_esmpy_grids)}" + ) + + # Create source esmpy field + src_esmpy_grid = src_esmpy_grids[0] + if debug: + logger.debug(f"Source ESMF {src_esmpy_grid}\n") # pragma: no cover + src_esmpy_field = esmpy.Field( src_esmpy_grid, name="src", meshloc=src_meshloc ) - dst_esmpy_field = esmpy.Field( - dst_esmpy_grid, name="dst", meshloc=dst_meshloc - ) - mask_values = np.array([0], dtype="int32") - - # Create the esmpy.Regrid operator - r = esmpy.Regrid( - src_esmpy_field, - dst_esmpy_field, - regrid_method=esmpy_methods.get(method), - unmapped_action=esmpy.UnmappedAction.IGNORE, - ignore_degenerate=bool(ignore_degenerate), - src_mask_values=mask_values, - dst_mask_values=mask_values, - norm_type=esmpy.api.constants.NormType.FRACAREA, - factors=True, - ) - - weights = r.get_weights_dict(deep_copy=True) - row = weights["row_dst"] - col = weights["col_src"] - weights = weights["weights"] + src_size = src_esmpy_field.data.size + src_rank = src_esmpy_grid.rank + + if partitioned_dst_grid: + from scipy.sparse import csr_array, vstack + + # Initialise the list of the sparse weights arrays for + # each destination grid partition + w = [] + + if debug: + start_time = time() + + # Loop round destination grid partitions + for i, dst_esmpy_grid in enumerate(dst_esmpy_grids): + if debug: + logger.debug( + f"Partition {i}: Time taken to create ESMF " + f"{dst_esmpy_grid.__class__.__name__}: " + f"{time() - start_time} s\n" + f"Partition {i}: Destination ESMF {dst_esmpy_grid}" + ) # pragma: no cover + start_time = time() # pragma: no cover + + # Create destination esmpy field + dst_esmpy_field = esmpy.Field( + dst_esmpy_grid, name="dst", meshloc=dst_meshloc + ) - if quarter: - # The weights were created with a dummy size 2 dimension - # such that the weights for each dummy axis element are - # identical. The duplicate weights need to be removed. - # - # To do this, only retain the indices that correspond to - # the top left quarter of the weights matrix in dense - # form. 
+                    f"Partition {i}: Maximum RSS after ESMF weights "
+                    f"creation: {maxrss * 1000 / 2**30} GiB"
+ index = np.where( + (row <= dst_size // 2) & (col <= src_size // 2) + ) + weights = weights[index] + row = row[index] + col = col[index] + del index + + # Cast as 32-bit integers, if possible. + row = row.astype(integer_dtype(dst_size), copy=False) + col = col.astype(integer_dtype(src_size), copy=False) + + if partitioned_dst_grid: + # The destination grid has been partitioned, so create + # the sparse weights array for this partition. + row -= 1 + col -= 1 + weights = csr_array( + (weights, (row, col)), shape=[dst_size, src_size] + ) + del row, col + w.append(weights) + + if debug: + logger.debug( + f"Partition {i}: Time taken to create sparse weights " + f"array: {time() - start_time} s\n" + f"Partition {i}: Sparse weights array: {weights!r}" + ) # pragma: no cover + start_time = time() + + if debug: + logger.debug( + f"Partition {i}: Free memory after weights calculation: " + f"{free_memory() / 2**30} GiB\n" + ) # pragma: no cover + start_time = time() # pragma: no cover + + if esmpy_regrid_operator is None: + # Destroy esmpy objects that are no longer needed + src_esmpy_grid.destroy() + src_esmpy_field.destroy() + r.srcfield.grid.destroy() + r.srcfield.destroy() + + if partitioned_dst_grid: + # The destination grid has been partitioned, so + # concatenate the sparse weights arrays for all + # destination grid partitions. 
+                )  # pragma: no cover
+ row, col = weights.tocoo(copy=False).coords + weights = weights.data + if start_index: + # 'row' and 'col' come out of `tocoo().coords` as + # zero-based values + row += start_index + col += start_index regrid_method = f"{src_grid.coord_sys} {src_grid.method}" if src_grid.ln_z: @@ -2504,62 +2832,84 @@ def create_esmpy_weights( nc.title = ( f"Regridding weights from source {src_grid.type} " - f"with shape {src_shape} to destination " - f"{dst_grid.type} with shape {dst_shape}" + f"with shape {src_grid.esmpy_shape} to destination " + f"{dst_grid.type} with shape {dst_grid.esmpy_shape}" ) + nc.comment = "See https://earthsystemmodeling.org/docs/release/latest/ESMF_refdoc for information on the dataset structure" nc.source = f"cf v{__version__}, esmpy v{esmpy.__version__}" nc.history = f"Created at {datetime.now()}" - nc.regrid_method = regrid_method - nc.ESMF_unmapped_action = r.unmapped_action - nc.ESMF_ignore_degenerate = int(r.ignore_degenerate) - + nc.map_method = regrid_method + nc.domain_a = src_grid.type + nc.domain_b = dst_grid.type + nc.ESMF_unmapped_action = ESMF_unmapped_action + nc.ESMF_ignore_degenerate = ESMF_ignore_degenerate + + # The number of source cells + nc.createDimension("n_a", src_size) + # The number of destination cells + nc.createDimension("n_b", dst_size) + # The number of entries in the regridding matrix nc.createDimension("n_s", weights.size) - nc.createDimension("src_grid_rank", src_esmpy_grid.rank) - nc.createDimension("dst_grid_rank", dst_esmpy_grid.rank) + nc.createDimension("src_grid_rank", src_rank) + nc.createDimension("dst_grid_rank", dst_rank) v = nc.createVariable( - "src_grid_dims", i_dtype, ("src_grid_rank",) + "src_grid_dims", col.dtype, ("src_grid_rank",) ) v.long_name = "Source grid shape" - v[...] = src_shape + v[...] = src_grid.esmpy_shape v = nc.createVariable( - "dst_grid_dims", i_dtype, ("dst_grid_rank",) + "dst_grid_dims", row.dtype, ("dst_grid_rank",) ) v.long_name = "Destination grid shape" - v[...] 
= dst_shape + v[...] = dst_grid.esmpy_shape - v = nc.createVariable("S", weights.dtype, ("n_s",)) - v.long_name = "Weights values" + v = nc.createVariable("S", weights.dtype, ("n_s",), zlib=True) + v.long_name = ( + "The weight for each entry in the regridding matrix" + ) v[...] = weights + nc.sync() + del weights - v = nc.createVariable("row", i_dtype, ("n_s",), zlib=True) - v.long_name = "Destination/row indices" + v = nc.createVariable("row", row.dtype, ("n_s",), zlib=True) + v.long_name = ( + "The position in the destination grid for each entry " + "in the weight matrix" + ) v.start_index = start_index v[...] = row + nc.sync() + del row - v = nc.createVariable("col", i_dtype, ("n_s",), zlib=True) - v.long_name = "Source/col indices" + v = nc.createVariable("col", col.dtype, ("n_s",), zlib=True) + v.long_name = ( + "The position in the source grid for each entry " + "in the regridding matrix" + ) v.start_index = start_index v[...] = col + nc.sync() + del col nc.close() - if esmpy_regrid_operator is None: - # Destroy esmpy objects (the esmpy.Grid objects exist even if - # we didn't create any weights using esmpy.Regrid). 
+    Only a destination grid without a dummy size 2 dimension can be
Each of these tuples + contains a slice for each axis of the coordinate + constructs, i.e. for N-d coordinates, each tuple has N + elements. + + When there is a single partition that spans the entire + grid, then the special value of ``(None,)`` is + returned. + + If *return_n* is True then the integer number of + partitions will be returned, instead. + + """ + if ( + grid_partitions == 1 + or grid.name == "source" + or grid.dummy_size_1_dimension + or grid.dummy_size_2_dimension + ): + # One partition + if return_n: + return 1 + + return (None,) + + from math import ceil + + from cfdm.data.utils import chunk_indices + from dask.array.core import normalize_chunks + + shape = grid.coords[-1].shape + + if grid_partitions == "maximum": + # Maximum possible number of partitions + if return_n: + return shape[-1] + + size = 1 + else: + if grid_partitions <= 1: + # One partition + if return_n: + return 1 + + return (None,) + + # Set the partition size to somewhere in the range [1, maximum + # number of partitions] + size = ceil(shape[-1] / grid_partitions) + + if return_n: + return len(normalize_chunks((size,), shape=shape[-1:])[0]) + + return chunk_indices(normalize_chunks(shape[:-1] + (size,), shape=shape)) diff --git a/cf/regrid/regridoperator.py b/cf/regrid/regridoperator.py index 7621addc7e..fe6387acb9 100644 --- a/cf/regrid/regridoperator.py +++ b/cf/regrid/regridoperator.py @@ -5,6 +5,8 @@ from ..decorators import _display_or_return from ..functions import _DEPRECATION_ERROR_ATTRIBUTE, _DEPRECATION_ERROR_METHOD +from ..functions import atol as cf_atol +from ..functions import rtol as cf_rtol from ..mixin_container import Container as mixin_Container @@ -50,6 +52,7 @@ def __init__( src_z=None, dst_z=None, ln_z=False, + _dst_mask_adjusted=False, ): """**Initialisation** @@ -192,6 +195,8 @@ def __init__( .. 
versionadded:: 3.16.2 """ + from scipy.sparse import issparse + super().__init__() if weights is None and weights_file is None: @@ -224,6 +229,13 @@ def __init__( self._set_component("src_z", src_z, copy=False) self._set_component("dst_z", dst_z, copy=False) self._set_component("ln_z", bool(ln_z), copy=False) + self._set_component( + "_dst_mask_adjusted", bool(_dst_mask_adjusted), copy=False + ) + + if issparse(weights): + # Make sure that the destination mask has been adjusted + self.tosparse() def __repr__(self): """x.__repr__() <==> repr(x)""" @@ -231,6 +243,20 @@ def __repr__(self): f"" ) + @property + def _dst_mask_adjusted(self): + """Whether or not the destination grid mask has been adjusted. + + Whether or not the destination grid mask has been set to True + where the weights for destination grid points are all zero. + + See `tosparse` for details. + + .. versionadded:: NEXTVERSION + + """ + return self._get_component("_dst_mask_adjusted") + @property def col(self): """The 1-d array of the column indices of the regridding @@ -515,6 +541,10 @@ def copy(self): The deep copy. """ + weights = self.weights + if weights is not None: + weights = weights.copy() + row = self.row if row is not None: row = row.copy() @@ -524,7 +554,7 @@ def copy(self): col = col.copy() return type(self)( - self.weights.copy(), + weights, row, col, method=self.method, @@ -549,6 +579,7 @@ def copy(self): src_z=self.src_z, dst_z=self.dst_z, ln_z=self.ln_z, + _dst_mask_adjusted=self._dst_mask_adjusted, ) @_display_or_return @@ -607,6 +638,74 @@ def dump(self, display=True): return "\n".join(string) + def equal_dst_mask(self, other): + """Whether two regrid operators have identical destination masks. + + :Parameters: + + other: `RegridOperator` + The other regrid operator to compare. + + :Returns: + + `bool` + True if the regrid operators have identical destination + masks, otherwise False. 
+ + """ + self.tosparse() + other.tosparse() + + m0 = self.dst_mask + m1 = other.dst_mask + + if m0 is None and m1 is None: + return True + + if m0 is None or m1 is None: + return False + + return np.array_equal(m0, m1) + + def equal_weights(self, other, rtol=None, atol=None): + """Whether two regrid operators have identical weights. + + :Parameters: + + other: `RegridOperator` + The other regrid operator to compare. + + {{rtol: number, optional}} + + {{atol: number, optional}} + + :Returns: + + `bool` + True if the regrid operators have identical weights, + otherwise False. + + """ + if atol is None: + atol = float(cf_atol()) + + if rtol is None: + rtol = float(cf_rtol()) + + self.tosparse() + other.tosparse() + + w0 = self.weights + w1 = other.weights + + return ( + w0.shape == w1.shape + and w0.data.shape == w1.data.shape + and np.array_equal(w0.indices, w1.indices) + and np.array_equal(w0.indptr, w1.indptr) + and np.allclose(w0.data, w1.data, rtol=rtol, atol=atol) + ) + def get_parameter(self, parameter, *default): """Return a regrid operation parameter. @@ -706,75 +805,79 @@ def tosparse(self): `None` """ - weights = self.weights - row = self.row - col = self.col - if weights is not None and row is None and col is None: - # Weights are already in sparse array format - return - from math import prod - from scipy.sparse import csr_array - - start_index = self.start_index - col_start_index = None - row_start_index = None - - if weights is None: - weights_file = self.weights_file - if weights_file is not None: - # Read the weights from the weights file - from cfdm.data.locks import netcdf_lock - from netCDF4 import Dataset - - with netcdf_lock: - nc = Dataset(weights_file, "r") - weights = nc.variables["S"][...] - row = nc.variables["row"][...] - col = nc.variables["col"][...] 
- - try: - col_start_index = nc.variables["col"].start_index - except AttributeError: - col_start_index = 1 - - try: - row_start_index = nc.variables["row"].start_index - except AttributeError: - row_start_index = 1 - - nc.close() - else: - raise ValueError( - "Conversion to sparse array format requires at least " - "one of the 'weights' or 'weights_file' attributes to " - "be set" - ) + from scipy.sparse import csr_array, issparse - # Convert to sparse array format - if col_start_index: - col = col - col_start_index - elif start_index: - col = col - start_index - - if row_start_index: - row = row - row_start_index - elif start_index: - row = row - start_index - - src_size = prod(self.src_shape) dst_size = prod(self.dst_shape) - weights = csr_array((weights, (row, col)), shape=[dst_size, src_size]) - - self._set_component("weights", weights, copy=False) - self._set_component("row", None, copy=False) - self._set_component("col", None, copy=False) - del row, col + weights = self.weights + if not issparse(weights): + row = self.row + col = self.col + start_index = self.start_index + col_start_index = None + row_start_index = None + + if weights is None: + weights_file = self.weights_file + if weights_file is not None: + # Read the weights from the weights file + from cfdm.data.locks import netcdf_lock + from netCDF4 import Dataset + + with netcdf_lock: + nc = Dataset(weights_file, "r") + weights = nc.variables["S"][...] + row = nc.variables["row"][...] + col = nc.variables["col"][...] 
+ + try: + col_start_index = nc.variables["col"].start_index + except AttributeError: + col_start_index = 1 + + try: + row_start_index = nc.variables["row"].start_index + except AttributeError: + row_start_index = 1 + + nc.close() + else: + raise ValueError( + "Conversion to sparse array format requires at least " + "one of the 'weights' or 'weights_file' attributes to " + "be set" + ) + + # Convert to sparse array format + if col_start_index is not None: + col -= col_start_index + elif start_index: + col = col - start_index + + if row_start_index is not None: + row -= row_start_index + elif start_index: + row = row - start_index + + src_size = prod(self.src_shape) + + weights = csr_array( + (weights, (row, col)), shape=[dst_size, src_size] + ) + del row, col + + self._set_component("weights", weights, copy=False) + self._set_component("row", None, copy=False) + self._set_component("col", None, copy=False) + + if self._dst_mask_adjusted: + # The destination grid mask has already been adjusted + return - # Set the destination grid mask to True where the weights for - # destination grid points are all zero + # Still here? Then set the destination grid mask to True where + # the weights for destination grid points are all zero. 
dst_mask = self.dst_mask if dst_mask is not None: if dst_mask.dtype != bool or dst_mask.shape != self.dst_shape: @@ -807,3 +910,4 @@ def tosparse(self): dst_mask = dst_mask.reshape(self.dst_shape) self._set_component("dst_mask", dst_mask, copy=False) + self._set_component("_dst_mask_adjusted", True, copy=False) diff --git a/cf/test/test_Field.py b/cf/test/test_Field.py index 92a72ff8a8..52fc191a3b 100644 --- a/cf/test/test_Field.py +++ b/cf/test/test_Field.py @@ -1,12 +1,12 @@ import atexit import datetime import faulthandler -from importlib.util import find_spec import itertools import os import re import tempfile import unittest +from importlib.util import find_spec import numpy import numpy as np @@ -1154,7 +1154,8 @@ def test_Field_insert_dimension(self): f.insert_dimension(1, "qwerty") @unittest.skipUnless( - find_spec("matplotlib"), "matplotlib required but not installed") + find_spec("matplotlib"), "matplotlib required but not installed" + ) def test_Field_indices(self): f = cf.read(self.filename)[0] diff --git a/cf/test/test_RegridOperator.py b/cf/test/test_RegridOperator.py index d92bf9b554..28cf79f215 100644 --- a/cf/test/test_RegridOperator.py +++ b/cf/test/test_RegridOperator.py @@ -1,13 +1,12 @@ import datetime import faulthandler -from importlib.util import find_spec import unittest +from importlib.util import find_spec faulthandler.enable() # to debug seg faults and timeouts import cf - # ESMF renamed its Python module to `esmpy` at ESMF version 8.4.0. Allow # either for now for backwards compatibility. 
esmpy_imported = False @@ -56,6 +55,25 @@ def test_RegridOperator_attributes(self): def test_RegridOperator_copy(self): self.assertIsInstance(self.r.copy(), self.r.__class__) + def test_RegridOperator_equal_weights(self): + r0 = self.r + r1 = r0.copy() + self.assertTrue(r0.equal_weights(r1)) + r1.weights.data += 0.1 + self.assertFalse(r0.equal_weights(r1)) + + def test_RegridOperator_equal_dst_mask(self): + r0 = self.r.copy() + r1 = r0.copy() + self.assertTrue(r0.equal_dst_mask(r1)) + mask = [True, False] + r0._set_component("dst_mask", mask) + self.assertFalse(r0.equal_dst_mask(r1)) + r1._set_component("dst_mask", mask) + self.assertTrue(r0.equal_dst_mask(r1)) + r1._set_component("dst_mask", mask[::-1]) + self.assertFalse(r0.equal_dst_mask(r1)) + if __name__ == "__main__": print("Run date:", datetime.datetime.now()) diff --git a/cf/test/test_functions.py b/cf/test/test_functions.py index 7ddf71d0e0..1a26a1afdc 100644 --- a/cf/test/test_functions.py +++ b/cf/test/test_functions.py @@ -38,7 +38,6 @@ def test_keyword_deprecation(self): def test_aliases(self): self.assertEqual(cf.log_level(), cf.LOG_LEVEL()) - self.assertEqual(cf.free_memory(), cf.FREE_MEMORY()) self.assertEqual(cf.total_memory(), cf.TOTAL_MEMORY()) self.assertEqual(cf.regrid_logging(), cf.REGRID_LOGGING()) self.assertEqual(cf.relaxed_identities(), cf.RELAXED_IDENTITIES()) diff --git a/cf/test/test_quantization.py b/cf/test/test_quantization.py index 03960da987..19f7fdc20a 100644 --- a/cf/test/test_quantization.py +++ b/cf/test/test_quantization.py @@ -63,7 +63,6 @@ def test_quantization_read_write(self): f.set_quantize_on_write(q0) # Write the field and read it back in - tmpfile1 = "delme1.nc" cf.write(f, tmpfile1) g = cf.read(tmpfile1)[0] diff --git a/cf/test/test_read_write.py b/cf/test/test_read_write.py index 2810c335ed..93d58c1d2e 100644 --- a/cf/test/test_read_write.py +++ b/cf/test/test_read_write.py @@ -4,7 +4,6 @@ import inspect import os import shutil -import shutil import subprocess import 
tempfile import unittest @@ -644,7 +643,8 @@ def test_read_write_unlimited(self): self.assertTrue(domain_axes["domainaxis2"].nc_is_unlimited()) @unittest.skipUnless( - shutil.which("ncdump"), "ncdump required - install nco") + shutil.which("ncdump"), "ncdump required - install nco" + ) def test_read_CDL(self): subprocess.run( " ".join(["ncdump", self.filename, ">", tmpfile]), @@ -707,7 +707,8 @@ def test_read_CDL(self): cf.read("test_read_write.py") @unittest.skipUnless( - shutil.which("ncdump"), "ncdump required - install nco") + shutil.which("ncdump"), "ncdump required - install nco" + ) def test_read_cdl_string(self): """Test the cf.read 'cdl_string' keyword.""" f = cf.read("example_field_0.nc")[0] @@ -882,7 +883,8 @@ def test_read_url(self): self.assertEqual(len(f), 1) @unittest.skipUnless( - shutil.which("ncdump"), "ncdump required - install nco") + shutil.which("ncdump"), "ncdump required - install nco" + ) def test_read_dataset_type(self): """Test the cf.read 'dataset_type' keyword.""" # netCDF dataset diff --git a/cf/test/test_regrid.py b/cf/test/test_regrid.py index 2001b4cce6..839915227c 100644 --- a/cf/test/test_regrid.py +++ b/cf/test/test_regrid.py @@ -31,21 +31,12 @@ def _remove_tmpfiles(): atexit.register(_remove_tmpfiles) -# ESMF renamed its Python module to `esmpy` at ESMF version 8.4.0. Allow -# either for now for backwards compatibility. -esmpy_imported = False +esmpy_imported = True try: - import esmpy - - esmpy_imported = True + import esmpy # noqa: F401 except ImportError: - try: - # Take the new name to use in preference to the old one. 
- import ESMF as esmpy + esmpy_imported = False - esmpy_imported = True - except ImportError: - pass all_methods = ( "linear", @@ -316,6 +307,8 @@ def test_Field_regrid_2d_field(self): @unittest.skipUnless(esmpy_imported, "Requires esmpy/ESMF package.") def test_Field_regrids_coords(self): """Spherical regridding with coords destination grid.""" + self.assertFalse(cf.regrid_logging()) + dst = self.dst.copy() src = self.src.copy() @@ -384,6 +377,8 @@ def test_Field_regrids_coords(self): @unittest.skipUnless(esmpy_imported, "Requires esmpy/ESMF package.") def test_Field_regridc_2d_coords(self): """2-d Cartesian regridding with coords destination grid.""" + self.assertFalse(cf.regrid_logging()) + dst = self.dst.copy() src = self.src.copy() @@ -412,6 +407,8 @@ def test_Field_regridc_2d_coords(self): def test_Field_regrids_bad_dst(self): """Disallowed destination grid types raise an exception.""" + self.assertFalse(cf.regrid_logging()) + with self.assertRaises(TypeError): self.src.regrids(999, method="conservative") @@ -424,6 +421,8 @@ def test_Field_regrids_bad_dst(self): @unittest.skipUnless(esmpy_imported, "Requires esmpy/ESMF package.") def test_Field_regrids_domain(self): """Spherical regridding with Domain destination grid.""" + self.assertFalse(cf.regrid_logging()) + dst = self.dst src = self.src @@ -446,6 +445,8 @@ def test_Field_regrids_domain(self): @unittest.skipUnless(esmpy_imported, "Requires esmpy/ESMF package.") def test_Field_regridc_domain(self): """Spherical regridding with Domain destination grid.""" + self.assertFalse(cf.regrid_logging()) + dst = self.dst src = self.src @@ -470,6 +471,8 @@ def test_Field_regridc_domain(self): @unittest.skipUnless(esmpy_imported, "Requires esmpy/ESMF package.") def test_Field_regrids_field_operator(self): """Spherical regridding with operator destination grid.""" + self.assertFalse(cf.regrid_logging()) + dst = self.dst src = self.src @@ -506,6 +509,8 @@ def test_Field_regrids_field_operator(self): 
@unittest.skipUnless(esmpy_imported, "Requires esmpy/ESMF package.") def test_Field_regrids_non_coordinates(self): """Check setting of non-coordinate metadata.""" + self.assertFalse(cf.regrid_logging()) + dst = cf.example_field(1) src = self.src @@ -553,6 +558,8 @@ def test_Field_regrids_non_coordinates(self): @unittest.skipUnless(esmpy_imported, "Requires esmpy/ESMF package.") def test_Field_regridc_3d_field(self): """3-d Cartesian regridding with Field destination grid.""" + self.assertFalse(cf.regrid_logging()) + methods = list(all_methods) methods.remove("conservative_2nd") @@ -646,6 +653,8 @@ def test_Field_regridc_3d_field(self): self.assertTrue(np.allclose(y, a, atol=atol, rtol=rtol)) # These methods aren't meant to work for 3-d regridding + # + # Note: This test leaves behind a PET0.ESMF_LogFile for method in ("conservative_2nd",): with self.assertRaises(ValueError): src.regridc(dst, method=method, axes=axes) @@ -653,6 +662,8 @@ def test_Field_regridc_3d_field(self): @unittest.skipUnless(esmpy_imported, "Requires esmpy/ESMF package.") def test_Field_regridc_1d_field(self): """1-d Cartesian regridding with Field destination grid.""" + self.assertFalse(cf.regrid_logging()) + methods = list(all_methods) methods.remove("conservative_2nd") methods.remove("patch") @@ -742,6 +753,8 @@ def test_Field_regridc_1d_field(self): @unittest.skipUnless(esmpy_imported, "Requires esmpy/ESMF package.") def test_Field_regridc_1d_coordinates_z(self): """1-d Z Cartesian regridding with coordinates destination grid.""" + self.assertFalse(cf.regrid_logging()) + src = cf.read(self.filename_xyz)[0] dst = cf.DimensionCoordinate( data=cf.Data([800, 705, 632, 510, 320.0], "hPa") @@ -753,6 +766,8 @@ def test_Field_regridc_1d_coordinates_z(self): @unittest.skipUnless(esmpy_imported, "Requires esmpy/ESMF package.") def test_Field_regrid_chunks(self): """Regridding of chunked axes""" + self.assertFalse(cf.regrid_logging()) + filename = os.path.join( 
os.path.dirname(os.path.abspath(__file__)), "regrid.nc" ) @@ -768,6 +783,8 @@ def test_Field_regrid_chunks(self): @unittest.skipUnless(esmpy_imported, "Requires esmpy/ESMF package.") def test_Field_regrid_weights_file(self): """Regridding creation/use of weights file""" + self.assertFalse(cf.regrid_logging()) + dst = self.dst src = self.src @@ -780,12 +797,14 @@ def test_Field_regrid_weights_file(self): dst, method="linear", return_operator=True, weights_file=tmpfile ) self.assertTrue(os.path.isfile(tmpfile)) - self.assertIsNone(r.weights_file) + self.assertEqual(r.weights_file, tmpfile) + self.assertIsNone(r.weights) r = src.regrids( dst, method="linear", return_operator=True, weights_file=tmpfile ) self.assertEqual(r.weights_file, tmpfile) + self.assertIsNone(r.weights) # Can't provide weights_file when dst is a RegridOperator with self.assertRaises(ValueError): @@ -796,6 +815,8 @@ def test_Field_regrid_weights_file(self): @unittest.skipUnless(esmpy_imported, "Requires esmpy/ESMF package.") def test_return_esmpy_regrid_operator(self): """esmpy regrid operator returns esmpy.Regrid in regrids and regridc""" + self.assertFalse(cf.regrid_logging()) + dst = self.dst src = self.src diff --git a/cf/test/test_regrid_featureType.py b/cf/test/test_regrid_featureType.py index 3a1967f3d5..9a674023f2 100644 --- a/cf/test/test_regrid_featureType.py +++ b/cf/test/test_regrid_featureType.py @@ -9,21 +9,12 @@ import cf -# ESMF renamed its Python module to `esmpy` at ESMF version 8.4.0. Allow -# either for now for backwards compatibility. -esmpy_imported = False +esmpy_imported = True try: - import esmpy - - esmpy_imported = True + import esmpy # noqa: F401 except ImportError: - try: - # Take the new name to use in preference to the old one. 
- import ESMF as esmpy + esmpy_imported = False - esmpy_imported = True - except ImportError: - pass methods = ( "linear", @@ -168,7 +159,7 @@ def test_Field_regrid_featureType_to_grid_2d(self): self.assertFalse(cf.regrid_logging()) # Create some nice data - src = self.dst_featureType + src = self.dst_featureType.copy() src.del_construct("cellmethod0") src = src[:12] src[...] = 273 + np.arange(12) diff --git a/cf/test/test_regrid_mesh.py b/cf/test/test_regrid_mesh.py index 39e29f26fe..5931f69273 100644 --- a/cf/test/test_regrid_mesh.py +++ b/cf/test/test_regrid_mesh.py @@ -9,21 +9,12 @@ import cf -# ESMF renamed its Python module to `esmpy` at ESMF version 8.4.0. Allow -# either for now for backwards compatibility. -esmpy_imported = False +esmpy_imported = True try: - import esmpy - - esmpy_imported = True + import esmpy # noqa: F401 except ImportError: - try: - # Take the new name to use in preference to the old one. - import ESMF as esmpy + esmpy_imported = False - esmpy_imported = True - except ImportError: - pass all_methods = ( "linear", diff --git a/cf/test/test_regrid_partitions.py b/cf/test/test_regrid_partitions.py new file mode 100644 index 0000000000..11847c4806 --- /dev/null +++ b/cf/test/test_regrid_partitions.py @@ -0,0 +1,331 @@ +import datetime +import faulthandler +import os +import unittest + +faulthandler.enable() # to debug seg faults and timeouts + +import numpy as np + +import cf + +esmpy_imported = True +try: + import esmpy # noqa: F401 +except ImportError: + esmpy_imported = False + + +valid_methods = ( + "linear", + "conservative", + "nearest_stod", + "patch", +) +invalid_methods = ("conservative_2nd", "nearest_dtos") + + +class RegridPartitionsTest(unittest.TestCase): + # Get the test source and destination fields + filename = os.path.join( + os.path.dirname(os.path.abspath(__file__)), "regrid.nc" + ) + src_mesh_file = os.path.join( + os.path.dirname(os.path.abspath(__file__)), "ugrid_global_1.nc" + ) + dst_mesh_file = os.path.join( + 
os.path.dirname(os.path.abspath(__file__)), "ugrid_global_2.nc" + ) + filename_xyz = os.path.join( + os.path.dirname(os.path.abspath(__file__)), "regrid_xyz.nc" + ) + dst_featureType_file = os.path.join( + os.path.dirname(os.path.abspath(__file__)), "dsg_trajectory.nc" + ) + + dst_src_grid = cf.read(filename) + dst_grid = dst_src_grid[0] + src_grid = dst_src_grid[1] + src_mesh = cf.read(src_mesh_file)[0] + dst_mesh = cf.read(dst_mesh_file)[0] + dst_featureType = cf.read(dst_featureType_file)[0] + src_grid_xyz = cf.read(filename_xyz)[0] + + @unittest.skipUnless(esmpy_imported, "Requires esmpy/ESMF package.") + def test_Field_regrid_partitions_2d_grid_to_grid(self): + self.assertFalse(cf.regrid_logging()) + + src = cf.example_field(0) + + dst = src.copy() + with cf.bounds_combination_mode("XOR"): + x = dst.dimension_coordinate("X") + x += 1 + + # Mask some destination grid points + dst[2:3, 4] = cf.masked + + # Loop round spherical and Cartesian coordinate systems + for coord_sys, regrid_func, kwargs in zip( + ("spherical", "Cartesian"), + ("regrids", "regridc"), + ({}, {"axes": ["Y", "X"]}), + ): + kwargs["return_operator"] = True + # Loop over whether or not to use the destination grid + # masked points + for use_dst_mask in (False, True): + kwargs["use_dst_mask"] = use_dst_mask + for method in valid_methods: + kwargs["method"] = method + r0 = getattr(src, regrid_func)(dst, **kwargs) + for n in (2, 3, "maximum"): + r1 = getattr(src, regrid_func)( + dst, dst_grid_partitions=n, **kwargs + ) + self.assertTrue(r0.equal_weights(r1)) + self.assertTrue(r0.equal_dst_mask(r1)) + + # ------------------------------------------------------------ + # Destination grid defined by 2-d lats and lons + # ------------------------------------------------------------ + x = dst.coord("X") + y = dst.coord("Y") + x_bounds = x.bounds.array + y_bounds = y.bounds.array + + lat = np.empty((y.size, x.size)) + lat[...] = y.array.reshape(y.size, 1) + lon = np.empty((y.size, x.size)) + lon[...] 
= x.array + + lon_bounds = np.empty(lon.shape + (4,)) + lon_bounds[..., [0, 3]] = x_bounds[:, 0].reshape(1, x.size, 1) + lon_bounds[..., [1, 2]] = x_bounds[:, 1].reshape(1, x.size, 1) + + lat_bounds = np.empty(lat.shape + (4,)) + lat_bounds[..., [0, 1]] = y_bounds[:, 0].reshape(y.size, 1, 1) + lat_bounds[..., [2, 3]] = y_bounds[:, 1].reshape(y.size, 1, 1) + + lon_2d_coord = cf.AuxiliaryCoordinate( + data=cf.Data(lon, units=x.Units), bounds=cf.Bounds(data=lon_bounds) + ) + lat_2d_coord = cf.AuxiliaryCoordinate( + data=cf.Data(lat, units=y.Units), bounds=cf.Bounds(data=lat_bounds) + ) + dst = [lon_2d_coord, lat_2d_coord] + + kwargs = { + "return_operator": True, + "dst_axes": {"X": 1, "Y": 0}, + "dst_cyclic": True, + } + for method in valid_methods: + kwargs["method"] = method + r0 = src.regrids(dst, **kwargs) + for n in (2, 3, "maximum"): + r1 = src.regrids(dst, dst_grid_partitions=n, **kwargs) + self.assertTrue(r0.equal_weights(r1)) + self.assertTrue(r0.equal_dst_mask(r1)) + + @unittest.skipUnless(esmpy_imported, "Requires esmpy/ESMF package.") + def test_Field_regrids_partitions_3d_grid_to_grid(self): + self.assertFalse(cf.regrid_logging()) + + @unittest.skipUnless(esmpy_imported, "Requires esmpy/ESMF package.") + def test_Field_regrid_partitions_cannot_partition(self): + src = self.src_grid + dst = self.dst_grid + for n in (2, "maximum"): + # Can't partition for particular regrid methods + for method in invalid_methods: + with self.assertRaises(ValueError): + src.regrids(dst, method=method, dst_grid_partitions=n) + + # Can't partition when return_esmpy_regrid_operator=True + with self.assertRaises(ValueError): + src.regrids( + dst, + method="linear", + dst_grid_partitions=n, + return_esmpy_regrid_operator=True, + ) + + @unittest.skipUnless(esmpy_imported, "Requires esmpy/ESMF package.") + def test_Field_regridc_partitions_1d_3d_grid_to_grid(self): + src = self.src_grid.copy() + dst = self.dst_grid.copy() + + src.transpose(["X", "Y", "T"], inplace=True) + 
dst.transpose(["Y", "T", "X"], inplace=True) + + # Mask some destination grid points + dst[2:25, 0, 2:35] = cf.masked + + for axes in (["T", "Y", "X"], ["Y"]): + kwargs = {"axes": axes, "return_operator": True} + for use_dst_mask in (False, True): + kwargs["use_dst_mask"] = use_dst_mask + for method in valid_methods: + if method in ("patch",) and len(axes) == 1: + # 'patch' regridding is not available for 1-d + # regridding + continue + + kwargs["method"] = method + r0 = src.regridc(dst, **kwargs) + for n in (2,): + r1 = src.regridc(dst, dst_grid_partitions=n, **kwargs) + self.assertTrue(r0.equal_weights(r1)) + self.assertTrue(r0.equal_dst_mask(r1)) + + @unittest.skipUnless(esmpy_imported, "Requires esmpy/ESMF package.") + def test_Field_regrids_partitions_mesh_to_mesh(self): + self.assertFalse(cf.regrid_logging()) + + dst = self.dst_mesh.copy() + src = self.src_mesh.copy() + + # Mask some destination grid points + dst[0, 2:35] = cf.masked + + kwargs = {"return_operator": True} + for use_dst_mask in (False, True): + kwargs["use_dst_mask"] = use_dst_mask + for method in valid_methods: + kwargs["method"] = method + r0 = src.regrids(dst, **kwargs) + for n in (2,): + r1 = src.regrids(dst, dst_grid_partitions=n, **kwargs) + self.assertTrue(r0.equal_weights(r1)) + self.assertTrue(r0.equal_dst_mask(r1)) + + @unittest.skipUnless(esmpy_imported, "Requires esmpy/ESMF package.") + def test_Field_regrids_partitions_mesh_to_grid(self): + self.assertFalse(cf.regrid_logging()) + + dst = self.dst_grid.copy() + src = self.src_mesh.copy() + + # Mask some destination grid points + dst[0, 30, 2:35] = cf.masked + + kwargs = {"return_operator": True} + for src_masked in (False, True): + for use_dst_mask in (False, True): + kwargs["use_dst_mask"] = use_dst_mask + for method in valid_methods: + kwargs["method"] = method + r0 = src.regrids(dst, **kwargs) + for n in (2,): + r1 = src.regrids(dst, dst_grid_partitions=n, **kwargs) + self.assertTrue(r0.equal_weights(r1)) + 
self.assertTrue(r0.equal_dst_mask(r1)) + + @unittest.skipUnless(esmpy_imported, "Requires esmpy/ESMF package.") + def test_Field_regrids_partitions_grid_to_mesh(self): + self.assertFalse(cf.regrid_logging()) + + src = self.src_grid.copy() + dst = self.src_mesh.copy() + + # Mask some destination grid points + dst[100:300] = cf.masked + + kwargs = {"return_operator": True} + for src_masked in (False, True): + for use_dst_mask in (False, True): + kwargs["use_dst_mask"] = use_dst_mask + for method in valid_methods: + kwargs["method"] = method + r0 = src.regrids(dst, **kwargs) + for n in (2,): + r1 = src.regrids(dst, dst_grid_partitions=n, **kwargs) + self.assertTrue(r0.equal_weights(r1)) + self.assertTrue(r0.equal_dst_mask(r1)) + + @unittest.skipUnless(esmpy_imported, "Requires esmpy/ESMF package.") + def test_Field_regrids_partitions_grid_to_featureType_3d(self): + self.assertFalse(cf.regrid_logging()) + + dst = self.dst_featureType.copy() + src = self.src_grid_xyz.copy() + + # Mask some destination grid points + dst[20:25] = cf.masked + + kwargs = {"return_operator": True, "z": "air_pressure", "ln_z": True} + for use_dst_mask in (False, True): + kwargs["use_dst_mask"] = use_dst_mask + for method in valid_methods: + if method == "conservative": + # Can't do conservative regridding to a + # destination DSG featureType + continue + + kwargs["method"] = method + r0 = src.regrids(dst, **kwargs) + for n in (2,): + r1 = src.regrids(dst, dst_grid_partitions=n, **kwargs) + self.assertTrue(r0.equal_weights(r1)) + self.assertTrue(r0.equal_dst_mask(r1)) + + @unittest.skipUnless(esmpy_imported, "Requires esmpy/ESMF package.") + def test_Field_regrids_partitions_grid_to_featureType_2d(self): + self.assertFalse(cf.regrid_logging()) + + dst = self.dst_featureType.copy() + src = self.src_grid_xyz.copy() + src = src[0, 0, :, :] + + # Mask some destination grid points + dst[20:25] = cf.masked + + kwargs = {"return_operator": True} + for use_dst_mask in (False, True): + 
kwargs["use_dst_mask"] = use_dst_mask + for method in valid_methods: + if method == "conservative": + # Can't do conservative regridding to a + # destination DSG featureType + continue + + kwargs["method"] = method + r0 = src.regrids(dst, **kwargs) + for n in (2,): + r1 = src.regrids(dst, dst_grid_partitions=n, **kwargs) + self.assertTrue(r0.equal_weights(r1)) + self.assertTrue(r0.equal_dst_mask(r1)) + + @unittest.skipUnless(esmpy_imported, "Requires esmpy/ESMF package.") + def test_Field_regrids_partitions_mesh_to_featureType_2d(self): + self.assertFalse(cf.regrid_logging()) + + dst = self.dst_featureType.copy() + src = self.src_mesh.copy() + + # Mask some destination grid points + dst[20:25] = cf.masked + + kwargs = {"return_operator": True} + for use_dst_mask in (False, True): + kwargs["use_dst_mask"] = use_dst_mask + for method in valid_methods: + if method == "conservative": + # Can't do conservative regridding to a + # destination DSG featureType + continue + + kwargs["method"] = method + r0 = src.regrids(dst, **kwargs) + for n in (2,): + r1 = src.regrids(dst, dst_grid_partitions=n, **kwargs) + self.assertTrue(r0.equal_weights(r1)) + self.assertTrue(r0.equal_dst_mask(r1)) + + +if __name__ == "__main__": + print("Run date:", datetime.datetime.now()) + cf.environment() + print("") + unittest.main(verbosity=2) diff --git a/cf/test/test_style.py b/cf/test/test_style.py index 907cfd892b..e6ea9ed894 100644 --- a/cf/test/test_style.py +++ b/cf/test/test_style.py @@ -1,8 +1,8 @@ import datetime import faulthandler -from importlib.util import find_spec import os import unittest +from importlib.util import find_spec faulthandler.enable() # to debug seg faults and timeouts @@ -31,7 +31,8 @@ def setUp(self): ] @unittest.skipUnless( - find_spec("pycodestyle"), "pycodestyle required but not installed") + find_spec("pycodestyle"), "pycodestyle required but not installed" + ) def test_pep8_compliance(self): import pycodestyle diff --git a/setup.py b/setup.py index 
66064ede44..b77a69f8bd 100755 --- a/setup.py +++ b/setup.py @@ -156,13 +156,9 @@ def compile(): ============= Powerful, flexible, and very simple to produce visualizations of field -constructs are available with the -`cf-plot `_ package, that -needs to be installed seprately to the ``cf`` package. - -See the `cfplot gallery -`_ for the full range -of plotting possibilities with example code. +constructs are available with the `cf-plot +`_ package, which needs to be +installed separately to the ``cf`` package. Functionality ============= @@ -219,7 +215,7 @@ def compile(): * regrid structured grid, mesh and DSG field constructs with (multi-)linear, nearest neighbour, first- and second-order conservative and higher order patch recovery methods, including 3-d - regridding, + regridding, and large-grid support, * apply convolution filters to field constructs,