From f3b155ce97a80ecf6baa43df50c5d80dbd633e80 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Sun, 13 Jul 2025 14:27:56 +0100 Subject: [PATCH 01/54] dev --- cf/regrid/regrid.py | 173 ++++++++++++++++++++++++++------------------ 1 file changed, 101 insertions(+), 72 deletions(-) diff --git a/cf/regrid/regrid.py b/cf/regrid/regrid.py index e3d4ad79a9..e444644697 100644 --- a/cf/regrid/regrid.py +++ b/cf/regrid/regrid.py @@ -655,6 +655,7 @@ def regrid( dst=dst, weights_file=weights_file if from_file else None, src_mesh_location=src_grid.mesh_location, + dst_mesh_location=dst_grid.mesh_location, src_featureType=src_grid.featureType, dst_featureType=dst_grid.featureType, src_z=src_grid.z, @@ -2142,9 +2143,10 @@ def create_esmpy_mesh(grid, mask=None): The `esmpy.Mesh` derived from *grid*. """ + destination = grid.name == 'destination' if grid.mesh_location != "face": raise ValueError( - f"Can't regrid {'from' if grid.name == 'source' else 'to'} " + f"Can't regrid {'to' if destination else 'from'} " f"a {grid.name} grid of UGRID {grid.mesh_location!r} cells" ) @@ -2153,80 +2155,107 @@ def create_esmpy_mesh(grid, mask=None): else: # Cartesian coord_sys = esmpy.CoordSys.CART - - # Create an empty esmpy.Mesh - esmpy_mesh = esmpy.Mesh( - parametric_dim=2, spatial_dim=2, coord_sys=coord_sys - ) - - element_conn = grid.domain_topology.normalise().array - element_count = element_conn.shape[0] - element_types = np.ma.count(element_conn, axis=1) - element_conn = np.ma.compressed(element_conn) - - # Element coordinates - if grid.coords: - try: - element_coords = [c.array for c in grid.coords] - except AttributeError: - # The coordinate constructs have no data - element_coords = None - else: - element_coords = np.stack(element_coords, axis=-1) + + if destination and weights_chunks > 1: + chunks = chunk_indices((normalize_chunks(size // weights_chunks, (size,)),)) else: - element_coords = None - - node_ids, index = np.unique(element_conn, return_index=True) - node_coords = [b.data.compressed().array[index] for b in grid.bounds] - node_coords = np.stack(node_coords, axis=-1) - node_count = node_ids.size - node_owners = np.zeros(node_count) - - # Make sure that node IDs are >= 1, as needed by newer versions of - # esmpy. - min_id = node_ids.min() - if min_id < 1: - node_ids = node_ids + min_id + 1 - - # Add nodes. This must be done before `add_elements`. - esmpy_mesh.add_nodes( - node_count=node_count, - node_ids=node_ids, - node_coords=node_coords, - node_owners=node_owners, - ) - - # Mask - if mask is not None: - if mask.dtype != bool: - raise ValueError( - "'mask' must be None or a Boolean array. " - f"Got: dtype={mask.dtype}" - ) + chunks = (None,) + + meshes = [] + for chunk in chunks: + + # Create an empty esmpy.Mesh + esmpy_mesh = esmpy.Mesh( + parametric_dim=2, spatial_dim=2, coord_sys=coord_sys + ) - # Note: 'mask' has True/False for masked/unmasked elements, - # but the esmpy mask requires 0/1 for masked/unmasked - # elements. 
- mask = np.invert(mask).astype("int32") - if mask.all(): - # There are no masked elements - mask = None + domain_topology = grid.domain_topology + if chunk is not None: + domain_topology = domain_topology[chunk] + + # element_conn = grid.domain_topology.normalise().array + element_conn = domain_topology.normalise().array + element_count = element_conn.shape[0] + element_types = np.ma.count(element_conn, axis=1) + element_conn = np.ma.compressed(element_conn) + + # Element coordinates + if grid.coords: + grid_coords = grid.coords + if chunk is not None: + grid_coords = [c[chunk] for b in grid.coords] + + try: + element_coords = [c.array for c in grid_coords] + except AttributeError: + # The coordinate constructs have no data + element_coords = None + else: + element_coords = np.stack(element_coords, axis=-1) + else: + element_coords = None + + grid_bounds = grid.bounds + if chunk is not None: + grid_bounds = [b[chunk] for b in grid.bounds] + + node_ids, index = np.unique(element_conn, return_index=True) + node_coords = [ + b.data.compressed().array[index] for b in grid_bounds + ] + node_coords = np.stack(node_coords, axis=-1) + node_count = node_ids.size + node_owners = np.zeros(node_count) + + # Make sure that node IDs are >= 1, as needed by newer versions of + # esmpy. + min_id = node_ids.min() + if min_id < 1: + node_ids = node_ids + min_id + 1 + + # Add nodes. This must be done before `add_elements`. + esmpy_mesh.add_nodes( + node_count=node_count, + node_ids=node_ids, + node_coords=node_coords, + node_owners=node_owners, + ) + + # Mask + if mask is not None: + if mask.dtype != bool: + raise ValueError( + "'mask' must be None or a Boolean array. " + f"Got: dtype={mask.dtype}" + ) + + # Note: 'mask' has True/False for masked/unmasked elements, + # but the esmpy mask requires 0/1 for masked/unmasked + # elements. + mask = np.invert(mask).astype("int32") + if mask.all(): + # There are no masked elements + mask = None + + # Add elements. This must be done after `add_nodes`. + # + # Note: The element_ids should start at 1, since when writing the + # weights to a file, these ids are used for the column + # indices. + esmpy_mesh.add_elements( + element_count=element_count, + element_ids=np.arange(1, element_count + 1), + element_types=element_types, + element_conn=element_conn, + element_mask=mask, + element_area=None, + element_coords=element_coords, + ) - # Add elements. This must be done after `add_nodes`. - # - # Note: The element_ids should start at 1, since when writing the - # weights to a file, these ids are used for the column - # indices. 
- esmpy_mesh.add_elements( - element_count=element_count, - element_ids=np.arange(1, element_count + 1), - element_types=element_types, - element_conn=element_conn, - element_mask=mask, - element_area=None, - element_coords=element_coords, - ) - return esmpy_mesh + meshes.append(mesh) + +# return esmpy_mesh + return meshes def create_esmpy_locstream(grid, mask=None): From bae834a3a0d4139e8f51ecb7fcc8cb7eaf43e66c Mon Sep 17 00:00:00 2001 From: David Hassell Date: Mon, 14 Jul 2025 18:01:22 +0100 Subject: [PATCH 02/54] dev --- cf/field.py | 4 + cf/regrid/regrid.py | 873 ++++++++++++++++++++++-------------- cf/regrid/regridoperator.py | 149 +++--- 3 files changed, 614 insertions(+), 412 deletions(-) diff --git a/cf/field.py b/cf/field.py index c8b2bc9a48..f02d1b40fc 100644 --- a/cf/field.py +++ b/cf/field.py @@ -12970,6 +12970,7 @@ def regrids( ln_z=None, verbose=None, return_esmpy_regrid_operator=False, + weights_partitions=1, inplace=False, i=False, _compute_field_mass=None, @@ -13307,6 +13308,7 @@ def regrids( z=z, ln_z=ln_z, return_esmpy_regrid_operator=return_esmpy_regrid_operator, + weights_partitions=weights_partitions, inplace=inplace, ) @@ -13331,6 +13333,7 @@ def regridc( z=None, ln_z=None, return_esmpy_regrid_operator=False, + weights_partitions=1, inplace=False, i=False, _compute_field_mass=None, @@ -13602,6 +13605,7 @@ def regridc( z=z, ln_z=ln_z, return_esmpy_regrid_operator=return_esmpy_regrid_operator, + weights_partitions=weights_partitions, inplace=inplace, ) diff --git a/cf/regrid/regrid.py b/cf/regrid/regrid.py index e444644697..22352680e7 100644 --- a/cf/regrid/regrid.py +++ b/cf/regrid/regrid.py @@ -79,6 +79,9 @@ class Grid: # The shape of the regridding axes, in the same order as the # 'axis_keys' attribute. E.g. (96, 73) or (1243,) shape: tuple = None + # The shape of the regridding axes in `esmpy` order. E.g. (73, 96) + # or (1243,) + esmpy_shape: tuple = None # The regrid axis coordinates, in the order expected by # `esmpy`. If the coordinates are 2-d (or more) then the axis # order of each coordinate object must be as expected by `esmpy`. @@ -154,6 +157,7 @@ def regrid( min_weight=None, weights_file=None, return_esmpy_regrid_operator=False, + weights_partitions=1, inplace=False, ): """Regrid a field to a new spherical or Cartesian grid. @@ -323,6 +327,20 @@ def regrid( .. versionadded:: 3.16.2 + weights_partitions: `int`, optional + + If set to an integer greater than 1, then weights will be + calculated separately for this amount of independent + non-overlapping parittions of the destination grid, before + being combined to into the final weights array. This makes + no difference to the result, but will reduce the memory + needed to calulate the weights by a factor approximately + equal to number of partitions, which is should be enough + to allow weights for even very large grids to be calcuated + on machines with modest amounts of RAM. + + .. 
versionadded:: NEXTVERSION + :Returns: `Field` or `None` or `RegridOperator` or `esmpy.Regrid` @@ -566,7 +584,19 @@ def regrid( dst_mask = None # Create the destination esmpy.Grid - dst_esmpy_grid = create_esmpy_grid(dst_grid, grid_dst_mask) + if dst_grid.is_mesh: + dst_esmpy_grids = create_esmpy_mesh( + dst_grid, grid_dst_mask, weights_partitions + ) + elif dst_grid.is_locstream: + dst_esmpy_grids = create_esmpy_locstream( + dst_grid, grid_dst_mask, weights_partitions + ) + else: + dst_esmpy_grids = create_esmpy_grid( + dst_grid, grid_dst_mask, weights_partitions + ) + del grid_dst_mask # Create a mask for the source grid @@ -591,27 +621,35 @@ def regrid( grid_src_mask = src_mask # Create the source esmpy.Grid - src_esmpy_grid = create_esmpy_grid(src_grid, grid_src_mask) - del grid_src_mask + if src_grid.is_mesh: + src_esmpy_grids = create_esmpy_mesh( + src_grid, grid_src_mask, weights_partitions + ) + elif src_grid.is_locstream: + src_esmpy_grids = create_esmpy_locstream( + src_grid, grid_src_mask, weights_partitions + ) + else: + src_esmpy_grids = create_esmpy_grid( + src_grid, grid_src_mask, weights_partitions + ) - if is_log_level_debug(logger): - logger.debug( - f"Source ESMF Grid:\n{src_esmpy_grid}\n\nDestination ESMF Grid:\n{dst_esmpy_grid}\n" - ) # pragma: no cover + del grid_src_mask esmpy_regrid_operator = [] if return_esmpy_regrid_operator else None # Create regrid weights weights, row, col, start_index, from_file = create_esmpy_weights( method, - src_esmpy_grid, - dst_esmpy_grid, + src_esmpy_grids, + dst_esmpy_grids, src_grid=src_grid, dst_grid=dst_grid, ignore_degenerate=ignore_degenerate, quarter=src_grid.dummy_size_2_dimension, esmpy_regrid_operator=esmpy_regrid_operator, weights_file=weights_file, + weights_partitions=weights_partitions, ) if return_esmpy_regrid_operator: @@ -681,6 +719,14 @@ def regrid( regrid_operator.tosparse() return regrid_operator + from scipy.sparse import issparse + + if issparse(regrid_operator.weights) and is_log_level_debug(logger): + logger.debug( + f"Sparse weights: {regrid_operator.weights!r}\n" + f" {regrid_operator.weights.__dict__}" + ) # pragma: no cover + # ---------------------------------------------------------------- # Still here? Then do the regridding # ---------------------------------------------------------------- @@ -1415,6 +1461,7 @@ def spherical_grid( n_regrid_axes=n_regrid_axes, dimensionality=regridding_dimensionality, shape=shape, + esmpy_shape=shape[::-1], coords=coords, bounds=get_bounds(method, coords, mesh_location), cyclic=cyclic, @@ -1632,6 +1679,8 @@ def Cartesian_grid(f, name=None, method=None, axes=None, z=None, ln_z=None): is_locstream = bool(featureType) is_grid = not is_mesh and not is_locstream + shape = tuple(axis_sizes) + grid = Grid( name=name, coord_sys="Cartesian", @@ -1641,7 +1690,8 @@ def Cartesian_grid(f, name=None, method=None, axes=None, z=None, ln_z=None): axes=axis_keys, n_regrid_axes=n_regrid_axes, dimensionality=regridding_dimensionality, - shape=tuple(axis_sizes), + shape=shape, + esmpy_shape=shape, coords=coords, bounds=bounds, cyclic=cyclic, @@ -1869,8 +1919,8 @@ def esmpy_initialise(): return esmpy.Manager(debug=bool(regrid_logging())) -def create_esmpy_grid(grid, mask=None): - """Create an `esmpy.Grid` or `esmpy.Mesh`. +def create_esmpy_grid(grid, mask=None, weights_partitions=1): + """Create an `esmpy.Grid`. .. 
versionadded:: 3.14.0 @@ -1887,21 +1937,11 @@ def create_esmpy_grid(grid, mask=None): :Returns: - `esmpy.Grid` or `esmpy.Mesh` - The `esmpy.Grid` or `esmpy.Mesh` derived from *grid*. + `esmpy.Grid` + The `esmpy.Grid` derived from *grid*. """ - if grid.is_mesh: - # Create an `esmpy.Mesh` - return create_esmpy_mesh(grid, mask) - - if grid.is_locstream: - # Create an `esmpy.LocStream` - return create_esmpy_locstream(grid, mask) - # Create an `esmpy.Grid` - coords = grid.coords - bounds = grid.bounds cyclic = grid.cyclic num_peri_dims = 0 @@ -1918,209 +1958,236 @@ def create_esmpy_grid(grid, mask=None): spherical = False coord_sys = esmpy.CoordSys.CART - # Parse coordinates for the esmpy.Grid, and get its shape. - n_axes = len(coords) - coords = [np.asanyarray(c) for c in coords] - shape = [None] * n_axes - for dim, c in enumerate(coords[:]): - ndim = c.ndim - if ndim == 1: - # 1-d - shape[dim] = c.size - c = c.reshape([c.size if i == dim else 1 for i in range(n_axes)]) - elif ndim == 2: - # 2-d lat or lon - shape[:ndim] = c.shape - if n_axes == 3: - c = c.reshape(c.shape + (1,)) - elif ndim == 3: - # 3-d Z - shape[:ndim] = c.shape - else: - raise ValueError( - f"Can't create an esmpy.Grid from coordinates with {ndim} " - f"dimensions: {c!r}" - ) - - coords[dim] = c - - # Parse bounds for the esmpy.Grid - if bounds: - bounds = [np.asanyarray(b) for b in bounds] + for partition in partitions(grid, weights_partitions): + coords = grid.coords[:] + bounds = grid.bounds[:] - if spherical: - bounds[lat] = np.clip(bounds[lat], -90, 90) - if not contiguous_bounds(bounds[lat]): - raise ValueError( - f"The {grid.name} latitude coordinates must have " - f"contiguous, non-overlapping bounds for {grid.method} " - "regridding." + if partition is not None: + ndim = coords[-1].ndim + if ndim == 1: + coords[-1] = coords[-1][partition] + if bounds: + bounds[-1] = bounds[-1][partition] + else: + coords = [c[partition] for c in coords] + if bounds: + bounds = [b[partition] for b in bounds] + + # Parse coordinates for the esmpy.Grid, and get its shape. + coords = [np.asanyarray(c) for c in coords] + n_axes = len(coords) + shape = [None] * n_axes + for dim, c in enumerate(coords[:]): + ndim = c.ndim + if ndim == 1: + # 1-d + shape[dim] = c.size + c = c.reshape( + [c.size if i == dim else 1 for i in range(n_axes)] ) - - if not contiguous_bounds(bounds[lon], cyclic=cyclic, period=360): + elif ndim == 2: + # 2-d lat or lon + shape[:ndim] = c.shape + if n_axes == 3: + c = c.reshape(c.shape + (1,)) + elif ndim == 3: + # 3-d Z + shape[:ndim] = c.shape + else: raise ValueError( - f"The {grid.name} longitude coordinates must have " - f"contiguous, non-overlapping bounds for {grid.method} " - "regridding." + "Can't create an esmpy.Grid from coordinates with " + f"{ndim} dimensions: {c!r}" ) - else: - # Cartesian - for b in bounds: - if not contiguous_bounds(b): + + coords[dim] = c + + # Parse bounds for the esmpy.Grid + if bounds: + bounds = [np.asanyarray(b) for b in bounds] + + if spherical: + bounds[lat] = np.clip(bounds[lat], -90, 90) + if not contiguous_bounds(bounds[lat]): raise ValueError( - f"The {grid.name} coordinates must have contiguous, " - "non-overlapping bounds for " + f"The {grid.name} latitude coordinates must have " + "contiguous, non-overlapping bounds for " f"{grid.method} regridding." ) - # Convert each bounds to a grid with no repeated values - for dim, b in enumerate(bounds[:]): - ndim = b.ndim - if ndim == 2: - # Bounds for 1-d coordinates. - # - # E.g. 
if the esmpy.Grid is (X, Y) then for non-cyclic - # bounds we create a new bounds array with - # shape (97, 1); and for non-cyclic bounds we - # create a new bounds array with shape (1, - # 74). When multiplied, these arrays would create - # the 2-d (97, 74) bounds grid expected by - # esmpy.Grid. - # - # Note that if the X axis were cyclic, then its - # new bounds array would have shape (96, 1). - if spherical and cyclic and dim == lon: - tmp = b[:, 0] - else: - n = b.shape[0] - tmp = np.empty((n + 1,), dtype=b.dtype) - tmp[:n] = b[:, 0] - tmp[n] = b[-1, 1] - - tmp = tmp.reshape( - [tmp.size if i == dim else 1 for i in range(n_axes)] - ) - elif ndim == 3: - # Bounds for 2-d coordinates - # - # E.g. if the esmpy.Grid is (X, Y) then for bounds with - # a non-cyclic X axis, we create a new bounds - # array with shape (97, 74). - # - # Note that if the X axis were cyclic, then the - # new bounds array would have shape (96, 74). - n, m = b.shape[:2] - if spherical and cyclic: - tmp = np.empty((n, m + 1), dtype=b.dtype) - tmp[:, :m] = b[:, :, 0] - if dim == lon: - tmp[:, m] = b[:, -1, 0] + if not contiguous_bounds( + bounds[lon], cyclic=cyclic, period=360 + ): + raise ValueError( + f"The {grid.name} longitude coordinates must have " + "contiguous, non-overlapping bounds for " + f"{grid.method} regridding." + ) + else: + # Cartesian + for b in bounds: + if not contiguous_bounds(b): + raise ValueError( + f"The {grid.name} coordinates must have " + "contiguous, non-overlapping bounds for " + f"{grid.method} regridding." + ) + + # Convert each bounds to a grid with no repeated values + for dim, b in enumerate(bounds[:]): + ndim = b.ndim + if ndim == 2: + # Bounds for 1-d coordinates. + # + # E.g. if the esmpy.Grid is (X, Y) then for + # non-cyclic bounds we create a new bounds + # array with shape (97, 1); and for + # non-cyclic bounds we create a new bounds + # array with shape (1, 74). When multiplied, + # these arrays would create the 2-d (97, 74) + # bounds grid expected by esmpy.Grid. + # + # Note that if the X axis were cyclic, then its + # new bounds array would have shape (96, 1). + if spherical and cyclic and dim == lon: + tmp = b[:, 0] else: - tmp[:, m] = b[:, -1, 1] - else: - tmp = np.empty((n + 1, m + 1), dtype=b.dtype) - tmp[:n, :m] = b[:, :, 0] - tmp[:n, m] = b[:, -1, 1] - tmp[n, :m] = b[-1, :, 3] - tmp[n, m] = b[-1, -1, 2] + n = b.shape[0] + tmp = np.empty((n + 1,), dtype=b.dtype) + tmp[:n] = b[:, 0] + tmp[n] = b[-1, 1] - if n_axes == 3: - tmp = tmp.reshape(tmp.shape + (1,)) + tmp = tmp.reshape( + [tmp.size if i == dim else 1 for i in range(n_axes)] + ) + elif ndim == 3: + # Bounds for 2-d coordinates + # + # E.g. if the esmpy.Grid is (X, Y) then for bounds + # with a non-cyclic X axis, we + # create a new bounds array with shape (97, + # 74). + # + # Note that if the X axis were cyclic, then + # the new bounds array would have shape (96, + # 74). + n, m = b.shape[:2] + if spherical and cyclic: + tmp = np.empty((n, m + 1), dtype=b.dtype) + tmp[:, :m] = b[:, :, 0] + if dim == lon: + tmp[:, m] = b[:, -1, 0] + else: + tmp[:, m] = b[:, -1, 1] + else: + tmp = np.empty((n + 1, m + 1), dtype=b.dtype) + tmp[:n, :m] = b[:, :, 0] + tmp[:n, m] = b[:, -1, 1] + tmp[n, :m] = b[-1, :, 3] + tmp[n, m] = b[-1, -1, 2] - elif ndim == 4: - # Bounds for 3-d coordinates - raise ValueError( - f"Can't do {grid.method} 3-d {grid.coord_sys} regridding " - f"with {grid.coord_sys} 3-d coordinates " - f"{coords[z].identity!r}." 
- ) + if n_axes == 3: + tmp = tmp.reshape(tmp.shape + (1,)) - bounds[dim] = tmp - - # Define the esmpy.Grid stagger locations. For details see - # - # 2-d: - # https://earthsystemmodeling.org/docs/release/latest/ESMF_refdoc/node5.html#fig:gridstaggerloc2d - # - # 3-d: - # https://earthsystemmodeling.org/docs/release/latest/ESMF_refdoc/node5.html#fig:gridstaggerloc3d - if bounds: - if n_axes == 3: - staggerlocs = [ - esmpy.StaggerLoc.CENTER_VCENTER, - esmpy.StaggerLoc.CORNER_VFACE, - ] - else: - staggerlocs = [esmpy.StaggerLoc.CORNER, esmpy.StaggerLoc.CENTER] - else: - if n_axes == 3: - staggerlocs = [esmpy.StaggerLoc.CENTER_VCENTER] - else: - staggerlocs = [esmpy.StaggerLoc.CENTER] - - # Create an empty esmpy.Grid - esmpy_grid = esmpy.Grid( - max_index=np.array(shape, dtype="int32"), - coord_sys=coord_sys, - num_peri_dims=num_peri_dims, - periodic_dim=periodic_dim, - staggerloc=staggerlocs, - ) + elif ndim == 4: + # Bounds for 3-d coordinates + raise ValueError( + f"Can't do {grid.method} 3-d {grid.coord_sys} " + f"regridding with {grid.coord_sys} 3-d coordinates " + f"{coords[z].identity!r}." + ) - # Populate the esmpy.Grid centres - for dim, c in enumerate(coords): - if n_axes == 3: - grid_centre = esmpy_grid.get_coords( - dim, staggerloc=esmpy.StaggerLoc.CENTER_VCENTER - ) + bounds[dim] = tmp + + # Define the esmpy.Grid stagger locations. For details see + # + # 2-d: + # https://earthsystemmodeling.org/docs/release/latest/ESMF_refdoc/node5.html#fig:gridstaggerloc2d + # + # 3-d: + # https://earthsystemmodeling.org/docs/release/latest/ESMF_refdoc/node5.html#fig:gridstaggerloc3d + if bounds: + if n_axes == 3: + staggerlocs = [ + esmpy.StaggerLoc.CENTER_VCENTER, + esmpy.StaggerLoc.CORNER_VFACE, + ] + else: + staggerlocs = [ + esmpy.StaggerLoc.CORNER, + esmpy.StaggerLoc.CENTER, + ] else: - grid_centre = esmpy_grid.get_coords( - dim, staggerloc=esmpy.StaggerLoc.CENTER - ) + if n_axes == 3: + staggerlocs = [esmpy.StaggerLoc.CENTER_VCENTER] + else: + staggerlocs = [esmpy.StaggerLoc.CENTER] - grid_centre[...] = c + # Create an empty esmpy.Grid + esmpy_grid = esmpy.Grid( + max_index=np.array(shape, dtype="int32"), + coord_sys=coord_sys, + num_peri_dims=num_peri_dims, + periodic_dim=periodic_dim, + staggerloc=staggerlocs, + ) - # Populate the esmpy.Grid corners - if bounds: - if n_axes == 3: - staggerloc = esmpy.StaggerLoc.CORNER_VFACE - else: - staggerloc = esmpy.StaggerLoc.CORNER + # Populate the esmpy.Grid centres + for dim, c in enumerate(coords): + if n_axes == 3: + grid_centre = esmpy_grid.get_coords( + dim, staggerloc=esmpy.StaggerLoc.CENTER_VCENTER + ) + else: + grid_centre = esmpy_grid.get_coords( + dim, staggerloc=esmpy.StaggerLoc.CENTER + ) - for dim, b in enumerate(bounds): - grid_corner = esmpy_grid.get_coords(dim, staggerloc=staggerloc) - grid_corner[...] = b + grid_centre[...] = c - # Add an esmpy.Grid mask - if mask is not None: - if mask.dtype != bool: - raise ValueError( - "'mask' must be None or a Boolean array. Got: " - f"dtype={mask.dtype}" - ) + # Populate the esmpy.Grid corners + if bounds: + if n_axes == 3: + staggerloc = esmpy.StaggerLoc.CORNER_VFACE + else: + staggerloc = esmpy.StaggerLoc.CORNER - if not mask.any(): - mask = None + for dim, b in enumerate(bounds): + grid_corner = esmpy_grid.get_coords(dim, staggerloc=staggerloc) + grid_corner[...] 
= b + # Add an esmpy.Grid mask if mask is not None: - grid_mask = esmpy_grid.add_item(esmpy.GridItem.MASK) - if len(grid.coords) == 2 and mask.ndim == 1: - # esmpy grid has a dummy size 1 dimension, so we need to - # include this in the mask as well. - mask = np.expand_dims(mask, 1) + if mask.dtype != bool: + raise ValueError( + "'mask' must be None or a Boolean array. Got: " + f"dtype={mask.dtype}" + ) - # Note: 'mask' has True/False for masked/unmasked - # elements, but the esmpy mask requires 0/1 for - # masked/unmasked elements. - grid_mask[...] = np.invert(mask).astype("int32") + if partition is not None: + mask = mask[partition] - return esmpy_grid + if not mask.any(): + mask = None + if mask is not None: + grid_mask = esmpy_grid.add_item(esmpy.GridItem.MASK) + if len(grid.coords) == 2 and mask.ndim == 1: + # esmpy grid has a dummy size 1 dimension, so we + # need to include this in the mask as well. + mask = np.expand_dims(mask, 1) -def create_esmpy_mesh(grid, mask=None): + # Note: 'mask' has True/False for masked/unmasked + # elements, but the esmpy mask requires 0/1 for + # masked/unmasked elements. + grid_mask[...] = np.invert(mask).astype("int32") + + yield esmpy_grid + + +def create_esmpy_mesh(grid, mask=None, weights_partitions=1): """Create an `esmpy.Mesh`. .. versionadded:: 3.16.0 @@ -2143,10 +2210,9 @@ def create_esmpy_mesh(grid, mask=None): The `esmpy.Mesh` derived from *grid*. """ - destination = grid.name == 'destination' if grid.mesh_location != "face": raise ValueError( - f"Can't regrid {'to' if destination else 'from'} " + f"Can't regrid {'from' if grid.name == 'source' else 'to'} " f"a {grid.name} grid of UGRID {grid.mesh_location!r} cells" ) @@ -2155,36 +2221,28 @@ def create_esmpy_mesh(grid, mask=None): else: # Cartesian coord_sys = esmpy.CoordSys.CART - - if destination and weights_chunks > 1: - chunks = chunk_indices((normalize_chunks(size // weights_chunks, (size,)),)) - else: - chunks = (None,) - - meshes = [] - for chunk in chunks: - # Create an empty esmpy.Mesh + for partition in partitions(grid, weights_partitions): + # Create an empty esmpy.Mesh for this partition esmpy_mesh = esmpy.Mesh( parametric_dim=2, spatial_dim=2, coord_sys=coord_sys ) domain_topology = grid.domain_topology - if chunk is not None: - domain_topology = domain_topology[chunk] - - # element_conn = grid.domain_topology.normalise().array + if partition is not None: + domain_topology = domain_topology[partition] + element_conn = domain_topology.normalise().array element_count = element_conn.shape[0] element_types = np.ma.count(element_conn, axis=1) element_conn = np.ma.compressed(element_conn) - + # Element coordinates if grid.coords: grid_coords = grid.coords - if chunk is not None: - grid_coords = [c[chunk] for b in grid.coords] - + if partition is not None: + grid_coords = [c[partition] for c in grid.coords] + try: element_coords = [c.array for c in grid_coords] except AttributeError: @@ -2194,25 +2252,23 @@ def create_esmpy_mesh(grid, mask=None): element_coords = np.stack(element_coords, axis=-1) else: element_coords = None - + grid_bounds = grid.bounds - if chunk is not None: - grid_bounds = [b[chunk] for b in grid.bounds] + if partition is not None: + grid_bounds = [b[partition] for b in grid.bounds] node_ids, index = np.unique(element_conn, return_index=True) - node_coords = [ - b.data.compressed().array[index] for b in grid_bounds - ] + node_coords = [b.data.compressed().array[index] for b in grid_bounds] node_coords = np.stack(node_coords, axis=-1) node_count = node_ids.size 
node_owners = np.zeros(node_count) - + # Make sure that node IDs are >= 1, as needed by newer versions of # esmpy. min_id = node_ids.min() if min_id < 1: node_ids = node_ids + min_id + 1 - + # Add nodes. This must be done before `add_elements`. esmpy_mesh.add_nodes( node_count=node_count, @@ -2220,7 +2276,7 @@ def create_esmpy_mesh(grid, mask=None): node_coords=node_coords, node_owners=node_owners, ) - + # Mask if mask is not None: if mask.dtype != bool: @@ -2228,7 +2284,10 @@ def create_esmpy_mesh(grid, mask=None): "'mask' must be None or a Boolean array. " f"Got: dtype={mask.dtype}" ) - + + if partition is not None: + mask = mask[partition] + # Note: 'mask' has True/False for masked/unmasked elements, # but the esmpy mask requires 0/1 for masked/unmasked # elements. @@ -2236,7 +2295,7 @@ def create_esmpy_mesh(grid, mask=None): if mask.all(): # There are no masked elements mask = None - + # Add elements. This must be done after `add_nodes`. # # Note: The element_ids should start at 1, since when writing the @@ -2252,13 +2311,10 @@ def create_esmpy_mesh(grid, mask=None): element_coords=element_coords, ) - meshes.append(mesh) - -# return esmpy_mesh - return meshes + yield esmpy_mesh -def create_esmpy_locstream(grid, mask=None): +def create_esmpy_locstream(grid, mask=None, weights_partitions=1): """Create an `esmpy.LocStream`. .. versionadded:: 3.16.2 @@ -2289,54 +2345,63 @@ def create_esmpy_locstream(grid, mask=None): coord_sys = esmpy.CoordSys.CART keys = ("ESMF:X", "ESMF:Y", "ESMF:Z") - # Create an empty esmpy.LocStream - location_count = grid.shape[0] - esmpy_locstream = esmpy.LocStream( - location_count=location_count, - coord_sys=coord_sys, - name=grid.featureType, - ) + for partition in partitions(grid, weights_partitions): + coords = grid.coords + if partition is not None: + coords = [c[partition] for c in grid.coords] - # Add coordinates (must be of type float64) - for coord, key in zip(grid.coords, keys): - esmpy_locstream[key] = coord.array.astype(float) + # Create an empty esmpy.LocStream + location_count = coords[0].size + esmpy_locstream = esmpy.LocStream( + location_count=location_count, + coord_sys=coord_sys, + name=grid.featureType, + ) - # Add mask (always required, and must be of type int32) - if mask is not None: - if mask.dtype != bool: - raise ValueError( - "'mask' must be None or a Boolean array. " - f"Got: dtype={mask.dtype}" - ) + # Add coordinates (must be of type float64) + for coord, key in zip(coords, keys): + esmpy_locstream[key] = coord.array.astype(float) - # Note: 'mask' has True/False for masked/unmasked elements, - # but the esmpy mask requires 0/1 for masked/unmasked - # elements. - mask = np.invert(mask).astype("int32") - if mask.size == 1: - # Make sure that there's a mask element for each point in - # the locstream (rather than a scalar that applies to all - # elements). - mask = np.full((location_count,), mask, dtype="int32") - else: - # No masked points - mask = np.full((location_count,), 1, dtype="int32") + # Add mask (always required, and must be of type int32) + if mask is not None: + if mask.dtype != bool: + raise ValueError( + "'mask' must be None or a Boolean array. " + f"Got: dtype={mask.dtype}" + ) + + if partition is not None: + mask = mask[partition] - esmpy_locstream["ESMF:Mask"] = mask + # Note: 'mask' has True/False for masked/unmasked elements, + # but the esmpy mask requires 0/1 for masked/unmasked + # elements. 
+ mask = np.invert(mask).astype("int32") + if mask.size == 1: + # Make sure that there's a mask element for each point in + # the locstream (rather than a scalar that applies to all + # elements). + mask = np.full((location_count,), mask, dtype="int32") + else: + # No masked points + mask = np.full((location_count,), 1, dtype="int32") + + esmpy_locstream["ESMF:Mask"] = mask - return esmpy_locstream + yield esmpy_locstream def create_esmpy_weights( method, - src_esmpy_grid, - dst_esmpy_grid, + src_esmpy_grids, + dst_esmpy_grids, src_grid, dst_grid, ignore_degenerate, quarter=False, esmpy_regrid_operator=None, weights_file=None, + weights_partitions=1, ): """Create the `esmpy` regridding weights. @@ -2347,10 +2412,10 @@ def create_esmpy_weights( method: `str` The regridding method. - src_esmpy_grid: `esmpy.Grid` + src_esmpy_grids: sequence of `esmpy.Grid` The source grid. - dst_esmpy_grid: `esmpy.Grid` + dst_esmpy_gridss: sequence of `esmpy.Grid` The destination grid. src_grid: `Grid` @@ -2415,8 +2480,23 @@ def create_esmpy_weights( otherwise `False`. """ + debug = is_log_level_debug(logger) + start_index = 1 + src_esmpy_grids = tuple(src_esmpy_grids) + if len(src_esmpy_grids) != 1: + raise ValueError( + "There must be exactly one source grid partition. " + f"Got {len(src_esmpy_grids)}" + ) + + src_esmpy_grid = src_esmpy_grids[0] + if debug: + logger.debug( + f"Source ESMF Grid:\n{src_esmpy_grid}\n\n" + ) # pragma: no cover + compute_weights = True if esmpy_regrid_operator is None and weights_file is not None: from os.path import isfile @@ -2453,76 +2533,126 @@ def create_esmpy_weights( src_esmpy_field = esmpy.Field( src_esmpy_grid, name="src", meshloc=src_meshloc ) - dst_esmpy_field = esmpy.Field( - dst_esmpy_grid, name="dst", meshloc=dst_meshloc - ) - mask_values = np.array([0], dtype="int32") - - # Create the esmpy.Regrid operator - r = esmpy.Regrid( - src_esmpy_field, - dst_esmpy_field, - regrid_method=esmpy_methods.get(method), - unmapped_action=esmpy.UnmappedAction.IGNORE, - ignore_degenerate=bool(ignore_degenerate), - src_mask_values=mask_values, - dst_mask_values=mask_values, - norm_type=esmpy.api.constants.NormType.FRACAREA, - factors=True, - ) + src_size = src_esmpy_field.data.size + src_rank = src_esmpy_grid.rank - weights = r.get_weights_dict(deep_copy=True) - row = weights["row_dst"] - col = weights["col_src"] - weights = weights["weights"] - - if quarter: - # The weights were created with a dummy size 2 dimension - # such that the weights for each dummy axis element are - # identical. The duplicate weights need to be removed. - # - # To do this, only retain the indices that correspond to - # the top left quarter of the weights matrix in dense - # form. I.e. if w is the NxM dense form of the weights (N, - # M both even), then this is equivalent to w[:N//2, - # :M//2]. 
- index = np.where( - (row <= dst_esmpy_field.data.size // 2) - & (col <= src_esmpy_field.data.size // 2) + partitioned_dst_grid = weights_partitions > 1 + if partitioned_dst_grid: + from scipy.sparse import csr_array, vstack + + w = [] + for i, dst_esmpy_grid in enumerate(dst_esmpy_grids): + if debug: + logger.debug( + f"Destination ESMF Grid (partition {i}):\n" + f"{dst_esmpy_grid}\n" + ) # pragma: no cover + + dst_esmpy_field = esmpy.Field( + dst_esmpy_grid, name="dst", meshloc=dst_meshloc ) - weights = weights[index] - row = row[index] - col = col[index] + + dst_size = dst_esmpy_field.data.size + dst_rank = dst_esmpy_grid.rank + + mask_values = np.array([0], dtype="int32") + + # Create the esmpy.Regrid operator + r = esmpy.Regrid( + src_esmpy_field, + dst_esmpy_field, + regrid_method=esmpy_methods.get(method), + unmapped_action=esmpy.UnmappedAction.IGNORE, + ignore_degenerate=bool(ignore_degenerate), + src_mask_values=mask_values, + dst_mask_values=mask_values, + norm_type=esmpy.api.constants.NormType.FRACAREA, + factors=True, + ) + + weights = r.get_weights_dict(deep_copy=True) + row = weights["row_dst"] + col = weights["col_src"] + weights = weights["weights"] + + ESMF_unmapped_action = r.unmapped_action + ESMF_ignore_degenerate = int(r.ignore_degenerate) + + # Destroy esmpy objects that are no longer needed + dst_esmpy_grid.destroy() + dst_esmpy_field.destroy() + r.dstfield.grid.destroy() + r.dstfield.destroy() + r.destroy() + + if quarter: + # The weights were created with a dummy size 2 + # dimension such that the weights for each dummy axis + # element are identical. The duplicate weights need to + # be removed. + # + # To do this, only retain the indices that correspond + # to the top left quarter of the weights matrix in + # dense form. I.e. if w is the NxM dense form of the + # weights (N, M both even), then this is equivalent to + # w[:N//2, :M//2]. + index = np.where( + (row <= dst_size // 2) & (col <= src_size // 2) + ) + weights = weights[index] + row = row[index] + col = col[index] + del index + + if partitioned_dst_grid: + # The destination grid has been partitioned. Create + # the sparse weights array for this partition. + row -= 1 + col -= 1 + w.append( + csr_array( + (weights, (row, col)), shape=[dst_size, src_size] + ) + ) + + # Destroy esmpy objects that are no longer needed + src_esmpy_field.destroy() + r.srcfield.grid.destroy() + r.srcfield.destroy() + + if partitioned_dst_grid: + # The destination grid has been partitioned. Concatenate + # the sparse weights arrays for all partitions. + weights = vstack(w) + dst_size = weights.shape[0] + row = None + col = None + del w if weights_file is not None: # Write the weights to a netCDF file (copying the # dimension and variable names and structure of a weights # file created by ESMF). + from cfdm import integer_dtype from cfdm.data.locks import netcdf_lock from netCDF4 import Dataset + from scipy.sparse import issparse from .. import __version__ - if ( - max(dst_esmpy_field.data.size, src_esmpy_field.data.size) - <= np.iinfo("int32").max - ): - i_dtype = "i4" + if issparse(weights): + # 'weights' is a CSR sparse array, so we have to infer + # the row and column arrays from it. 
+ weights_data = weights.data + row, col = weights.tocoo().coords + if start_index: + row += start_index + col += start_index else: - i_dtype = "i8" - - upper_bounds = src_esmpy_grid.upper_bounds - if len(upper_bounds) > 1: - upper_bounds = upper_bounds[0] + weights_data = weights - src_shape = tuple(upper_bounds) - - upper_bounds = dst_esmpy_grid.upper_bounds - if len(upper_bounds) > 1: - upper_bounds = upper_bounds[0] - - dst_shape = tuple(upper_bounds) + i_dtype = integer_dtype(max(dst_size, src_size)) regrid_method = f"{src_grid.coord_sys} {src_grid.method}" if src_grid.ln_z: @@ -2533,34 +2663,34 @@ def create_esmpy_weights( nc.title = ( f"Regridding weights from source {src_grid.type} " - f"with shape {src_shape} to destination " - f"{dst_grid.type} with shape {dst_shape}" + f"with shape {src_grid.esmpy_shape} to destination " + f"{dst_grid.type} with shape {dst_grid.esmpy_shape}" ) nc.source = f"cf v{__version__}, esmpy v{esmpy.__version__}" nc.history = f"Created at {datetime.now()}" nc.regrid_method = regrid_method - nc.ESMF_unmapped_action = r.unmapped_action - nc.ESMF_ignore_degenerate = int(r.ignore_degenerate) + nc.ESMF_unmapped_action = ESMF_unmapped_action + nc.ESMF_ignore_degenerate = ESMF_ignore_degenerate - nc.createDimension("n_s", weights.size) - nc.createDimension("src_grid_rank", src_esmpy_grid.rank) - nc.createDimension("dst_grid_rank", dst_esmpy_grid.rank) + nc.createDimension("n_s", weights_data.size) + nc.createDimension("src_grid_rank", src_rank) + nc.createDimension("dst_grid_rank", dst_rank) v = nc.createVariable( "src_grid_dims", i_dtype, ("src_grid_rank",) ) v.long_name = "Source grid shape" - v[...] = src_shape + v[...] = src_grid.esmpy_shape v = nc.createVariable( "dst_grid_dims", i_dtype, ("dst_grid_rank",) ) v.long_name = "Destination grid shape" - v[...] = dst_shape + v[...] = dst_grid.esmpy_shape - v = nc.createVariable("S", weights.dtype, ("n_s",)) + v = nc.createVariable("S", weights_data.dtype, ("n_s",)) v.long_name = "Weights values" - v[...] = weights + v[...] = weights_data v = nc.createVariable("row", i_dtype, ("n_s",), zlib=True) v.long_name = "Destination/row indices" @@ -2574,19 +2704,15 @@ def create_esmpy_weights( nc.close() + if issparse(weights): + row = None + col = None + if esmpy_regrid_operator is None: - # Destroy esmpy objects (the esmpy.Grid objects exist even if - # we didn't create any weights using esmpy.Regrid). + # Destroy esmpy objects that are no longer needed src_esmpy_grid.destroy() - dst_esmpy_grid.destroy() - if compute_weights: - src_esmpy_field.destroy() - dst_esmpy_field.destroy() - r.srcfield.grid.destroy() - r.srcfield.destroy() - r.dstfield.grid.destroy() - r.dstfield.destroy() - r.destroy() + for dst_esmpy_grid in dst_esmpy_grids: + dst_esmpy_grid.destroy() else: # Make the Regrid instance available via the # 'esmpy_regrid_operator' list @@ -3161,3 +3287,52 @@ def set_grid_type(grid): grid.type = f"UGRID {grid.mesh_location} mesh" elif grid.is_locstream: grid.type = f"DSG {grid.featureType}" + + +def partitions(grid, weights_partitions): + """Partitions of the grid. + + Each partition is returned as a slice index to the cell + coordiantes, which my be used to create the actual partition of + the grid. + + Only destinaton grids are allowed to be partitioned. + + When there is a single partition that spans the entire grid, then + the speical value of `None` is used. + + .. versionadded:: NEXTVERSION + + .. 
seealso:: `create_esmpy_grid`, `create_esmpy_mesh`, + `create_esmpy_locstream` + + :Parameters: + + grid: `Grid` + The definition of the source or destination grid. + + weights_partitions: `int` + The number of partitions to split the grid into. Only tthe + "last" dimension is partitioned. + + :Returns: + + generator or `tuple` + The partition specifications, or a `tuple` containing + `None` if there is only one partition. + + """ + if weights_partitions > 1 and grid.name == "destination": + from math import ceil + + from cfdm.data.utils import chunk_indices + from dask.array.core import normalize_chunks + + shape = grid.coords[-1].shape + size = ceil(shape[-1] / weights_partitions) + + return chunk_indices( + normalize_chunks(shape[:-1] + (size,), shape=shape) + ) + + return (None,) diff --git a/cf/regrid/regridoperator.py b/cf/regrid/regridoperator.py index 7621addc7e..c44ed63832 100644 --- a/cf/regrid/regridoperator.py +++ b/cf/regrid/regridoperator.py @@ -50,6 +50,7 @@ def __init__( src_z=None, dst_z=None, ln_z=False, + _dst_mask_adjusted=False, ): """**Initialisation** @@ -224,6 +225,9 @@ def __init__( self._set_component("src_z", src_z, copy=False) self._set_component("dst_z", dst_z, copy=False) self._set_component("ln_z", bool(ln_z), copy=False) + self._set_component( + "_dst_mask_adjusted", bool(_dst_mask_adjusted), copy=False + ) def __repr__(self): """x.__repr__() <==> repr(x)""" @@ -231,6 +235,20 @@ def __repr__(self): f"" ) + @property + def _dst_mask_adjusted(self): + """Whether or not the destination grid mask has been adjusted. + + Whether or not the destination grid mask has been set to True + where the weights for destination grid points are all zero. + + See `tosparse` for details. + + .. versionadded:: NEXTVERSION + + """ + return self._get_component("_dst_mask_adjusted") + @property def col(self): """The 1-d array of the column indices of the regridding @@ -549,6 +567,7 @@ def copy(self): src_z=self.src_z, dst_z=self.dst_z, ln_z=self.ln_z, + _dst_mask_adjusted=self._dst_mask_adjusted, ) @_display_or_return @@ -706,75 +725,78 @@ def tosparse(self): `None` """ - weights = self.weights - row = self.row - col = self.col - if weights is not None and row is None and col is None: - # Weights are already in sparse array format - return - from math import prod - from scipy.sparse import csr_array - - start_index = self.start_index - col_start_index = None - row_start_index = None - - if weights is None: - weights_file = self.weights_file - if weights_file is not None: - # Read the weights from the weights file - from cfdm.data.locks import netcdf_lock - from netCDF4 import Dataset - - with netcdf_lock: - nc = Dataset(weights_file, "r") - weights = nc.variables["S"][...] - row = nc.variables["row"][...] - col = nc.variables["col"][...] 
- - try: - col_start_index = nc.variables["col"].start_index - except AttributeError: - col_start_index = 1 - - try: - row_start_index = nc.variables["row"].start_index - except AttributeError: - row_start_index = 1 - - nc.close() - else: - raise ValueError( - "Conversion to sparse array format requires at least " - "one of the 'weights' or 'weights_file' attributes to " - "be set" - ) - - # Convert to sparse array format - if col_start_index: - col = col - col_start_index - elif start_index: - col = col - start_index + from scipy.sparse import csr_array, issparse - if row_start_index: - row = row - row_start_index - elif start_index: - row = row - start_index - - src_size = prod(self.src_shape) dst_size = prod(self.dst_shape) - weights = csr_array((weights, (row, col)), shape=[dst_size, src_size]) - - self._set_component("weights", weights, copy=False) - self._set_component("row", None, copy=False) - self._set_component("col", None, copy=False) - del row, col + weights = self.weights + if not issparse(weights): + row = self.row + col = self.col + start_index = self.start_index + col_start_index = None + row_start_index = None + + if weights is None: + weights_file = self.weights_file + if weights_file is not None: + # Read the weights from the weights file + from cfdm.data.locks import netcdf_lock + from netCDF4 import Dataset + + with netcdf_lock: + nc = Dataset(weights_file, "r") + weights = nc.variables["S"][...] + row = nc.variables["row"][...] + col = nc.variables["col"][...] + + try: + col_start_index = nc.variables["col"].start_index + except AttributeError: + col_start_index = 1 + + try: + row_start_index = nc.variables["row"].start_index + except AttributeError: + row_start_index = 1 + + nc.close() + else: + raise ValueError( + "Conversion to sparse array format requires at least " + "one of the 'weights' or 'weights_file' attributes to " + "be set" + ) + + # Convert to sparse array format + if col_start_index: + col = col - col_start_index + elif start_index: + col = col - start_index + + if row_start_index: + row = row - row_start_index + elif start_index: + row = row - start_index + + src_size = prod(self.src_shape) + weights = csr_array( + (weights, (row, col)), shape=[dst_size, src_size] + ) + + self._set_component("weights", weights, copy=False) + self._set_component("row", None, copy=False) + self._set_component("col", None, copy=False) + del row, col + + if self._dst_mask_adjusted: + # The destination grid mask has already been adjusted + return - # Set the destination grid mask to True where the weights for - # destination grid points are all zero + # Still here? Then set the destination grid mask to True where + # the weights for destination grid points are all zero. 
dst_mask = self.dst_mask if dst_mask is not None: if dst_mask.dtype != bool or dst_mask.shape != self.dst_shape: @@ -807,3 +829,4 @@ def tosparse(self): dst_mask = dst_mask.reshape(self.dst_shape) self._set_component("dst_mask", dst_mask, copy=False) + self._set_component("_dst_mask_adjusted", True, copy=False) From b1f13b745abfae361a8822ed099a37a2b574331a Mon Sep 17 00:00:00 2001 From: David Hassell Date: Tue, 15 Jul 2025 13:51:36 +0100 Subject: [PATCH 03/54] dev --- cf/field.py | 10 +- cf/regrid/regrid.py | 452 +++++++++++++++++++++--------------- cf/regrid/regridoperator.py | 39 ++++ 3 files changed, 304 insertions(+), 197 deletions(-) diff --git a/cf/field.py b/cf/field.py index f02d1b40fc..ff9598023b 100644 --- a/cf/field.py +++ b/cf/field.py @@ -12970,7 +12970,7 @@ def regrids( ln_z=None, verbose=None, return_esmpy_regrid_operator=False, - weights_partitions=1, + dst_grid_partitions=1, inplace=False, i=False, _compute_field_mass=None, @@ -13308,12 +13308,13 @@ def regrids( z=z, ln_z=ln_z, return_esmpy_regrid_operator=return_esmpy_regrid_operator, - weights_partitions=weights_partitions, + dst_grid_partitions=dst_grid_partitions, inplace=inplace, ) @_deprecated_kwarg_check("i", version="3.0.0", removed_at="4.0.0") @_inplace_enabled(default=False) + @_manage_log_level_via_verbosity def regridc( self, dst, @@ -13333,7 +13334,8 @@ def regridc( z=None, ln_z=None, return_esmpy_regrid_operator=False, - weights_partitions=1, + dst_grid_partitions=1, + verbose=None, inplace=False, i=False, _compute_field_mass=None, @@ -13605,7 +13607,7 @@ def regridc( z=z, ln_z=ln_z, return_esmpy_regrid_operator=return_esmpy_regrid_operator, - weights_partitions=weights_partitions, + dst_grid_partitions=dst_grid_partitions, inplace=inplace, ) diff --git a/cf/regrid/regrid.py b/cf/regrid/regrid.py index 22352680e7..00c77e52fa 100644 --- a/cf/regrid/regrid.py +++ b/cf/regrid/regrid.py @@ -157,202 +157,227 @@ def regrid( min_weight=None, weights_file=None, return_esmpy_regrid_operator=False, - weights_partitions=1, + dst_grid_partitions=1, inplace=False, ): """Regrid a field to a new spherical or Cartesian grid. - This is a worker function primarily intended to be called by - `cf.Field.regridc` and `cf.Field.regrids`. + This is a worker function primarily intended to be called by + `cf.Field.regridc` and `cf.Field.regrids`. - .. versionadded:: 3.14.0 + .. versionadded:: 3.14.0 - .. seealso:: `cf.Field.regridc`, `cf.Field.regrids`, - `cf.data.dask_regrid.regrid`. + .. seealso:: `cf.Field.regridc`, `cf.Field.regrids`, + `cf.data.dask_regrid.regrid`. - :Parameters: + :Parameters: - coord_sys: `str` - The name of the coordinate system of the source and - destination grids. Either ``'spherical'`` or - ``'Cartesian'``. + coord_sys: `str` + The name of the coordinate system of the source and + destination grids. Either ``'spherical'`` or + ``'Cartesian'``. - src: `Field` - The source field to be regridded. + src: `Field` + The source field to be regridded. - dst: `Field`, `Domain`, `RegridOperator` or sequence of `Coordinate` - The definition of the destination grid. + dst: `Field`, `Domain`, `RegridOperator` or sequence of `Coordinate` + The definition of the destination grid. - See `cf.Field.regrids` (for spherical regridding) or - `cf.Field.regridc` (for Cartesian regridding) for details. + See `cf.Field.regrids` (for spherical regridding) or + `cf.Field.regridc` (for Cartesian regridding) for details. - method: `str` - Specify which interpolation method to use during - regridding. 
This parameter must be set unless *dst* is a - `RegridOperator`, when the *method* is ignored. + method: `str` + Specify which interpolation method to use during + regridding. This parameter must be set unless *dst* is a + `RegridOperator`, when the *method* is ignored. - See `cf.Field.regrids` (for spherical regridding) or - `cf.Field.regridc` (for Cartesian regridding) for details. + See `cf.Field.regrids` (for spherical regridding) or + `cf.Field.regridc` (for Cartesian regridding) for details. - src_cyclic: `None` or `bool`, optional - For spherical regridding, specifies whether or not the - source grid longitude axis is cyclic. + src_cyclic: `None` or `bool`, optional + For spherical regridding, specifies whether or not the + source grid longitude axis is cyclic. - See `cf.Field.regrids` (for spherical regridding) or - `cf.Field.regridc` (for Cartesian regridding) for details. + See `cf.Field.regrids` (for spherical regridding) or + `cf.Field.regridc` (for Cartesian regridding) for details. - dst_cyclic: `None` or `bool`, optional - For spherical regridding, specifies whether or not the - destination grid longitude axis is cyclic. + dst_cyclic: `None` or `bool`, optional + For spherical regridding, specifies whether or not the + destination grid longitude axis is cyclic. - See `cf.Field.regrids` (for spherical regridding) or - `cf.Field.regridc` (for Cartesian regridding) for details. + See `cf.Field.regrids` (for spherical regridding) or + `cf.Field.regridc` (for Cartesian regridding) for details. - use_src_mask: `bool`, optional - Whether or not to use the source grid mask. + use_src_mask: `bool`, optional + Whether or not to use the source grid mask. - See `cf.Field.regrids` (for spherical regridding) or - `cf.Field.regridc` (for Cartesian regridding) for details. + See `cf.Field.regrids` (for spherical regridding) or + `cf.Field.regridc` (for Cartesian regridding) for details. - use_dst_mask: `bool`, optional - Whether or not to use the source grid mask. + use_dst_mask: `bool`, optional + Whether or not to use the source grid mask. - See `cf.Field.regrids` (for spherical regridding) or - `cf.Field.regridc` (for Cartesian regridding) for details. + See `cf.Field.regrids` (for spherical regridding) or + `cf.Field.regridc` (for Cartesian regridding) for details. - src_axes: `dict` or sequence or `None`, optional - Define the source grid axes to be regridded. + src_axes: `dict` or sequence or `None`, optional + Define the source grid axes to be regridded. - See `cf.Field.regrids` (for spherical regridding) or - `cf.Field.regridc` (for Cartesian regridding) for details. + See `cf.Field.regrids` (for spherical regridding) or + `cf.Field.regridc` (for Cartesian regridding) for details. - dst_axes: `dict` or sequence or `None`, optional - Define the destination grid axes to be regridded. + dst_axes: `dict` or sequence or `None`, optional + Define the destination grid axes to be regridded. - See `cf.Field.regrids` (for spherical regridding) or - `cf.Field.regridc` (for Cartesian regridding) for details. - Ignored for Cartesian regridding. + See `cf.Field.regrids` (for spherical regridding) or + `cf.Field.regridc` (for Cartesian regridding) for details. + Ignored for Cartesian regridding. - axes: sequence, optional - Define the axes to be regridded for the source grid and, - if *dst* is a `Field` or `Domain`, the destination - grid. Ignored for spherical regridding. 
+ axes: sequence, optional + Define the axes to be regridded for the source grid and, + if *dst* is a `Field` or `Domain`, the destination + grid. Ignored for spherical regridding. - See `cf.Field.regridc` for details. + See `cf.Field.regridc` for details. - ignore_degenerate: `bool`, optional - Whether or not to ignore degenerate cells. + ignore_degenerate: `bool`, optional + Whether or not to ignore degenerate cells. - See `cf.Field.regrids` (for spherical regridding) or - `cf.Field.regridc` (for Cartesian regridding) for details. - Ignored for Cartesian regridding. + See `cf.Field.regrids` (for spherical regridding) or + `cf.Field.regridc` (for Cartesian regridding) for details. + Ignored for Cartesian regridding. - return_operator: `bool`, optional - If True then do not perform the regridding, rather return - the `RegridOperator` instance that defines the regridding - operation, and which can be used in subsequent calls. + return_operator: `bool`, optional + If True then do not perform the regridding, rather return + the `RegridOperator` instance that defines the regridding + operation, and which can be used in subsequent calls. - See `cf.Field.regrids` (for spherical regridding) or - `cf.Field.regridc` (for Cartesian regridding) for details. + See `cf.Field.regrids` (for spherical regridding) or + `cf.Field.regridc` (for Cartesian regridding) for details. - check_coordinates: `bool`, optional - Whether or not to check the regrid operator source grid - coordinates. Ignored unless *dst* is a `RegridOperator`. + check_coordinates: `bool`, optional + Whether or not to check the regrid operator source grid + coordinates. Ignored unless *dst* is a `RegridOperator`. - If True and then the source grid coordinates defined by - the regrid operator are checked for compatibility against - those of the *src* field. By default this check is not - carried out. See the *dst* parameter for details. + If True and then the source grid coordinates defined by + the regrid operator are checked for compatibility against + those of the *src* field. By default this check is not + carried out. See the *dst* parameter for details. - If False then only the computationally cheap tests are - performed (checking that the coordinate system, cyclicity - and grid shape are the same). + If False then only the computationally cheap tests are + performed (checking that the coordinate system, cyclicity + and grid shape are the same). - inplace: `bool`, optional - If True then modify *src* in-place and return `None`. + inplace: `bool`, optional + If True then modify *src* in-place and return `None`. - return_esmpy_regrid_operator: `bool`, optional - If True then *src* is not regridded, but the - `esmpy.Regrid` instance for the operation is returned - instead. This is useful for checking that the field has - been regridded correctly. + return_esmpy_regrid_operator: `bool`, optional + If True then *src* is not regridded, but the + `esmpy.Regrid` instance for the operation is returned + instead. This is useful for checking that the field has + been regridded correctly. - weights_file: `str` or `None`, optional - Provide a netCDF file that contains, or will contain, the - regridding weights. + weights_file: `str` or `None`, optional + Provide a netCDF file that contains, or will contain, the + regridding weights. - See `cf.Field.regrids` (for spherical regridding) or - `cf.Field.regridc` (for Cartesian regridding) for details. 
+ See `cf.Field.regrids` (for spherical regridding) or + `cf.Field.regridc` (for Cartesian regridding) for details. - .. versionadded:: 3.15.2 + .. versionadded:: 3.15.2 - src_z: optional - The identity of the source grid vertical coordinates used - to calculate the weights. If `None` then no vertical axis - is identified, and in the spherical case regridding will - be 2-d. + src_z: optional + The identity of the source grid vertical coordinates used + to calculate the weights. If `None` then no vertical axis + is identified, and in the spherical case regridding will + be 2-d. - See `cf.Field.regrids` (for spherical regridding) or - `cf.Field.regridc` (for Cartesian regridding) for details. + See `cf.Field.regrids` (for spherical regridding) or + `cf.Field.regridc` (for Cartesian regridding) for details. - .. versionadded:: 3.16.2 + .. versionadded:: 3.16.2 - dst_z: optional - The identity of the destination grid vertical coordinates - used to calculate the weights. If `None` then no vertical - axis is identified, and in the spherical case regridding - will be 2-d. + dst_z: optional + The identity of the destination grid vertical coordinates + used to calculate the weights. If `None` then no vertical + axis is identified, and in the spherical case regridding + will be 2-d. - See `cf.Field.regrids` (for spherical regridding) or - `cf.Field.regridc` (for Cartesian regridding) for details. + See `cf.Field.regrids` (for spherical regridding) or + `cf.Field.regridc` (for Cartesian regridding) for details. - .. versionadded:: 3.16.2 + .. versionadded:: 3.16.2 - z: optional - The *z* parameter is a convenience that may be used to - replace both *src_z* and *dst_z* when they would contain - identical values. + z: optional + The *z* parameter is a convenience that may be used to + replace both *src_z* and *dst_z* when they would contain + identical values. - See `cf.Field.regrids` (for spherical regridding) or - `cf.Field.regridc` (for Cartesian regridding) for details. + See `cf.Field.regrids` (for spherical regridding) or + `cf.Field.regridc` (for Cartesian regridding) for details. - .. versionadded:: 3.16.2 + .. versionadded:: 3.16.2 - ln_z: `bool` or `None`, optional - Whether or not the weights are to be calculated with the - natural logarithm of vertical coordinates. + ln_z: `bool` or `None`, optional + Whether or not the weights are to be calculated with the + natural logarithm of vertical coordinates. - See `cf.Field.regrids` (for spherical regridding) or - `cf.Field.regridc` (for Cartesian regridding) for details. + See `cf.Field.regrids` (for spherical regridding) or + `cf.Field.regridc` (for Cartesian regridding) for details. - .. versionadded:: 3.16.2 + .. versionadded:: 3.16.2 - weights_partitions: `int`, optional + dst_grid_partitions: `int`, optional + Reduce the memory requirement of the weights calculation + by calculating weights separately for non-overlapping + partitions of the destination grid, and then combining the + weights from each partition to create the final weights + matrix. The number of partitions is given by + *dst_grid_partitions*, whose default value is 1 (i.e. one + partition for the entire grid). - If set to an integer greater than 1, then weights will be - calculated separately for this amount of independent - non-overlapping parittions of the destination grid, before - being combined to into the final weights array. 
This makes - no difference to the result, but will reduce the memory - needed to calulate the weights by a factor approximately - equal to number of partitions, which is should be enough - to allow weights for even very large grids to be calcuated - on machines with modest amounts of RAM. + The amount of memory reduction will vary on a case-by-case + basis, but will always be proportional to the number of + partitions, whilst the time taken to calculate the final + weights matrix is inversely proportional to the number of + partitions. - .. versionadded:: NEXTVERSION + The number of partitions makes no difference to the + final weights matrix, and hence no difference to the + regridded result. - :Returns: + Only the slowest moving regridding dimension is + partitioned (either the Y or Z dimension, for 2-d or + 3-d spherical regridding respectively). + + Any postitve integer may be given, but the number of + partitions will always be less than or equal to the + size of the slowest moving regridding dimension, which + is either the Y or Z dimension, for 2-d or 3-d + spherical regridding respectively. + + Any postitve integer may be given, but the number of + partitions will always be less than or equal to the + size of the slowest moving regridding dimension, which + is the first of the axes given by *axes* or + *dst_axes*. - `Field` or `None` or `RegridOperator` or `esmpy.Regrid` - The regridded field construct; or `None` if the operation - was in-place; or the regridding operator if - *return_operator* is True. + .. versionadded:: NEXTVERSION - If *return_esmpy_regrid_operator* is True then *src* is - not regridded, but the `esmpy.Regrid` instance for the - operation is returned instead. + :Returns: + + `Field` or `None` or `RegridOperator` or `esmpy.Regrid` + The regridded field construct; or `None` if the operation + was in-place; or the regridding operator if + *return_operator* is True. + + If *return_esmpy_regrid_operator* is True then *src* is + not regridded, but the `esmpy.Regrid` instance for the + operation is returned instead. 
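The memory saving described above rests on a simple property of the weights matrix: non-overlapping row blocks, one per destination-grid partition, can be stacked to recover the full matrix exactly. A minimal standalone sketch of that property (the dense random matrix is only a stand-in for the weights that esmpy would produce, and is not part of the patch):

    import numpy as np
    from scipy.sparse import csr_array, vstack

    dst_size, src_size = 6, 4
    rng = np.random.default_rng(0)
    full_weights = csr_array(rng.random((dst_size, src_size)))

    # Two non-overlapping partitions of the destination (row) dimension
    blocks = [full_weights[:3, :], full_weights[3:, :]]
    stacked = vstack(blocks)

    # Stacking the per-partition blocks reproduces the full weights matrix
    assert np.allclose(stacked.toarray(), full_weights.toarray())

Each block only ever needs the memory for its own rows, which is where the reduction comes from.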
""" + debug = is_log_level_debug(logger) + if not inplace: src = src.copy() @@ -512,7 +537,7 @@ def regrid( ln_z=ln_z, ) - if is_log_level_debug(logger): + if debug: logger.debug( f"Source Grid:\n{src_grid}\n\nDestination Grid:\n{dst_grid}\n" ) # pragma: no cover @@ -586,15 +611,15 @@ def regrid( # Create the destination esmpy.Grid if dst_grid.is_mesh: dst_esmpy_grids = create_esmpy_mesh( - dst_grid, grid_dst_mask, weights_partitions + dst_grid, grid_dst_mask, dst_grid_partitions ) elif dst_grid.is_locstream: dst_esmpy_grids = create_esmpy_locstream( - dst_grid, grid_dst_mask, weights_partitions + dst_grid, grid_dst_mask, dst_grid_partitions ) else: dst_esmpy_grids = create_esmpy_grid( - dst_grid, grid_dst_mask, weights_partitions + dst_grid, grid_dst_mask, dst_grid_partitions ) del grid_dst_mask @@ -622,17 +647,11 @@ def regrid( # Create the source esmpy.Grid if src_grid.is_mesh: - src_esmpy_grids = create_esmpy_mesh( - src_grid, grid_src_mask, weights_partitions - ) + src_esmpy_grids = create_esmpy_mesh(src_grid, grid_src_mask) elif src_grid.is_locstream: - src_esmpy_grids = create_esmpy_locstream( - src_grid, grid_src_mask, weights_partitions - ) + src_esmpy_grids = create_esmpy_locstream(src_grid, grid_src_mask) else: - src_esmpy_grids = create_esmpy_grid( - src_grid, grid_src_mask, weights_partitions - ) + src_esmpy_grids = create_esmpy_grid(src_grid, grid_src_mask) del grid_src_mask @@ -649,7 +668,7 @@ def regrid( quarter=src_grid.dummy_size_2_dimension, esmpy_regrid_operator=esmpy_regrid_operator, weights_file=weights_file, - weights_partitions=weights_partitions, + dst_grid_partitions=dst_grid_partitions, ) if return_esmpy_regrid_operator: @@ -716,12 +735,20 @@ def regrid( # Note: The `RegridOperator.tosparse` method will also set # 'dst_mask' to False for destination points with all # zero weights. - regrid_operator.tosparse() + if regrid_operator.weights_file is None: + regrid_operator.tosparse() + + if debug: + logger.debug( + f"Sparse weights: {regrid_operator.weights!r}\n" + f" {regrid_operator.weights.__dict__}" + ) # pragma: no cover + return regrid_operator from scipy.sparse import issparse - if issparse(regrid_operator.weights) and is_log_level_debug(logger): + if debug and issparse(regrid_operator.weights): logger.debug( f"Sparse weights: {regrid_operator.weights!r}\n" f" {regrid_operator.weights.__dict__}" @@ -1691,7 +1718,7 @@ def Cartesian_grid(f, name=None, method=None, axes=None, z=None, ln_z=None): n_regrid_axes=n_regrid_axes, dimensionality=regridding_dimensionality, shape=shape, - esmpy_shape=shape, + esmpy_shape=shape[::-1], coords=coords, bounds=bounds, cyclic=cyclic, @@ -1919,7 +1946,7 @@ def esmpy_initialise(): return esmpy.Manager(debug=bool(regrid_logging())) -def create_esmpy_grid(grid, mask=None, weights_partitions=1): +def create_esmpy_grid(grid, mask=None, grid_partitions=1): """Create an `esmpy.Grid`. .. versionadded:: 3.14.0 @@ -1941,6 +1968,8 @@ def create_esmpy_grid(grid, mask=None, weights_partitions=1): The `esmpy.Grid` derived from *grid*. 
""" + debug = is_log_level_debug(logger) + # Create an `esmpy.Grid` cyclic = grid.cyclic @@ -1958,17 +1987,26 @@ def create_esmpy_grid(grid, mask=None, weights_partitions=1): spherical = False coord_sys = esmpy.CoordSys.CART - for partition in partitions(grid, weights_partitions): + for i, partition in enumerate(partitions(grid, grid_partitions)): coords = grid.coords[:] bounds = grid.bounds[:] if partition is not None: + if debug: + logger.debug( + f"Partition {i} index: {partition}" + ) # pragma: no cover + ndim = coords[-1].ndim if ndim == 1: + # Each coordinate spans a different axis, and we're + # only partitioning the axis of the last coordinate in + # the list. coords[-1] = coords[-1][partition] if bounds: bounds[-1] = bounds[-1][partition] else: + # All coordinates spans the same axes coords = [c[partition] for c in coords] if bounds: bounds = [b[partition] for b in bounds] @@ -2167,7 +2205,7 @@ def create_esmpy_grid(grid, mask=None, weights_partitions=1): ) if partition is not None: - mask = mask[partition] + mask = mask[..., *partition] if not mask.any(): mask = None @@ -2187,7 +2225,7 @@ def create_esmpy_grid(grid, mask=None, weights_partitions=1): yield esmpy_grid -def create_esmpy_mesh(grid, mask=None, weights_partitions=1): +def create_esmpy_mesh(grid, mask=None, grid_partitions=1): """Create an `esmpy.Mesh`. .. versionadded:: 3.16.0 @@ -2210,6 +2248,8 @@ def create_esmpy_mesh(grid, mask=None, weights_partitions=1): The `esmpy.Mesh` derived from *grid*. """ + debug = is_log_level_debug(logger) + if grid.mesh_location != "face": raise ValueError( f"Can't regrid {'from' if grid.name == 'source' else 'to'} " @@ -2222,7 +2262,7 @@ def create_esmpy_mesh(grid, mask=None, weights_partitions=1): # Cartesian coord_sys = esmpy.CoordSys.CART - for partition in partitions(grid, weights_partitions): + for i, partition in enumerate(partitions(grid, grid_partitions)): # Create an empty esmpy.Mesh for this partition esmpy_mesh = esmpy.Mesh( parametric_dim=2, spatial_dim=2, coord_sys=coord_sys @@ -2230,6 +2270,11 @@ def create_esmpy_mesh(grid, mask=None, weights_partitions=1): domain_topology = grid.domain_topology if partition is not None: + if debug: + logger.debug( + f"Partition {i} index: {partition}" + ) # pragma: no cover + domain_topology = domain_topology[partition] element_conn = domain_topology.normalise().array @@ -2241,6 +2286,7 @@ def create_esmpy_mesh(grid, mask=None, weights_partitions=1): if grid.coords: grid_coords = grid.coords if partition is not None: + # All coordinates spans the same axis grid_coords = [c[partition] for c in grid.coords] try: @@ -2255,6 +2301,7 @@ def create_esmpy_mesh(grid, mask=None, weights_partitions=1): grid_bounds = grid.bounds if partition is not None: + # All coordinates spans the same axis grid_bounds = [b[partition] for b in grid.bounds] node_ids, index = np.unique(element_conn, return_index=True) @@ -2286,6 +2333,7 @@ def create_esmpy_mesh(grid, mask=None, weights_partitions=1): ) if partition is not None: + # The mask spans the same axis as the coordinates mask = mask[partition] # Note: 'mask' has True/False for masked/unmasked elements, @@ -2314,7 +2362,7 @@ def create_esmpy_mesh(grid, mask=None, weights_partitions=1): yield esmpy_mesh -def create_esmpy_locstream(grid, mask=None, weights_partitions=1): +def create_esmpy_locstream(grid, mask=None, grid_partitions=1): """Create an `esmpy.LocStream`. .. 
versionadded:: 3.16.2 @@ -2337,6 +2385,8 @@ def create_esmpy_locstream(grid, mask=None, weights_partitions=1): The `esmpy.LocStream` derived from *grid*. """ + debug = is_log_level_debug(logger) + if grid.coord_sys == "spherical": coord_sys = esmpy.CoordSys.SPH_DEG keys = ("ESMF:Lon", "ESMF:Lat", "ESMF:Radius") @@ -2345,9 +2395,15 @@ def create_esmpy_locstream(grid, mask=None, weights_partitions=1): coord_sys = esmpy.CoordSys.CART keys = ("ESMF:X", "ESMF:Y", "ESMF:Z") - for partition in partitions(grid, weights_partitions): + for i, partition in enumerate(partitions(grid, grid_partitions)): coords = grid.coords if partition is not None: + if debug: + logger.debug( + f"Partition {i} index: {partition}" + ) # pragma: no cover + + # All coordinates spans the same axis coords = [c[partition] for c in grid.coords] # Create an empty esmpy.LocStream @@ -2371,6 +2427,7 @@ def create_esmpy_locstream(grid, mask=None, weights_partitions=1): ) if partition is not None: + # The mask spans the same axis as the coordinates mask = mask[partition] # Note: 'mask' has True/False for masked/unmasked elements, @@ -2401,7 +2458,7 @@ def create_esmpy_weights( quarter=False, esmpy_regrid_operator=None, weights_file=None, - weights_partitions=1, + dst_grid_partitions=1, ): """Create the `esmpy` regridding weights. @@ -2537,7 +2594,7 @@ def create_esmpy_weights( src_size = src_esmpy_field.data.size src_rank = src_esmpy_grid.rank - partitioned_dst_grid = weights_partitions > 1 + partitioned_dst_grid = dst_grid_partitions > 1 if partitioned_dst_grid: from scipy.sparse import csr_array, vstack @@ -2623,7 +2680,8 @@ def create_esmpy_weights( if partitioned_dst_grid: # The destination grid has been partitioned. Concatenate - # the sparse weights arrays for all partitions. + # the sparse weights arrays for all destination grid + # partitions. weights = vstack(w) dst_size = weights.shape[0] row = None @@ -2637,22 +2695,25 @@ def create_esmpy_weights( from cfdm import integer_dtype from cfdm.data.locks import netcdf_lock from netCDF4 import Dataset - from scipy.sparse import issparse from .. import __version__ - if issparse(weights): + from_file = True + + if partitioned_dst_grid: # 'weights' is a CSR sparse array, so we have to infer # the row and column arrays from it. weights_data = weights.data row, col = weights.tocoo().coords if start_index: + # 'row' and 'col' are currently zero-based row += start_index col += start_index else: weights_data = weights - i_dtype = integer_dtype(max(dst_size, src_size)) + row = row.astype(integer_dtype(dst_size), copy=False) + col = col.astype(integer_dtype(src_size), copy=False) regrid_method = f"{src_grid.coord_sys} {src_grid.method}" if src_grid.ln_z: @@ -2677,34 +2738,38 @@ def create_esmpy_weights( nc.createDimension("dst_grid_rank", dst_rank) v = nc.createVariable( - "src_grid_dims", i_dtype, ("src_grid_rank",) + "src_grid_dims", col.dtype, ("src_grid_rank",) ) v.long_name = "Source grid shape" v[...] = src_grid.esmpy_shape v = nc.createVariable( - "dst_grid_dims", i_dtype, ("dst_grid_rank",) + "dst_grid_dims", row.dtype, ("dst_grid_rank",) ) v.long_name = "Destination grid shape" v[...] = dst_grid.esmpy_shape - v = nc.createVariable("S", weights_data.dtype, ("n_s",)) + v = nc.createVariable( + "S", weights_data.dtype, ("n_s",), zlib=True + ) v.long_name = "Weights values" v[...] 
= weights_data - v = nc.createVariable("row", i_dtype, ("n_s",), zlib=True) + v = nc.createVariable("row", row.dtype, ("n_s",), zlib=True) v.long_name = "Destination/row indices" v.start_index = start_index v[...] = row - v = nc.createVariable("col", i_dtype, ("n_s",), zlib=True) + v = nc.createVariable("col", col.dtype, ("n_s",), zlib=True) v.long_name = "Source/col indices" v.start_index = start_index v[...] = col nc.close() - if issparse(weights): + if partitioned_dst_grid: + # Reset 'row' and 'col' to None, because 'weights' is + # already a sparse array. row = None col = None @@ -3289,17 +3354,17 @@ def set_grid_type(grid): grid.type = f"DSG {grid.featureType}" -def partitions(grid, weights_partitions): +def partitions(grid, grid_partitions): """Partitions of the grid. - Each partition is returned as a slice index to the cell - coordiantes, which my be used to create the actual partition of - the grid. + Each partition is defined as an index to cell coordinates, which + may be used to create the actual partition of the grid. - Only destinaton grids are allowed to be partitioned. + Only a destinaton grid without a dummy size 2 dimension can be + partitioned. When there is a single partition that spans the entire grid, then - the speical value of `None` is used. + the special value of `None` is used. .. versionadded:: NEXTVERSION @@ -3311,28 +3376,29 @@ def partitions(grid, weights_partitions): grid: `Grid` The definition of the source or destination grid. - weights_partitions: `int` - The number of partitions to split the grid into. Only tthe - "last" dimension is partitioned. + grid_partitions: `int` + The number of partitions to split the grid into. Only the + last (i.e. slowest moving) dimension in `esmpy` order is + partitioned. :Returns: generator or `tuple` The partition specifications, or a `tuple` containing - `None` if there is only one partition. + `None` if there is only one partition. Each partition + specification is a tuple of `slice` objects. """ - if weights_partitions > 1 and grid.name == "destination": - from math import ceil + grid_partitions = int(grid_partitions) + if grid.name == "source" or grid.dummy_size_2_dimension or grid_partitions <= 1 : + return (None,) - from cfdm.data.utils import chunk_indices - from dask.array.core import normalize_chunks + from math import ceil - shape = grid.coords[-1].shape - size = ceil(shape[-1] / weights_partitions) + from cfdm.data.utils import chunk_indices + from dask.array.core import normalize_chunks - return chunk_indices( - normalize_chunks(shape[:-1] + (size,), shape=shape) - ) + shape = grid.coords[-1].shape + size = ceil(shape[-1] / grid_partitions) - return (None,) + return chunk_indices(normalize_chunks(shape[:-1] + (size,), shape=shape)) diff --git a/cf/regrid/regridoperator.py b/cf/regrid/regridoperator.py index c44ed63832..5dbb839b02 100644 --- a/cf/regrid/regridoperator.py +++ b/cf/regrid/regridoperator.py @@ -5,6 +5,8 @@ from ..decorators import _display_or_return from ..functions import _DEPRECATION_ERROR_ATTRIBUTE, _DEPRECATION_ERROR_METHOD +from ..functions import atol as cf_atol +from ..functions import rtol as cf_rtol from ..mixin_container import Container as mixin_Container @@ -193,6 +195,8 @@ def __init__( .. 
versionadded:: 3.16.2 """ + from scipy.sparse import issparse + super().__init__() if weights is None and weights_file is None: @@ -229,6 +233,10 @@ def __init__( "_dst_mask_adjusted", bool(_dst_mask_adjusted), copy=False ) + if issparse(weights): + # Make sure that the destination mask has been adjusted + self.tosparse() + def __repr__(self): """x.__repr__() <==> repr(x)""" return ( @@ -626,6 +634,37 @@ def dump(self, display=True): return "\n".join(string) + def equal_weights(self, other, rtol=None, atol=None): + """TODOREGRID + + :Parameters: + + {{rtol: number, optional}} + + {{atol: number, optional}} + + :Returns: + + `bool` + + """ + if atol is None: + atol = cf_atol() + + if rtol is None: + rtol = cf_rtol() + + self.tosparse() + other.tosparse() + w0 = self.weights + w1 = other.weights + + return ( + (w0.indices == w1.indices).all() + and (w0.indptr == w1.indptr).all() + and np.allclose(w0.data, w1.data, rtol=rtol, atol=atol) + ) + def get_parameter(self, parameter, *default): """Return a regrid operation parameter. From 7336fbab71d8146afc670b13c5ddf8305c23e256 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Tue, 15 Jul 2025 19:16:11 +0100 Subject: [PATCH 04/54] dev --- cf/field.py | 2 +- cf/regrid/regrid.py | 327 ++++++++++++++++++++++---------------------- 2 files changed, 161 insertions(+), 168 deletions(-) diff --git a/cf/field.py b/cf/field.py index ff9598023b..3c224d1159 100644 --- a/cf/field.py +++ b/cf/field.py @@ -13335,7 +13335,7 @@ def regridc( ln_z=None, return_esmpy_regrid_operator=False, dst_grid_partitions=1, - verbose=None, + verbose=None, inplace=False, i=False, _compute_field_mass=None, diff --git a/cf/regrid/regrid.py b/cf/regrid/regrid.py index 00c77e52fa..1c71535082 100644 --- a/cf/regrid/regrid.py +++ b/cf/regrid/regrid.py @@ -162,218 +162,205 @@ def regrid( ): """Regrid a field to a new spherical or Cartesian grid. - This is a worker function primarily intended to be called by - `cf.Field.regridc` and `cf.Field.regrids`. + This is a worker function primarily intended to be called by + `cf.Field.regridc` and `cf.Field.regrids`. - .. versionadded:: 3.14.0 - - .. seealso:: `cf.Field.regridc`, `cf.Field.regrids`, - `cf.data.dask_regrid.regrid`. - - :Parameters: - - coord_sys: `str` - The name of the coordinate system of the source and - destination grids. Either ``'spherical'`` or - ``'Cartesian'``. - - src: `Field` - The source field to be regridded. - - dst: `Field`, `Domain`, `RegridOperator` or sequence of `Coordinate` - The definition of the destination grid. - - See `cf.Field.regrids` (for spherical regridding) or - `cf.Field.regridc` (for Cartesian regridding) for details. + .. versionadded:: 3.14.0 - method: `str` - Specify which interpolation method to use during - regridding. This parameter must be set unless *dst* is a - `RegridOperator`, when the *method* is ignored. + .. seealso:: `cf.Field.regridc`, `cf.Field.regrids`, + `cf.data.dask_regrid.regrid`. - See `cf.Field.regrids` (for spherical regridding) or - `cf.Field.regridc` (for Cartesian regridding) for details. + :Parameters: - src_cyclic: `None` or `bool`, optional - For spherical regridding, specifies whether or not the - source grid longitude axis is cyclic. + coord_sys: `str` + The name of the coordinate system of the source and + destination grids. Either ``'spherical'`` or + ``'Cartesian'``. - See `cf.Field.regrids` (for spherical regridding) or - `cf.Field.regridc` (for Cartesian regridding) for details. + src: `Field` + The source field to be regridded. 
- dst_cyclic: `None` or `bool`, optional - For spherical regridding, specifies whether or not the - destination grid longitude axis is cyclic. + dst: `Field`, `Domain`, `RegridOperator` or sequence of `Coordinate` + The definition of the destination grid. - See `cf.Field.regrids` (for spherical regridding) or - `cf.Field.regridc` (for Cartesian regridding) for details. + See `cf.Field.regrids` (for spherical regridding) or + `cf.Field.regridc` (for Cartesian regridding) for details. - use_src_mask: `bool`, optional - Whether or not to use the source grid mask. + method: `str` + Specify which interpolation method to use during + regridding. This parameter must be set unless *dst* is a + `RegridOperator`, when the *method* is ignored. - See `cf.Field.regrids` (for spherical regridding) or - `cf.Field.regridc` (for Cartesian regridding) for details. + See `cf.Field.regrids` (for spherical regridding) or + `cf.Field.regridc` (for Cartesian regridding) for details. - use_dst_mask: `bool`, optional - Whether or not to use the source grid mask. + src_cyclic: `None` or `bool`, optional + For spherical regridding, specifies whether or not the + source grid longitude axis is cyclic. - See `cf.Field.regrids` (for spherical regridding) or - `cf.Field.regridc` (for Cartesian regridding) for details. + See `cf.Field.regrids` (for spherical regridding) or + `cf.Field.regridc` (for Cartesian regridding) for details. - src_axes: `dict` or sequence or `None`, optional - Define the source grid axes to be regridded. + dst_cyclic: `None` or `bool`, optional + For spherical regridding, specifies whether or not the + destination grid longitude axis is cyclic. - See `cf.Field.regrids` (for spherical regridding) or - `cf.Field.regridc` (for Cartesian regridding) for details. + See `cf.Field.regrids` (for spherical regridding) or + `cf.Field.regridc` (for Cartesian regridding) for details. - dst_axes: `dict` or sequence or `None`, optional - Define the destination grid axes to be regridded. + use_src_mask: `bool`, optional + Whether or not to use the source grid mask. - See `cf.Field.regrids` (for spherical regridding) or - `cf.Field.regridc` (for Cartesian regridding) for details. - Ignored for Cartesian regridding. + See `cf.Field.regrids` (for spherical regridding) or + `cf.Field.regridc` (for Cartesian regridding) for details. - axes: sequence, optional - Define the axes to be regridded for the source grid and, - if *dst* is a `Field` or `Domain`, the destination - grid. Ignored for spherical regridding. + use_dst_mask: `bool`, optional + Whether or not to use the source grid mask. - See `cf.Field.regridc` for details. + See `cf.Field.regrids` (for spherical regridding) or + `cf.Field.regridc` (for Cartesian regridding) for details. - ignore_degenerate: `bool`, optional - Whether or not to ignore degenerate cells. + src_axes: `dict` or sequence or `None`, optional + Define the source grid axes to be regridded. - See `cf.Field.regrids` (for spherical regridding) or - `cf.Field.regridc` (for Cartesian regridding) for details. - Ignored for Cartesian regridding. + See `cf.Field.regrids` (for spherical regridding) or + `cf.Field.regridc` (for Cartesian regridding) for details. - return_operator: `bool`, optional - If True then do not perform the regridding, rather return - the `RegridOperator` instance that defines the regridding - operation, and which can be used in subsequent calls. + dst_axes: `dict` or sequence or `None`, optional + Define the destination grid axes to be regridded. 
- See `cf.Field.regrids` (for spherical regridding) or - `cf.Field.regridc` (for Cartesian regridding) for details. + See `cf.Field.regrids` (for spherical regridding) or + `cf.Field.regridc` (for Cartesian regridding) for details. + Ignored for Cartesian regridding. - check_coordinates: `bool`, optional - Whether or not to check the regrid operator source grid - coordinates. Ignored unless *dst* is a `RegridOperator`. + axes: sequence, optional + Define the axes to be regridded for the source grid and, + if *dst* is a `Field` or `Domain`, the destination + grid. Ignored for spherical regridding. - If True and then the source grid coordinates defined by - the regrid operator are checked for compatibility against - those of the *src* field. By default this check is not - carried out. See the *dst* parameter for details. + See `cf.Field.regridc` for details. - If False then only the computationally cheap tests are - performed (checking that the coordinate system, cyclicity - and grid shape are the same). + ignore_degenerate: `bool`, optional + Whether or not to ignore degenerate cells. - inplace: `bool`, optional - If True then modify *src* in-place and return `None`. + See `cf.Field.regrids` (for spherical regridding) or + `cf.Field.regridc` (for Cartesian regridding) for details. + Ignored for Cartesian regridding. - return_esmpy_regrid_operator: `bool`, optional - If True then *src* is not regridded, but the - `esmpy.Regrid` instance for the operation is returned - instead. This is useful for checking that the field has - been regridded correctly. + return_operator: `bool`, optional + If True then do not perform the regridding, rather return + the `RegridOperator` instance that defines the regridding + operation, and which can be used in subsequent calls. - weights_file: `str` or `None`, optional - Provide a netCDF file that contains, or will contain, the - regridding weights. + See `cf.Field.regrids` (for spherical regridding) or + `cf.Field.regridc` (for Cartesian regridding) for details. - See `cf.Field.regrids` (for spherical regridding) or - `cf.Field.regridc` (for Cartesian regridding) for details. + check_coordinates: `bool`, optional + Whether or not to check the regrid operator source grid + coordinates. Ignored unless *dst* is a `RegridOperator`. - .. versionadded:: 3.15.2 + If True and then the source grid coordinates defined by + the regrid operator are checked for compatibility against + those of the *src* field. By default this check is not + carried out. See the *dst* parameter for details. - src_z: optional - The identity of the source grid vertical coordinates used - to calculate the weights. If `None` then no vertical axis - is identified, and in the spherical case regridding will - be 2-d. + If False then only the computationally cheap tests are + performed (checking that the coordinate system, cyclicity + and grid shape are the same). - See `cf.Field.regrids` (for spherical regridding) or - `cf.Field.regridc` (for Cartesian regridding) for details. + inplace: `bool`, optional + If True then modify *src* in-place and return `None`. - .. versionadded:: 3.16.2 + return_esmpy_regrid_operator: `bool`, optional + If True then *src* is not regridded, but the + `esmpy.Regrid` instance for the operation is returned + instead. This is useful for checking that the field has + been regridded correctly. - dst_z: optional - The identity of the destination grid vertical coordinates - used to calculate the weights. 
If `None` then no vertical - axis is identified, and in the spherical case regridding - will be 2-d. + weights_file: `str` or `None`, optional + Provide a netCDF file that contains, or will contain, the + regridding weights. - See `cf.Field.regrids` (for spherical regridding) or - `cf.Field.regridc` (for Cartesian regridding) for details. + See `cf.Field.regrids` (for spherical regridding) or + `cf.Field.regridc` (for Cartesian regridding) for details. - .. versionadded:: 3.16.2 + .. versionadded:: 3.15.2 - z: optional - The *z* parameter is a convenience that may be used to - replace both *src_z* and *dst_z* when they would contain - identical values. + src_z: optional + The identity of the source grid vertical coordinates used + to calculate the weights. If `None` then no vertical axis + is identified, and in the spherical case regridding will + be 2-d. - See `cf.Field.regrids` (for spherical regridding) or - `cf.Field.regridc` (for Cartesian regridding) for details. + See `cf.Field.regrids` (for spherical regridding) or + `cf.Field.regridc` (for Cartesian regridding) for details. - .. versionadded:: 3.16.2 + .. versionadded:: 3.16.2 - ln_z: `bool` or `None`, optional - Whether or not the weights are to be calculated with the - natural logarithm of vertical coordinates. + dst_z: optional + The identity of the destination grid vertical coordinates + used to calculate the weights. If `None` then no vertical + axis is identified, and in the spherical case regridding + will be 2-d. - See `cf.Field.regrids` (for spherical regridding) or - `cf.Field.regridc` (for Cartesian regridding) for details. + See `cf.Field.regrids` (for spherical regridding) or + `cf.Field.regridc` (for Cartesian regridding) for details. - .. versionadded:: 3.16.2 + .. versionadded:: 3.16.2 - dst_grid_partitions: `int`, optional - Reduce the memory requirement of the weights calculation - by calculating weights separately for non-overlapping - partitions of the destination grid, and then combining the - weights from each partition to create the final weights - matrix. The number of partitions is given by - *dst_grid_partitions*, whose default value is 1 (i.e. one - partition for the entire grid). + z: optional + The *z* parameter is a convenience that may be used to + replace both *src_z* and *dst_z* when they would contain + identical values. - The amount of memory reduction will vary on a case-by-case - basis, but will always be proportional to the number of - partitions, whilst the time taken to calculate the final - weights matrix is inversely proportional to the number of - partitions. + See `cf.Field.regrids` (for spherical regridding) or + `cf.Field.regridc` (for Cartesian regridding) for details. - The number of partitions makes no difference to the - final weights matrix, and hence no difference to the - regridded result. + .. versionadded:: 3.16.2 - Only the slowest moving regridding dimension is - partitioned (either the Y or Z dimension, for 2-d or - 3-d spherical regridding respectively). + ln_z: `bool` or `None`, optional + Whether or not the weights are to be calculated with the + natural logarithm of vertical coordinates. - Any postitve integer may be given, but the number of - partitions will always be less than or equal to the - size of the slowest moving regridding dimension, which - is either the Y or Z dimension, for 2-d or 3-d - spherical regridding respectively. + See `cf.Field.regrids` (for spherical regridding) or + `cf.Field.regridc` (for Cartesian regridding) for details. 
- Any postitve integer may be given, but the number of - partitions will always be less than or equal to the - size of the slowest moving regridding dimension, which - is the first of the axes given by *axes* or - *dst_axes*. + .. versionadded:: 3.16.2 - .. versionadded:: NEXTVERSION + dst_grid_partitions: `int` or `str`, optional + Calculating the weights matrix for grids with very large + numbers of grid points can potentially require more memory + than is available. However, this memory requirement can be + greatly reduced by calculating weights separately for + non-overlapping partitions of the destination grid, and + then combining the weights from each partition to create + the final weights matrix. The more partitions there are, + the smaller the memory requirement for the weights + calculations will be, at the expense of the weights + calculations taking longer. + + The *dst_grid_partitions* parameter sets the number of + destination grid partitions for the weights + calculations. The default value is ``1``, i.e. one + partition for the entire grid, that maximises memory usage + and minimises the weights calculation time. If the string + ``'maximum'`` is given then the largest possible number of + partitions of the destination grid will be used. A + positive integer specifies the exact number of partitions, + capped by the maximum allowed. + + .. versionadded:: NEXTVERSION - :Returns: + :Returns: - `Field` or `None` or `RegridOperator` or `esmpy.Regrid` - The regridded field construct; or `None` if the operation - was in-place; or the regridding operator if - *return_operator* is True. + `Field` or `None` or `RegridOperator` or `esmpy.Regrid` + The regridded field construct; or `None` if the operation + was in-place; or the regridding operator if + *return_operator* is True. - If *return_esmpy_regrid_operator* is True then *src* is - not regridded, but the `esmpy.Regrid` instance for the - operation is returned instead. + If *return_esmpy_regrid_operator* is True then *src* is + not regridded, but the `esmpy.Regrid` instance for the + operation is returned instead. """ debug = is_log_level_debug(logger) @@ -737,7 +724,7 @@ def regrid( # zero weights. if regrid_operator.weights_file is None: regrid_operator.tosparse() - + if debug: logger.debug( f"Sparse weights: {regrid_operator.weights!r}\n" @@ -2594,7 +2581,7 @@ def create_esmpy_weights( src_size = src_esmpy_field.data.size src_rank = src_esmpy_grid.rank - partitioned_dst_grid = dst_grid_partitions > 1 + partitioned_dst_grid = dst_grid_partitions != 1 if partitioned_dst_grid: from scipy.sparse import csr_array, vstack @@ -3389,8 +3376,7 @@ def partitions(grid, grid_partitions): specification is a tuple of `slice` objects. 
""" - grid_partitions = int(grid_partitions) - if grid.name == "source" or grid.dummy_size_2_dimension or grid_partitions <= 1 : + if grid.name == "source" or grid.dummy_size_2_dimension: return (None,) from math import ceil @@ -3399,6 +3385,13 @@ def partitions(grid, grid_partitions): from dask.array.core import normalize_chunks shape = grid.coords[-1].shape - size = ceil(shape[-1] / grid_partitions) - + + if grid_partitions == 'maximum': + size = 1 + else: + if grid_partitions <= 1: + return (None,) + + size = ceil(shape[-1] / grid_partitions) + return chunk_indices(normalize_chunks(shape[:-1] + (size,), shape=shape)) From d9bf32244668731211f03489667ff8b2c5f1ab91 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Tue, 15 Jul 2025 22:32:26 +0100 Subject: [PATCH 05/54] dev --- cf/docstring/docstring.py | 22 ++++++++++++++++++++ cf/field.py | 12 +++++++++++ cf/regrid/regrid.py | 44 +++++++++++++++------------------------ 3 files changed, 51 insertions(+), 27 deletions(-) diff --git a/cf/docstring/docstring.py b/cf/docstring/docstring.py index 9006299d75..29e4f6e5a3 100644 --- a/cf/docstring/docstring.py +++ b/cf/docstring/docstring.py @@ -564,6 +564,28 @@ If True then do not perform the regridding, rather return the `esmpy.Regrid` instance that defines the regridding operation.""", + # dst_grid_partitions + "{{dst_grid_partitions: `int` or `str`, optional}}": """rdst_grid_partitions: `int` or `str`, optional + Calculating the weights matrix for grids with very large + numbers of grid points can potentially require more memory + than is available. However, this memory requirement can be + greatly reduced by calculating weights separately for + non-overlapping partitions of the destination grid, and + then combining the weights from each partition to create + the final weights matrix. The more partitions there are, + the smaller the memory requirement for the weights + calculations will be, at the expense of the weights + calculations taking longer. + + The *dst_grid_partitions* parameter sets the number of + destination grid partitions for the weights + calculations. The default value is ``1``, i.e. one + partition for the entire grid, that maximises memory usage + and minimises the weights calculation time. If the string + ``'maximum'`` is given then the largest possible number of + partitions of the destination grid will be used. A + positive integer specifies the exact number of partitions, + capped by the maximum allowed.""", # ---------------------------------------------------------------- # Method description substitutions (4 levels of indentation) # ---------------------------------------------------------------- diff --git a/cf/field.py b/cf/field.py index 3c224d1159..78b8da4565 100644 --- a/cf/field.py +++ b/cf/field.py @@ -13215,6 +13215,10 @@ def regrids( .. versionadded:: 3.16.2 + {{dst_grid_partitions: `int` or `str`, optional}} + + .. versionadded:: NEXTVERSION + axis_order: sequence, optional Deprecated at version 3.14.0. @@ -13515,6 +13519,14 @@ def regridc( .. versionadded:: 3.16.2 + {{dst_grid_partitions: `int` or `str`, optional}} + + .. versionadded:: NEXTVERSION + + {{verbose: `int` or `str` or `None`, optional}} + + .. versionadded:: NEXTVERSION + axis_order: sequence, optional Deprecated at version 3.14.0. diff --git a/cf/regrid/regrid.py b/cf/regrid/regrid.py index 1c71535082..a107441658 100644 --- a/cf/regrid/regrid.py +++ b/cf/regrid/regrid.py @@ -328,26 +328,15 @@ def regrid( .. 
versionadded:: 3.16.2 dst_grid_partitions: `int` or `str`, optional - Calculating the weights matrix for grids with very large - numbers of grid points can potentially require more memory - than is available. However, this memory requirement can be - greatly reduced by calculating weights separately for - non-overlapping partitions of the destination grid, and - then combining the weights from each partition to create - the final weights matrix. The more partitions there are, - the smaller the memory requirement for the weights - calculations will be, at the expense of the weights - calculations taking longer. - - The *dst_grid_partitions* parameter sets the number of - destination grid partitions for the weights - calculations. The default value is ``1``, i.e. one - partition for the entire grid, that maximises memory usage - and minimises the weights calculation time. If the string - ``'maximum'`` is given then the largest possible number of - partitions of the destination grid will be used. A - positive integer specifies the exact number of partitions, - capped by the maximum allowed. + The number of destination grid partitions for the weights + calculations. If the string ``'maximum'`` is given then + the largest possible number of partitions of the + destination grid will be used. A positive integer + specifies the exact number of partitions, capped by the + maximum allowed. + + See `cf.Field.regrids` (for spherical regridding) or + `cf.Field.regridc` (for Cartesian regridding) for details. .. versionadded:: NEXTVERSION @@ -3363,10 +3352,11 @@ def partitions(grid, grid_partitions): grid: `Grid` The definition of the source or destination grid. - grid_partitions: `int` + grid_partitions: `int` or `str` The number of partitions to split the grid into. Only the last (i.e. slowest moving) dimension in `esmpy` order is - partitioned. + partitioned. If ``'maximum'`` then the maximum possible + number of partitions is defined. :Returns: @@ -3376,7 +3366,7 @@ def partitions(grid, grid_partitions): specification is a tuple of `slice` objects. """ - if grid.name == "source" or grid.dummy_size_2_dimension: + if grid.name == "source" or grid.dummy_size_2_dimension: return (None,) from math import ceil @@ -3385,13 +3375,13 @@ def partitions(grid, grid_partitions): from dask.array.core import normalize_chunks shape = grid.coords[-1].shape - - if grid_partitions == 'maximum': + + if grid_partitions == "maximum": size = 1 else: if grid_partitions <= 1: return (None,) - + size = ceil(shape[-1] / grid_partitions) - + return chunk_indices(normalize_chunks(shape[:-1] + (size,), shape=shape)) From 03aad422c3457aa8709fdbda0008723991b34f3f Mon Sep 17 00:00:00 2001 From: David Hassell Date: Wed, 16 Jul 2025 15:31:11 +0100 Subject: [PATCH 06/54] dev --- cf/docstring/docstring.py | 6 +- cf/regrid/regrid.py | 278 +++++++++++++++---------- cf/regrid/regridoperator.py | 21 +- cf/test/test_RegridOperator.py | 8 + cf/test/test_regrid.py | 19 +- cf/test/test_regrid_featureType.py | 17 +- cf/test/test_regrid_mesh.py | 15 +- cf/test/test_regrid_partitions.py | 316 +++++++++++++++++++++++++++++ 8 files changed, 531 insertions(+), 149 deletions(-) create mode 100644 cf/test/test_regrid_partitions.py diff --git a/cf/docstring/docstring.py b/cf/docstring/docstring.py index 29e4f6e5a3..d1996696bc 100644 --- a/cf/docstring/docstring.py +++ b/cf/docstring/docstring.py @@ -74,7 +74,11 @@ weights with the source data. 
(Note that whilst the `esmpy` package is also able to create the regridded data from its weights, this feature can't be integrated with the `dask` - framework that underpins the field's data.)""", + framework that underpins the field's data.) + + The calculation of weights for large grids can have a very + high memory requirement, but this can be reduced by setting + the *dst_grid_partitions* parameter.""", # regrid Logging "{{regrid Logging}}": """**Logging** diff --git a/cf/regrid/regrid.py b/cf/regrid/regrid.py index a107441658..2c379bb629 100644 --- a/cf/regrid/regrid.py +++ b/cf/regrid/regrid.py @@ -423,6 +423,20 @@ def regrid( f"using the {method!r} regridding method." ) + if dst_grid_partitions != 1: + if method in ("conservative_2nd", "nearest_dtos"): + raise ValueError( + f"Can't use destination grid partitioning for {method!r} " + "regridding. Got dst_grid_partitions={dst_grid_partitions!r}" + ) + + if return_esmpy_regrid_operator: + raise ValueError( + "Can't use destination grid partitioning when " + "return_esmpy_regrid_operator=True. " + f"Got dst_grid_partitions={dst_grid_partitions!r}" + ) + if cartesian: if isinstance(axes, str): axes = (axes,) @@ -633,6 +647,10 @@ def regrid( esmpy_regrid_operator = [] if return_esmpy_regrid_operator else None + partitioned_dst_grid = ( + partitions(dst_grid, dst_grid_partitions, return_n=True) > 1 + ) + # Create regrid weights weights, row, col, start_index, from_file = create_esmpy_weights( method, @@ -644,7 +662,7 @@ def regrid( quarter=src_grid.dummy_size_2_dimension, esmpy_regrid_operator=esmpy_regrid_operator, weights_file=weights_file, - dst_grid_partitions=dst_grid_partitions, + partitioned_dst_grid=partitioned_dst_grid, ) if return_esmpy_regrid_operator: @@ -1940,12 +1958,20 @@ def create_esmpy_grid(grid, mask=None, grid_partitions=1): :Returns: - `esmpy.Grid` - The `esmpy.Grid` derived from *grid*. + generator + An iterator through the `esmpy.Grid` instances for each + grid partition. """ debug = is_log_level_debug(logger) + if mask is not None: + if mask.dtype != bool: + raise ValueError( + "'mask' must be None or a Boolean array. Got: " + f"dtype={mask.dtype}" + ) + # Create an `esmpy.Grid` cyclic = grid.cyclic @@ -2173,30 +2199,25 @@ def create_esmpy_grid(grid, mask=None, grid_partitions=1): grid_corner[...] = b # Add an esmpy.Grid mask - if mask is not None: - if mask.dtype != bool: - raise ValueError( - "'mask' must be None or a Boolean array. Got: " - f"dtype={mask.dtype}" - ) - + mask1 = mask + if mask1 is not None: if partition is not None: - mask = mask[..., *partition] + mask1 = mask1[..., *partition] - if not mask.any(): - mask = None + if not mask1.any(): + mask1 = None - if mask is not None: + if mask1 is not None: grid_mask = esmpy_grid.add_item(esmpy.GridItem.MASK) - if len(grid.coords) == 2 and mask.ndim == 1: + if len(grid.coords) == 2 and mask1.ndim == 1: # esmpy grid has a dummy size 1 dimension, so we # need to include this in the mask as well. - mask = np.expand_dims(mask, 1) + mask1 = np.expand_dims(mask1, 1) - # Note: 'mask' has True/False for masked/unmasked + # Note: 'mask1' has True/False for masked/unmasked # elements, but the esmpy mask requires 0/1 for # masked/unmasked elements. - grid_mask[...] = np.invert(mask).astype("int32") + grid_mask[...] = np.invert(mask1).astype("int32") yield esmpy_grid @@ -2220,8 +2241,9 @@ def create_esmpy_mesh(grid, mask=None, grid_partitions=1): :Returns: - `esmpy.Mesh` - The `esmpy.Mesh` derived from *grid*. 
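The per-partition mask handling in the `create_esmpy_grid`, `create_esmpy_mesh` and `create_esmpy_locstream` hunks is the same in each case: the full Boolean mask is sliced with the partition index and then converted to the 0/1 (masked/unmasked) convention that esmpy expects. In isolation, that step looks like this (the shapes are illustrative only):

    import numpy as np

    # Full destination mask: True marks a masked cell
    full_mask = np.zeros((4, 8), dtype=bool)
    full_mask[2, 3:6] = True

    # Partition index covering the second half of the last dimension
    partition = (slice(None), slice(4, 8))
    mask1 = full_mask[partition]

    if mask1.any():
        # esmpy wants 0 for masked and 1 for unmasked elements
        esmpy_mask = np.invert(mask1).astype("int32")
    else:
        # Nothing is masked in this partition, so no mask is needed
        esmpy_mask = None

    print(esmpy_mask)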
+ generator + An iterator through the `esmpy.Mesh` instances for each + grid partition. """ debug = is_log_level_debug(logger) @@ -2232,6 +2254,13 @@ def create_esmpy_mesh(grid, mask=None, grid_partitions=1): f"a {grid.name} grid of UGRID {grid.mesh_location!r} cells" ) + if mask is not None: + if mask.dtype != bool: + raise ValueError( + "'mask' must be None or a Boolean array. " + f"Got: dtype={mask.dtype}" + ) + if grid.coord_sys == "spherical": coord_sys = esmpy.CoordSys.SPH_DEG else: @@ -2259,11 +2288,11 @@ def create_esmpy_mesh(grid, mask=None, grid_partitions=1): element_conn = np.ma.compressed(element_conn) # Element coordinates - if grid.coords: - grid_coords = grid.coords + grid_coords = grid.coords + if grid_coords: if partition is not None: - # All coordinates spans the same axis - grid_coords = [c[partition] for c in grid.coords] + # All coordinates span the same discrete axis + grid_coords = [c[partition] for c in grid_coords] try: element_coords = [c.array for c in grid_coords] @@ -2277,8 +2306,8 @@ def create_esmpy_mesh(grid, mask=None, grid_partitions=1): grid_bounds = grid.bounds if partition is not None: - # All coordinates spans the same axis - grid_bounds = [b[partition] for b in grid.bounds] + # All coordinates span the same discrete axis + grid_bounds = [b[partition] for b in grid_bounds] node_ids, index = np.unique(element_conn, return_index=True) node_coords = [b.data.compressed().array[index] for b in grid_bounds] @@ -2301,24 +2330,20 @@ def create_esmpy_mesh(grid, mask=None, grid_partitions=1): ) # Mask - if mask is not None: - if mask.dtype != bool: - raise ValueError( - "'mask' must be None or a Boolean array. " - f"Got: dtype={mask.dtype}" - ) - + mask1 = mask + if mask1 is not None: if partition is not None: - # The mask spans the same axis as the coordinates - mask = mask[partition] - - # Note: 'mask' has True/False for masked/unmasked elements, - # but the esmpy mask requires 0/1 for masked/unmasked - # elements. - mask = np.invert(mask).astype("int32") - if mask.all(): + # The mask spans the same discrete axis as the + # coordinates + mask1 = mask1[partition] + + # Note: 'mask1' has True/False for masked/unmasked + # elements, but the esmpy mask requires 0/1 for + # masked/unmasked elements. + mask1 = np.invert(mask1).astype("int32") + if mask1.all(): # There are no masked elements - mask = None + mask1 = None # Add elements. This must be done after `add_nodes`. # @@ -2330,7 +2355,7 @@ def create_esmpy_mesh(grid, mask=None, grid_partitions=1): element_ids=np.arange(1, element_count + 1), element_types=element_types, element_conn=element_conn, - element_mask=mask, + element_mask=mask1, element_area=None, element_coords=element_coords, ) @@ -2357,12 +2382,20 @@ def create_esmpy_locstream(grid, mask=None, grid_partitions=1): :Returns: - `esmpy.LocStream` - The `esmpy.LocStream` derived from *grid*. + generator + An iterator through the `esmpy.LocStream` instances for + each grid partition. """ debug = is_log_level_debug(logger) + if mask is not None: + if mask.dtype != bool: + raise ValueError( + "'mask' must be None or a Boolean array. 
" + f"Got: dtype={mask.dtype}" + ) + if grid.coord_sys == "spherical": coord_sys = esmpy.CoordSys.SPH_DEG keys = ("ESMF:Lon", "ESMF:Lat", "ESMF:Radius") @@ -2379,7 +2412,7 @@ def create_esmpy_locstream(grid, mask=None, grid_partitions=1): f"Partition {i} index: {partition}" ) # pragma: no cover - # All coordinates spans the same axis + # All coordinates span the same discrete axis coords = [c[partition] for c in grid.coords] # Create an empty esmpy.LocStream @@ -2395,31 +2428,27 @@ def create_esmpy_locstream(grid, mask=None, grid_partitions=1): esmpy_locstream[key] = coord.array.astype(float) # Add mask (always required, and must be of type int32) - if mask is not None: - if mask.dtype != bool: - raise ValueError( - "'mask' must be None or a Boolean array. " - f"Got: dtype={mask.dtype}" - ) - + mask1 = mask + if mask1 is not None: if partition is not None: - # The mask spans the same axis as the coordinates - mask = mask[partition] - - # Note: 'mask' has True/False for masked/unmasked elements, - # but the esmpy mask requires 0/1 for masked/unmasked - # elements. - mask = np.invert(mask).astype("int32") - if mask.size == 1: + # The mask spans the same discrete axis as the + # coordinates + mask1 = mask1[partition] + + # Note: 'mask1' has True/False for masked/unmasked + # elements, but the esmpy mask requires 0/1 for + # masked/unmasked elements. + mask1 = np.invert(mask1).astype("int32") + if mask1.size == 1: # Make sure that there's a mask element for each point in # the locstream (rather than a scalar that applies to all # elements). - mask = np.full((location_count,), mask, dtype="int32") + mask1 = np.full((location_count,), mask1, dtype="int32") else: # No masked points - mask = np.full((location_count,), 1, dtype="int32") + mask1 = np.full((location_count,), 1, dtype="int32") - esmpy_locstream["ESMF:Mask"] = mask + esmpy_locstream["ESMF:Mask"] = mask1 yield esmpy_locstream @@ -2434,7 +2463,7 @@ def create_esmpy_weights( quarter=False, esmpy_regrid_operator=None, weights_file=None, - dst_grid_partitions=1, + partitioned_dst_grid=False, ): """Create the `esmpy` regridding weights. @@ -2489,6 +2518,11 @@ def create_esmpy_weights( .. versionadded:: 3.15.2 + partitioned_dst_grid: `bool`, optional + Whether or not the destination grid has been partitioned. + + .. versionadded:: NEXTVERSION + :Returns: 5-`tuple` @@ -2513,6 +2547,8 @@ def create_esmpy_weights( otherwise `False`. 
""" + from cfdm import integer_dtype + debug = is_log_level_debug(logger) start_index = 1 @@ -2570,11 +2606,13 @@ def create_esmpy_weights( src_size = src_esmpy_field.data.size src_rank = src_esmpy_grid.rank - partitioned_dst_grid = dst_grid_partitions != 1 if partitioned_dst_grid: from scipy.sparse import csr_array, vstack - w = [] + # Initialise the list of the sparse weights arrays for + # each destination grid partition + w = [] + for i, dst_esmpy_grid in enumerate(dst_esmpy_grids): if debug: logger.debug( @@ -2612,12 +2650,13 @@ def create_esmpy_weights( ESMF_unmapped_action = r.unmapped_action ESMF_ignore_degenerate = int(r.ignore_degenerate) - # Destroy esmpy objects that are no longer needed - dst_esmpy_grid.destroy() - dst_esmpy_field.destroy() - r.dstfield.grid.destroy() - r.dstfield.destroy() - r.destroy() + if esmpy_regrid_operator is None: + # Destroy esmpy objects that are no longer needed + dst_esmpy_grid.destroy() + dst_esmpy_field.destroy() + r.dstfield.grid.destroy() + r.dstfield.destroy() + r.destroy() if quarter: # The weights were created with a dummy size 2 @@ -2638,8 +2677,12 @@ def create_esmpy_weights( col = col[index] del index + # Cast as 32-bit integers, if possible. + row = row.astype(integer_dtype(dst_size), copy=False) + col = col.astype(integer_dtype(src_size), copy=False) + if partitioned_dst_grid: - # The destination grid has been partitioned. Create + # The destination grid has been partitioned, so create # the sparse weights array for this partition. row -= 1 col -= 1 @@ -2648,27 +2691,37 @@ def create_esmpy_weights( (weights, (row, col)), shape=[dst_size, src_size] ) ) + row = None + col = None - # Destroy esmpy objects that are no longer needed - src_esmpy_field.destroy() - r.srcfield.grid.destroy() - r.srcfield.destroy() + if esmpy_regrid_operator is None: + # Destroy esmpy objects that are no longer needed + src_esmpy_grid.destroy() + src_esmpy_field.destroy() + r.srcfield.grid.destroy() + r.srcfield.destroy() if partitioned_dst_grid: - # The destination grid has been partitioned. Concatenate - # the sparse weights arrays for all destination grid - # partitions. + # The destination grid has been partitioned, so + # concatenate the sparse weights arrays for all + # destination grid partitions. weights = vstack(w) + + # Cast indptr and indices as 32-bit integers, if possible. + weights.indptr = weights.indptr.astype( + integer_dtype(dst_size), copy=False + ) + weights.indices = weights.indices.astype( + integer_dtype(src_size), copy=False + ) + dst_size = weights.shape[0] - row = None - col = None del w if weights_file is not None: # Write the weights to a netCDF file (copying the # dimension and variable names and structure of a weights # file created by ESMF). 
- from cfdm import integer_dtype from cfdm.data.locks import netcdf_lock from netCDF4 import Dataset @@ -2682,15 +2735,13 @@ def create_esmpy_weights( weights_data = weights.data row, col = weights.tocoo().coords if start_index: - # 'row' and 'col' are currently zero-based + # 'row' and 'col' come out of `tocoo().coords` as + # zero-based values row += start_index col += start_index else: weights_data = weights - row = row.astype(integer_dtype(dst_size), copy=False) - col = col.astype(integer_dtype(src_size), copy=False) - regrid_method = f"{src_grid.coord_sys} {src_grid.method}" if src_grid.ln_z: regrid_method += f", ln {src_grid.method} in vertical" @@ -2749,12 +2800,7 @@ def create_esmpy_weights( row = None col = None - if esmpy_regrid_operator is None: - # Destroy esmpy objects that are no longer needed - src_esmpy_grid.destroy() - for dst_esmpy_grid in dst_esmpy_grids: - dst_esmpy_grid.destroy() - else: + if esmpy_regrid_operator is not None: # Make the Regrid instance available via the # 'esmpy_regrid_operator' list esmpy_regrid_operator.append(r) @@ -3330,7 +3376,7 @@ def set_grid_type(grid): grid.type = f"DSG {grid.featureType}" -def partitions(grid, grid_partitions): +def partitions(grid, grid_partitions, return_n=False): """Partitions of the grid. Each partition is defined as an index to cell coordinates, which @@ -3339,13 +3385,10 @@ def partitions(grid, grid_partitions): Only a destinaton grid without a dummy size 2 dimension can be partitioned. - When there is a single partition that spans the entire grid, then - the special value of `None` is used. - .. versionadded:: NEXTVERSION .. seealso:: `create_esmpy_grid`, `create_esmpy_mesh`, - `create_esmpy_locstream` + `create_esmpy_locstream`, `create_esmpy_weights` :Parameters: @@ -3356,17 +3399,30 @@ def partitions(grid, grid_partitions): The number of partitions to split the grid into. Only the last (i.e. slowest moving) dimension in `esmpy` order is partitioned. If ``'maximum'`` then the maximum possible - number of partitions is defined. + number of partitions are defined. + + return_n: `bool` + If True then return the number of partitions. :Returns: - generator or `tuple` - The partition specifications, or a `tuple` containing - `None` if there is only one partition. Each partition - specification is a tuple of `slice` objects. + generator or `tuple` or `int` + The partition specifications. Each partition specification + is a tuple of `slice` objects. When there is a single + partition that spans the entire grid, then the special + value of ``(None,)`` is returned. If *return_n* is True + then the number of partitions will be returned. 
""" - if grid.name == "source" or grid.dummy_size_2_dimension: + if ( + grid_partitions == 1 + or grid.name == "source" + or grid.dummy_size_2_dimension + ): + # One partition + if return_n: + return 1 + return (None,) from math import ceil @@ -3377,11 +3433,23 @@ def partitions(grid, grid_partitions): shape = grid.coords[-1].shape if grid_partitions == "maximum": + # Maximum possible number of partitions + if return_n: + return shape[-1] + size = 1 else: if grid_partitions <= 1: + # One partition + if return_n: + return 1 + return (None,) + # Somewhere in the range [1, maximum] number of partitions size = ceil(shape[-1] / grid_partitions) + if return_n: + return len(normalize_chunks((size,), shape=shape[-1:])[0]) + return chunk_indices(normalize_chunks(shape[:-1] + (size,), shape=shape)) diff --git a/cf/regrid/regridoperator.py b/cf/regrid/regridoperator.py index 5dbb839b02..ad467d4a79 100644 --- a/cf/regrid/regridoperator.py +++ b/cf/regrid/regridoperator.py @@ -541,6 +541,10 @@ def copy(self): The deep copy. """ + weights = self.weights + if weights is not None: + weights = weights.copy() + row = self.row if row is not None: row = row.copy() @@ -550,7 +554,7 @@ def copy(self): col = col.copy() return type(self)( - self.weights.copy(), + weights, row, col, method=self.method, @@ -635,10 +639,13 @@ def dump(self, display=True): return "\n".join(string) def equal_weights(self, other, rtol=None, atol=None): - """TODOREGRID + """Whether two regrid operators have identical weights. :Parameters: + other: `RegridOperator` + The other regrid operator to compare. + {{rtol: number, optional}} {{atol: number, optional}} @@ -646,6 +653,8 @@ def equal_weights(self, other, rtol=None, atol=None): :Returns: `bool` + True if the regrid operators have identical weights, + otherwise False. """ if atol is None: @@ -656,12 +665,14 @@ def equal_weights(self, other, rtol=None, atol=None): self.tosparse() other.tosparse() + w0 = self.weights w1 = other.weights - return ( - (w0.indices == w1.indices).all() - and (w0.indptr == w1.indptr).all() + w0.shape == w1.shape + and w0.data.shape == w1.data.shape + and np.array_equal(w0.indices, w1.indices) + and np.array_equal(w0.indptr, w1.indptr) and np.allclose(w0.data, w1.data, rtol=rtol, atol=atol) ) diff --git a/cf/test/test_RegridOperator.py b/cf/test/test_RegridOperator.py index dae6be8ce8..c3bb9aabc2 100644 --- a/cf/test/test_RegridOperator.py +++ b/cf/test/test_RegridOperator.py @@ -42,6 +42,14 @@ def test_RegridOperator_attributes(self): def test_RegridOperator_copy(self): self.assertIsInstance(self.r.copy(), self.r.__class__) + def test_RegridOperator_equal_weights(self): + r = self.r + r.tosparse() + r1 = r.copy() + self.assertTrue(r.equal_weights(r1)) + r1.weights.data += 0.1 + self.assertFalse(r.equal_weights(r1)) + if __name__ == "__main__": print("Run date:", datetime.datetime.now()) diff --git a/cf/test/test_regrid.py b/cf/test/test_regrid.py index 2001b4cce6..f98dbd9631 100644 --- a/cf/test/test_regrid.py +++ b/cf/test/test_regrid.py @@ -31,21 +31,12 @@ def _remove_tmpfiles(): atexit.register(_remove_tmpfiles) -# ESMF renamed its Python module to `esmpy` at ESMF version 8.4.0. Allow -# either for now for backwards compatibility. -esmpy_imported = False +esmpy_imported = True try: - import esmpy - - esmpy_imported = True + import esmpy # noqa: F401 except ImportError: - try: - # Take the new name to use in preference to the old one. 
- import ESMF as esmpy + esmpy_imported = False - esmpy_imported = True - except ImportError: - pass all_methods = ( "linear", @@ -780,12 +771,14 @@ def test_Field_regrid_weights_file(self): dst, method="linear", return_operator=True, weights_file=tmpfile ) self.assertTrue(os.path.isfile(tmpfile)) - self.assertIsNone(r.weights_file) + self.assertEqual(r.weights_file, tmpfile) + self.assertIsNotNone(r.weights) r = src.regrids( dst, method="linear", return_operator=True, weights_file=tmpfile ) self.assertEqual(r.weights_file, tmpfile) + self.assertIsNone(r.weights) # Can't provide weights_file when dst is a RegridOperator with self.assertRaises(ValueError): diff --git a/cf/test/test_regrid_featureType.py b/cf/test/test_regrid_featureType.py index 198b44dd97..a7d36dec56 100644 --- a/cf/test/test_regrid_featureType.py +++ b/cf/test/test_regrid_featureType.py @@ -9,21 +9,12 @@ import cf -# ESMF renamed its Python module to `esmpy` at ESMF version 8.4.0. Allow -# either for now for backwards compatibility. -esmpy_imported = False +esmpy_imported = True try: - import esmpy - - esmpy_imported = True + import esmpy # noqa: F401 except ImportError: - try: - # Take the new name to use in preference to the old one. - import ESMF as esmpy + esmpy_imported = False - esmpy_imported = True - except ImportError: - pass methods = ( "linear", @@ -168,7 +159,7 @@ def test_Field_regrid_featureType_to_grid_2d(self): self.assertFalse(cf.regrid_logging()) # Create some nice data - src = self.dst_featureType + src = self.dst_featureType.copy() src.del_construct("cellmethod0") src = src[:12] src[...] = 273 + np.arange(12) diff --git a/cf/test/test_regrid_mesh.py b/cf/test/test_regrid_mesh.py index 3095640135..30418521e8 100644 --- a/cf/test/test_regrid_mesh.py +++ b/cf/test/test_regrid_mesh.py @@ -9,21 +9,12 @@ import cf -# ESMF renamed its Python module to `esmpy` at ESMF version 8.4.0. Allow -# either for now for backwards compatibility. -esmpy_imported = False +esmpy_imported = True try: - import esmpy - - esmpy_imported = True + import esmpy # noqa: F401 except ImportError: - try: - # Take the new name to use in preference to the old one. 
- import ESMF as esmpy + esmpy_imported = False - esmpy_imported = True - except ImportError: - pass all_methods = ( "linear", diff --git a/cf/test/test_regrid_partitions.py b/cf/test/test_regrid_partitions.py new file mode 100644 index 0000000000..6d64011be3 --- /dev/null +++ b/cf/test/test_regrid_partitions.py @@ -0,0 +1,316 @@ +import datetime +import faulthandler +import os +import unittest + +faulthandler.enable() # to debug seg faults and timeouts + +import numpy as np + +import cf + +esmpy_imported = True +try: + import esmpy # noqa: F401 +except ImportError: + esmpy_imported = False + + +valid_methods = ( + "linear", + "conservative", + "nearest_stod", + "patch", +) +invalid_methods = ("conservative_2nd", "nearest_dtos") + + +class RegridPartitionsTest(unittest.TestCase): + # Get the test source and destination fields + filename = os.path.join( + os.path.dirname(os.path.abspath(__file__)), "regrid.nc" + ) + src_mesh_file = os.path.join( + os.path.dirname(os.path.abspath(__file__)), "ugrid_global_1.nc" + ) + dst_mesh_file = os.path.join( + os.path.dirname(os.path.abspath(__file__)), "ugrid_global_2.nc" + ) + filename_xyz = os.path.join( + os.path.dirname(os.path.abspath(__file__)), "regrid_xyz.nc" + ) + dst_featureType_file = os.path.join( + os.path.dirname(os.path.abspath(__file__)), "dsg_trajectory.nc" + ) + + dst_src_grid = cf.read(filename) + dst_grid = dst_src_grid[0] + src_grid = dst_src_grid[1] + src_mesh = cf.read(src_mesh_file)[0] + dst_mesh = cf.read(dst_mesh_file)[0] + dst_featureType = cf.read(dst_featureType_file)[0] + src_grid_xyz = cf.read(filename_xyz)[0] + + @unittest.skipUnless(esmpy_imported, "Requires esmpy/ESMF package.") + def test_Field_regrid_partitions_2d_grid_to_grid(self): + self.assertFalse(cf.regrid_logging()) + + src = cf.example_field(0) + + dst = src.copy() + with cf.bounds_combination_mode("XOR"): + x = dst.dimension_coordinate("X") + x += 1 + + # Mask some destination grid points + dst[2:3, 4] = cf.masked + + # Loop round spherical and Cartesian coordinate systems + for coord_sys, regrid_func, kwargs in zip( + ("spherical", "Cartesian"), + ("regrids", "regridc"), + ({}, {"axes": ["Y", "X"]}), + ): + kwargs["return_operator"] = True + # Loop over whether or not to use the destination grid + # masked points + for use_dst_mask in (False, True): + kwargs["use_dst_mask"] = use_dst_mask + for method in valid_methods: + kwargs["method"] = method + r0 = getattr(src, regrid_func)(dst, **kwargs) + for n in (2, 3, "maximum"): + r1 = getattr(src, regrid_func)( + dst, dst_grid_partitions=n, **kwargs + ) + self.assertTrue(r0.equal_weights(r1)) + + # ------------------------------------------------------------ + # Destination grid defined by 2-d lats and lons + # ------------------------------------------------------------ + x = dst.coord("X") + y = dst.coord("Y") + x_bounds = x.bounds.array + y_bounds = y.bounds.array + + lat = np.empty((y.size, x.size)) + lat[...] = y.array.reshape(y.size, 1) + lon = np.empty((y.size, x.size)) + lon[...] 
= x.array + + lon_bounds = np.empty(lon.shape + (4,)) + lon_bounds[..., [0, 3]] = x_bounds[:, 0].reshape(1, x.size, 1) + lon_bounds[..., [1, 2]] = x_bounds[:, 1].reshape(1, x.size, 1) + + lat_bounds = np.empty(lat.shape + (4,)) + lat_bounds[..., [0, 1]] = y_bounds[:, 0].reshape(y.size, 1, 1) + lat_bounds[..., [2, 3]] = y_bounds[:, 1].reshape(y.size, 1, 1) + + lon_2d_coord = cf.AuxiliaryCoordinate( + data=cf.Data(lon, units=x.Units), bounds=cf.Bounds(data=lon_bounds) + ) + lat_2d_coord = cf.AuxiliaryCoordinate( + data=cf.Data(lat, units=y.Units), bounds=cf.Bounds(data=lat_bounds) + ) + dst = [lon_2d_coord, lat_2d_coord] + + kwargs = { + "return_operator": True, + "dst_axes": {"X": 1, "Y": 0}, + "dst_cyclic": True, + } + for method in valid_methods: + kwargs["method"] = method + r0 = src.regrids(dst, **kwargs) + for n in (2, 3, "maximum"): + r1 = src.regrids(dst, dst_grid_partitions=n, **kwargs) + self.assertTrue(r0.equal_weights(r1)) + + @unittest.skipUnless(esmpy_imported, "Requires esmpy/ESMF package.") + def test_Field_regrid_partitions_cannot_partition(self): + src = self.src_grid + dst = self.dst_grid + for n in (2, "maximum"): + for method in invalid_methods: + with self.assertRaises(ValueError): + src.regrids(dst, method=method, dst_grid_partitions=n) + + with self.assertRaises(ValueError): + src.regrids( + dst, + method="linear", + dst_grid_partitions=n, + return_esmpy_regrid_operator=True, + ) + + @unittest.skipUnless(esmpy_imported, "Requires esmpy/ESMF package.") + def test_Field_regridc_partitions_1d_3d_grid_to_grid(self): + src = self.src_grid.copy() + dst = self.dst_grid.copy() + + src.transpose(["X", "Y", "T"], inplace=True) + dst.transpose(["Y", "T", "X"], inplace=True) + + # Mask some destination grid points + dst[2:25, 0, 2:35] = cf.masked + + for axes in (["T", "Y", "X"], ["Y"]): + kwargs = {"axes": axes, "return_operator": True} + for use_dst_mask in (False, True): + kwargs["use_dst_mask"] = use_dst_mask + for method in valid_methods: + if method in ("patch",) and len(axes) == 1: + # 'patch' regridding is not available for 1-d + # regridding + continue + + kwargs["method"] = method + r0 = src.regridc(dst, **kwargs) + for n in (2,): + r1 = src.regridc(dst, dst_grid_partitions=n, **kwargs) + self.assertTrue(r0.equal_weights(r1)) + + @unittest.skipUnless(esmpy_imported, "Requires esmpy/ESMF package.") + def test_Field_regrids_partitions_mesh_to_mesh(self): + self.assertFalse(cf.regrid_logging()) + + dst = self.dst_mesh.copy() + src = self.src_mesh.copy() + + # Mask some destination grid points + dst[0, 2:35] = cf.masked + + kwargs = {"return_operator": True} + for use_dst_mask in (False, True): + kwargs["use_dst_mask"] = use_dst_mask + for method in valid_methods: + kwargs["method"] = method + r0 = src.regrids(dst, **kwargs) + for n in (2,): + r1 = src.regrids(dst, dst_grid_partitions=n, **kwargs) + self.assertTrue(r0.equal_weights(r1)) + + @unittest.skipUnless(esmpy_imported, "Requires esmpy/ESMF package.") + def test_Field_regrids_partitions_mesh_to_grid(self): + self.assertFalse(cf.regrid_logging()) + + dst = self.dst_grid.copy() + src = self.src_mesh.copy() + + # Mask some destination grid points + dst[0, 30, 2:35] = cf.masked + + kwargs = {"return_operator": True} + for src_masked in (False, True): + for use_dst_mask in (False, True): + kwargs["use_dst_mask"] = use_dst_mask + for method in valid_methods: + kwargs["method"] = method + r0 = src.regrids(dst, **kwargs) + for n in (2,): + r1 = src.regrids(dst, dst_grid_partitions=n, **kwargs) + 
self.assertTrue(r0.equal_weights(r1)) + + @unittest.skipUnless(esmpy_imported, "Requires esmpy/ESMF package.") + def test_Field_regrids_partitions_grid_to_mesh(self): + self.assertFalse(cf.regrid_logging()) + + src = self.src_grid.copy() + dst = self.src_mesh.copy() + + # Mask some destination grid points + dst[100:300] = cf.masked + + kwargs = {"return_operator": True} + for src_masked in (False, True): + for use_dst_mask in (False, True): + kwargs["use_dst_mask"] = use_dst_mask + for method in valid_methods: + kwargs["method"] = method + r0 = src.regrids(dst, **kwargs) + for n in (2,): + r1 = src.regrids(dst, dst_grid_partitions=n, **kwargs) + self.assertTrue(r0.equal_weights(r1)) + + @unittest.skipUnless(esmpy_imported, "Requires esmpy/ESMF package.") + def test_Field_regrids_partitions_grid_to_featureType_3d(self): + self.assertFalse(cf.regrid_logging()) + + dst = self.dst_featureType.copy() + src = self.src_grid_xyz.copy() + + # Mask some destination grid points + dst[20:25] = cf.masked + + kwargs = {"return_operator": True, "z": "air_pressure", "ln_z": True} + for use_dst_mask in (False, True): + kwargs["use_dst_mask"] = use_dst_mask + for method in valid_methods: + if method == "conservative": + # Can't do conservative regridding to a + # destination DSG featureType + continue + + kwargs["method"] = method + r0 = src.regrids(dst, **kwargs) + for n in (2,): + r1 = src.regrids(dst, dst_grid_partitions=n, **kwargs) + self.assertTrue(r0.equal_weights(r1)) + + @unittest.skipUnless(esmpy_imported, "Requires esmpy/ESMF package.") + def test_Field_regrids_partitions_grid_to_featureType_2d(self): + self.assertFalse(cf.regrid_logging()) + + dst = self.dst_featureType.copy() + src = self.src_grid_xyz.copy() + src = src[0, 0, :, :] + + # Mask some destination grid points + dst[20:25] = cf.masked + + kwargs = {"return_operator": True} + for use_dst_mask in (False, True): + kwargs["use_dst_mask"] = use_dst_mask + for method in valid_methods: + if method == "conservative": + # Can't do conservative regridding to a + # destination DSG featureType + continue + + kwargs["method"] = method + r0 = src.regrids(dst, **kwargs) + for n in (2,): + r1 = src.regrids(dst, dst_grid_partitions=n, **kwargs) + self.assertTrue(r0.equal_weights(r1)) + + @unittest.skipUnless(esmpy_imported, "Requires esmpy/ESMF package.") + def test_Field_regrids_partitions_mesh_to_featureType_2d(self): + self.assertFalse(cf.regrid_logging()) + + dst = self.dst_featureType.copy() + src = self.src_mesh.copy() + + # Mask some destination grid points + dst[20:25] = cf.masked + + kwargs = {"return_operator": True} + for use_dst_mask in (False, True): + kwargs["use_dst_mask"] = use_dst_mask + for method in valid_methods: + if method == "conservative": + # Can't do conservative regridding to a + # destination DSG featureType + continue + + kwargs["method"] = method + r0 = src.regrids(dst, **kwargs) + for n in (2,): + r1 = src.regrids(dst, dst_grid_partitions=n, **kwargs) + self.assertTrue(r0.equal_weights(r1)) + + +if __name__ == "__main__": + print("Run date:", datetime.datetime.now()) + cf.environment() + print("") + unittest.main(verbosity=2) From 63eba70a6dd80f0129640de0f4219946b100c2e3 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Wed, 16 Jul 2025 17:07:25 +0100 Subject: [PATCH 07/54] dev --- cf/test/test_quantization.py | 1 - cf/test/test_regrid.py | 28 ++++++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/cf/test/test_quantization.py b/cf/test/test_quantization.py index 
03960da987..19f7fdc20a 100644 --- a/cf/test/test_quantization.py +++ b/cf/test/test_quantization.py @@ -63,7 +63,6 @@ def test_quantization_read_write(self): f.set_quantize_on_write(q0) # Write the field and read it back in - tmpfile1 = "delme1.nc" cf.write(f, tmpfile1) g = cf.read(tmpfile1)[0] diff --git a/cf/test/test_regrid.py b/cf/test/test_regrid.py index f98dbd9631..e5d28ab918 100644 --- a/cf/test/test_regrid.py +++ b/cf/test/test_regrid.py @@ -307,6 +307,8 @@ def test_Field_regrid_2d_field(self): @unittest.skipUnless(esmpy_imported, "Requires esmpy/ESMF package.") def test_Field_regrids_coords(self): """Spherical regridding with coords destination grid.""" + self.assertFalse(cf.regrid_logging()) + dst = self.dst.copy() src = self.src.copy() @@ -375,6 +377,8 @@ def test_Field_regrids_coords(self): @unittest.skipUnless(esmpy_imported, "Requires esmpy/ESMF package.") def test_Field_regridc_2d_coords(self): """2-d Cartesian regridding with coords destination grid.""" + self.assertFalse(cf.regrid_logging()) + dst = self.dst.copy() src = self.src.copy() @@ -403,6 +407,8 @@ def test_Field_regridc_2d_coords(self): def test_Field_regrids_bad_dst(self): """Disallowed destination grid types raise an exception.""" + self.assertFalse(cf.regrid_logging()) + with self.assertRaises(TypeError): self.src.regrids(999, method="conservative") @@ -415,6 +421,8 @@ def test_Field_regrids_bad_dst(self): @unittest.skipUnless(esmpy_imported, "Requires esmpy/ESMF package.") def test_Field_regrids_domain(self): """Spherical regridding with Domain destination grid.""" + self.assertFalse(cf.regrid_logging()) + dst = self.dst src = self.src @@ -437,6 +445,8 @@ def test_Field_regrids_domain(self): @unittest.skipUnless(esmpy_imported, "Requires esmpy/ESMF package.") def test_Field_regridc_domain(self): """Spherical regridding with Domain destination grid.""" + self.assertFalse(cf.regrid_logging()) + dst = self.dst src = self.src @@ -461,6 +471,8 @@ def test_Field_regridc_domain(self): @unittest.skipUnless(esmpy_imported, "Requires esmpy/ESMF package.") def test_Field_regrids_field_operator(self): """Spherical regridding with operator destination grid.""" + self.assertFalse(cf.regrid_logging()) + dst = self.dst src = self.src @@ -497,6 +509,8 @@ def test_Field_regrids_field_operator(self): @unittest.skipUnless(esmpy_imported, "Requires esmpy/ESMF package.") def test_Field_regrids_non_coordinates(self): """Check setting of non-coordinate metadata.""" + self.assertFalse(cf.regrid_logging()) + dst = cf.example_field(1) src = self.src @@ -544,6 +558,8 @@ def test_Field_regrids_non_coordinates(self): @unittest.skipUnless(esmpy_imported, "Requires esmpy/ESMF package.") def test_Field_regridc_3d_field(self): """3-d Cartesian regridding with Field destination grid.""" + self.assertFalse(cf.regrid_logging()) + methods = list(all_methods) methods.remove("conservative_2nd") @@ -637,6 +653,8 @@ def test_Field_regridc_3d_field(self): self.assertTrue(np.allclose(y, a, atol=atol, rtol=rtol)) # These methods aren't meant to work for 3-d regridding + # + # Note: This test leaves behind a PET0.ESMF_LogFile for method in ("conservative_2nd",): with self.assertRaises(ValueError): src.regridc(dst, method=method, axes=axes) @@ -644,6 +662,8 @@ def test_Field_regridc_3d_field(self): @unittest.skipUnless(esmpy_imported, "Requires esmpy/ESMF package.") def test_Field_regridc_1d_field(self): """1-d Cartesian regridding with Field destination grid.""" + self.assertFalse(cf.regrid_logging()) + methods = list(all_methods) 
methods.remove("conservative_2nd") methods.remove("patch") @@ -733,6 +753,8 @@ def test_Field_regridc_1d_field(self): @unittest.skipUnless(esmpy_imported, "Requires esmpy/ESMF package.") def test_Field_regridc_1d_coordinates_z(self): """1-d Z Cartesian regridding with coordinates destination grid.""" + self.assertFalse(cf.regrid_logging()) + src = cf.read(self.filename_xyz)[0] dst = cf.DimensionCoordinate( data=cf.Data([800, 705, 632, 510, 320.0], "hPa") @@ -744,6 +766,8 @@ def test_Field_regridc_1d_coordinates_z(self): @unittest.skipUnless(esmpy_imported, "Requires esmpy/ESMF package.") def test_Field_regrid_chunks(self): """Regridding of chunked axes""" + self.assertFalse(cf.regrid_logging()) + filename = os.path.join( os.path.dirname(os.path.abspath(__file__)), "regrid.nc" ) @@ -759,6 +783,8 @@ def test_Field_regrid_chunks(self): @unittest.skipUnless(esmpy_imported, "Requires esmpy/ESMF package.") def test_Field_regrid_weights_file(self): """Regridding creation/use of weights file""" + self.assertFalse(cf.regrid_logging()) + dst = self.dst src = self.src @@ -789,6 +815,8 @@ def test_Field_regrid_weights_file(self): @unittest.skipUnless(esmpy_imported, "Requires esmpy/ESMF package.") def test_return_esmpy_regrid_operator(self): """esmpy regrid operator returns esmpy.Regrid in regrids and regridc""" + self.assertFalse(cf.regrid_logging()) + dst = self.dst src = self.src From 412cd52a1a41612496e93a9debe2b3a6d1503dd6 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Thu, 17 Jul 2025 09:49:23 +0100 Subject: [PATCH 08/54] dev --- cf/regrid/regridoperator.py | 29 +++++++++++++++++++++++++++++ cf/test/test_RegridOperator.py | 21 ++++++++++++++++----- cf/test/test_regrid_partitions.py | 9 +++++++++ 3 files changed, 54 insertions(+), 5 deletions(-) diff --git a/cf/regrid/regridoperator.py b/cf/regrid/regridoperator.py index ad467d4a79..1c0dc27803 100644 --- a/cf/regrid/regridoperator.py +++ b/cf/regrid/regridoperator.py @@ -638,6 +638,35 @@ def dump(self, display=True): return "\n".join(string) + def equal_dst_mask(self, other): + """Whether two regrid operators have identical destination masks. + + :Parameters: + + other: `RegridOperator` + The other regrid operator to compare. + + :Returns: + + `bool` + True if the regrid operators have destination masks, + otherwise False. + + """ + self.tosparse() + other.tosparse() + + m0 = self.dst_mask + m1 = other.dst_mask + + if m0 is None and m1 is None: + return True + + if m0 is None or m1 is None: + return False + + return np.array_equal(m0, m1) + def equal_weights(self, other, rtol=None, atol=None): """Whether two regrid operators have identical weights. 
diff --git a/cf/test/test_RegridOperator.py b/cf/test/test_RegridOperator.py index c3bb9aabc2..3a4a92bc77 100644 --- a/cf/test/test_RegridOperator.py +++ b/cf/test/test_RegridOperator.py @@ -43,12 +43,23 @@ def test_RegridOperator_copy(self): self.assertIsInstance(self.r.copy(), self.r.__class__) def test_RegridOperator_equal_weights(self): - r = self.r - r.tosparse() - r1 = r.copy() - self.assertTrue(r.equal_weights(r1)) + r0 = self.r + r1 = r0.copy() + self.assertTrue(r0.equal_weights(r1)) r1.weights.data += 0.1 - self.assertFalse(r.equal_weights(r1)) + self.assertFalse(r0.equal_weights(r1)) + + def test_RegridOperator_equal_dst_mask(self): + r0 = self.r.copy() + r1 = r0.copy() + self.assertTrue(r0.equal_dst_mask(r1)) + mask = [True, False] + r0._set_component("dst_mask", mask) + self.assertFalse(r0.equal_dst_mask(r1)) + r1._set_component("dst_mask", mask) + self.assertTrue(r0.equal_dst_mask(r1)) + r1._set_component("dst_mask", mask[::-1]) + self.assertFalse(r0.equal_dst_mask(r1)) if __name__ == "__main__": diff --git a/cf/test/test_regrid_partitions.py b/cf/test/test_regrid_partitions.py index 6d64011be3..6aa7afd0f4 100644 --- a/cf/test/test_regrid_partitions.py +++ b/cf/test/test_regrid_partitions.py @@ -84,6 +84,7 @@ def test_Field_regrid_partitions_2d_grid_to_grid(self): dst, dst_grid_partitions=n, **kwargs ) self.assertTrue(r0.equal_weights(r1)) + self.assertTrue(r0.equal_dst_mask(r1)) # ------------------------------------------------------------ # Destination grid defined by 2-d lats and lons @@ -125,6 +126,7 @@ def test_Field_regrid_partitions_2d_grid_to_grid(self): for n in (2, 3, "maximum"): r1 = src.regrids(dst, dst_grid_partitions=n, **kwargs) self.assertTrue(r0.equal_weights(r1)) + self.assertTrue(r0.equal_dst_mask(r1)) @unittest.skipUnless(esmpy_imported, "Requires esmpy/ESMF package.") def test_Field_regrid_partitions_cannot_partition(self): @@ -169,6 +171,7 @@ def test_Field_regridc_partitions_1d_3d_grid_to_grid(self): for n in (2,): r1 = src.regridc(dst, dst_grid_partitions=n, **kwargs) self.assertTrue(r0.equal_weights(r1)) + self.assertTrue(r0.equal_dst_mask(r1)) @unittest.skipUnless(esmpy_imported, "Requires esmpy/ESMF package.") def test_Field_regrids_partitions_mesh_to_mesh(self): @@ -189,6 +192,7 @@ def test_Field_regrids_partitions_mesh_to_mesh(self): for n in (2,): r1 = src.regrids(dst, dst_grid_partitions=n, **kwargs) self.assertTrue(r0.equal_weights(r1)) + self.assertTrue(r0.equal_dst_mask(r1)) @unittest.skipUnless(esmpy_imported, "Requires esmpy/ESMF package.") def test_Field_regrids_partitions_mesh_to_grid(self): @@ -210,6 +214,7 @@ def test_Field_regrids_partitions_mesh_to_grid(self): for n in (2,): r1 = src.regrids(dst, dst_grid_partitions=n, **kwargs) self.assertTrue(r0.equal_weights(r1)) + self.assertTrue(r0.equal_dst_mask(r1)) @unittest.skipUnless(esmpy_imported, "Requires esmpy/ESMF package.") def test_Field_regrids_partitions_grid_to_mesh(self): @@ -231,6 +236,7 @@ def test_Field_regrids_partitions_grid_to_mesh(self): for n in (2,): r1 = src.regrids(dst, dst_grid_partitions=n, **kwargs) self.assertTrue(r0.equal_weights(r1)) + self.assertTrue(r0.equal_dst_mask(r1)) @unittest.skipUnless(esmpy_imported, "Requires esmpy/ESMF package.") def test_Field_regrids_partitions_grid_to_featureType_3d(self): @@ -256,6 +262,7 @@ def test_Field_regrids_partitions_grid_to_featureType_3d(self): for n in (2,): r1 = src.regrids(dst, dst_grid_partitions=n, **kwargs) self.assertTrue(r0.equal_weights(r1)) + self.assertTrue(r0.equal_dst_mask(r1)) 
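The equal_weights() and equal_dst_mask() methods exercised by these tests compare two operators structurally: same shape, identical sparsity pattern (indices and indptr), stored weight values equal to within rtol/atol, and destination masks that are either both absent or element-wise identical. A minimal standalone sketch of that style of comparison, using toy SciPy CSR arrays rather than real regrid operators:

import numpy as np
from scipy.sparse import csr_array

data = np.array([0.25, 0.75, 1.0])
row = np.array([0, 0, 1])
col = np.array([3, 4, 7])
w0 = csr_array((data, (row, col)), shape=(2, 10))
w1 = w0.copy()
w1.data += 1e-12  # within tolerance, so still compares equal


def weights_equal(a, b, rtol=1e-7, atol=1e-12):
    # Identical sparsity structure and numerically close stored values
    return (
        a.shape == b.shape
        and a.data.shape == b.data.shape
        and np.array_equal(a.indices, b.indices)
        and np.array_equal(a.indptr, b.indptr)
        and np.allclose(a.data, b.data, rtol=rtol, atol=atol)
    )


print(weights_equal(w0, w1))  # True
w1.data += 0.1
print(weights_equal(w0, w1))  # False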
@unittest.skipUnless(esmpy_imported, "Requires esmpy/ESMF package.") def test_Field_regrids_partitions_grid_to_featureType_2d(self): @@ -282,6 +289,7 @@ def test_Field_regrids_partitions_grid_to_featureType_2d(self): for n in (2,): r1 = src.regrids(dst, dst_grid_partitions=n, **kwargs) self.assertTrue(r0.equal_weights(r1)) + self.assertTrue(r0.equal_dst_mask(r1)) @unittest.skipUnless(esmpy_imported, "Requires esmpy/ESMF package.") def test_Field_regrids_partitions_mesh_to_featureType_2d(self): @@ -307,6 +315,7 @@ def test_Field_regrids_partitions_mesh_to_featureType_2d(self): for n in (2,): r1 = src.regrids(dst, dst_grid_partitions=n, **kwargs) self.assertTrue(r0.equal_weights(r1)) + self.assertTrue(r0.equal_dst_mask(r1)) if __name__ == "__main__": From 2b1dd830552a4f3793e393f52065888ec86c0ade Mon Sep 17 00:00:00 2001 From: David Hassell Date: Thu, 17 Jul 2025 12:05:46 +0100 Subject: [PATCH 09/54] dev --- cf/regrid/regrid.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/cf/regrid/regrid.py b/cf/regrid/regrid.py index 2c379bb629..90f322c2ea 100644 --- a/cf/regrid/regrid.py +++ b/cf/regrid/regrid.py @@ -3,13 +3,14 @@ import logging from dataclasses import dataclass, field from datetime import datetime +from time import time from typing import Any import dask.array as da import numpy as np from cfdm import is_log_level_debug -from ..functions import DeprecationError, regrid_logging +from ..functions import DeprecationError, regrid_logging, free_memory from ..units import Units from .regridoperator import RegridOperator @@ -734,8 +735,9 @@ def regrid( if debug: logger.debug( - f"Sparse weights: {regrid_operator.weights!r}\n" - f" {regrid_operator.weights.__dict__}" + "Weights matrix for all partitions:\n" + f"{regrid_operator.weights!r}\n" + f"{regrid_operator.weights.__dict__}" ) # pragma: no cover return regrid_operator @@ -1249,7 +1251,7 @@ def spherical_grid( f"The {name} latitude and longitude coordinates " "are 2-d but the X and Y axes could not be identified " "from dimension coordinates nor from the " - f"{'src_axes' if name == 'source' else 'dst_axes'!r}" + f"{'src_axes' if name == 'source' else 'dst_axes'!r} " "parameter" ) @@ -2615,6 +2617,7 @@ def create_esmpy_weights( for i, dst_esmpy_grid in enumerate(dst_esmpy_grids): if debug: + start_time = time() logger.debug( f"Destination ESMF Grid (partition {i}):\n" f"{dst_esmpy_grid}\n" @@ -2694,6 +2697,13 @@ def create_esmpy_weights( row = None col = None + if debug: + logger.debug( + f"Time taken to calculate weights for partition {i}: " + f"{time() - start_time} s\n" + f"Free memory: {free_memory()/(2**30)} GiB\n" + ) # pragma: no cover + if esmpy_regrid_operator is None: # Destroy esmpy objects that are no longer needed src_esmpy_grid.destroy() From f7b8316e31d95ef573ca5143f2136661d2e76fbc Mon Sep 17 00:00:00 2001 From: David Hassell Date: Thu, 17 Jul 2025 12:08:49 +0100 Subject: [PATCH 10/54] dev --- cf/regrid/regrid.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/cf/regrid/regrid.py b/cf/regrid/regrid.py index 90f322c2ea..adf8c93208 100644 --- a/cf/regrid/regrid.py +++ b/cf/regrid/regrid.py @@ -2728,6 +2728,12 @@ def create_esmpy_weights( dst_size = weights.shape[0] del w + if debug: + logger.debug( + "Free memory after final weights matrix creation: " + f"{free_memory()/(2**30)} GiB\n" + ) # pragma: no cover + if weights_file is not None: # Write the weights to a netCDF file (copying the # dimension and variable names and structure of a weights From 
c75ee35ba45e7c2896c81912d0a02a1b901bd792 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Thu, 17 Jul 2025 12:16:30 +0100 Subject: [PATCH 11/54] dev --- cf/regrid/regrid.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cf/regrid/regrid.py b/cf/regrid/regrid.py index adf8c93208..cb6787c989 100644 --- a/cf/regrid/regrid.py +++ b/cf/regrid/regrid.py @@ -2696,6 +2696,10 @@ def create_esmpy_weights( ) row = None col = None + if debug: + logger.debug( + f"Sparse weights array for partition {i}: {w[-1]!r}\n" + ) # pragma: no cover if debug: logger.debug( From 04bbe3dc16460fafc019c317ade562ccaf106a07 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Thu, 17 Jul 2025 13:47:59 +0100 Subject: [PATCH 12/54] dev --- cf/field.py | 19 ------- cf/mixin/propertiesdatabounds.py | 13 ----- cf/regrid/regrid.py | 90 +++++++++++++++++++++----------- 3 files changed, 59 insertions(+), 63 deletions(-) diff --git a/cf/field.py b/cf/field.py index 78b8da4565..5c0f0b41bb 100644 --- a/cf/field.py +++ b/cf/field.py @@ -382,14 +382,6 @@ def __getitem__(self, indices): (6, 4, 3) """ - debug = is_log_level_debug(logger) - - if debug: - logger.debug( - self.__class__.__name__ + ".__getitem__" - ) # pragma: no cover - logger.debug(f" input indices = {indices}") # pragma: no cover - if indices is Ellipsis: return self.copy() @@ -437,12 +429,6 @@ def __getitem__(self, indices): else: findices = indices - if debug: - logger.debug(f" shape = {shape}") # pragma: no cover - logger.debug(f" indices = {indices}") # pragma: no cover - logger.debug(f" indices2 = {indices2}") # pragma: no cover - logger.debug(f" findices = {findices}") # pragma: no cover - new_data = data[tuple(findices)] if 0 in new_data.shape: @@ -496,11 +482,6 @@ def __getitem__(self, indices): else: dice.append(slice(None)) - if debug: - logger.debug( - f" dice = {tuple(dice)}" - ) # pragma: no cover - # Generally we do not apply an ancillary mask to the # metadata items, but for DSGs we do. 
if ancillary_mask and new.DSG: diff --git a/cf/mixin/propertiesdatabounds.py b/cf/mixin/propertiesdatabounds.py index 21e8b9f803..546f19f8f7 100644 --- a/cf/mixin/propertiesdatabounds.py +++ b/cf/mixin/propertiesdatabounds.py @@ -82,13 +82,6 @@ def __getitem__(self, indices): findices = tuple(indices) cname = self.__class__.__name__ - if is_log_level_debug(logger): - logger.debug( - f"{cname}.__getitem__: shape = {self.shape}\n" - f"{cname}.__getitem__: indices2 = {indices2}\n" - f"{cname}.__getitem__: indices = {indices}\n" - f"{cname}.__getitem__: findices = {findices}" - ) # pragma: no cover data = self.get_data(None, _fill_value=False) if data is not None: @@ -133,12 +126,6 @@ def __getitem__(self, indices): mask.insert_dimension(-1) for mask in findices[1] ] - if is_log_level_debug(logger): - logger.debug( - f"{self.__class__.__name__}.__getitem__: findices for " - f"bounds = {tuple(findices)}" - ) # pragma: no cover - new.bounds.set_data(bounds_data[tuple(findices)], copy=False) # Remove the direction, as it may now be wrong diff --git a/cf/regrid/regrid.py b/cf/regrid/regrid.py index cb6787c989..634981dbf2 100644 --- a/cf/regrid/regrid.py +++ b/cf/regrid/regrid.py @@ -10,7 +10,7 @@ import numpy as np from cfdm import is_log_level_debug -from ..functions import DeprecationError, regrid_logging, free_memory +from ..functions import DeprecationError, free_memory, regrid_logging from ..units import Units from .regridoperator import RegridOperator @@ -684,7 +684,7 @@ def regrid( src_grid.coords.pop() if src_grid.bounds: src_grid.bounds.pop() - + print (33) # Create regrid operator regrid_operator = RegridOperator( weights, @@ -726,20 +726,23 @@ def regrid( src, src_grid, regrid_operator, check_coordinates=check_coordinates ) + print (44) if return_operator: # Note: The `RegridOperator.tosparse` method will also set # 'dst_mask' to False for destination points with all # zero weights. 
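The note above relies on being able to identify destination points whose weights are all zero. In CSR form these are typically the rows with no stored entries, which can be found from indptr alone without touching the data. A small standalone illustration of that check (toy matrix, not code from this patch):

import numpy as np
from scipy.sparse import csr_array

# 4 destination cells x 5 source cells; destination rows 2 and 3 have no weights
w = csr_array(
    (np.array([0.5, 0.5, 1.0]), (np.array([0, 0, 1]), np.array([0, 1, 4]))),
    shape=(4, 5),
)

# A row whose indptr does not advance holds no non-zero weights
unmapped = np.diff(w.indptr) == 0
print(unmapped)  # [False False  True  True]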
if regrid_operator.weights_file is None: + print (45) regrid_operator.tosparse() + print (46) if debug: logger.debug( - "Weights matrix for all partitions:\n" + "Sparse weights array for all partitions:\n" f"{regrid_operator.weights!r}\n" f"{regrid_operator.weights.__dict__}" ) # pragma: no cover - + print (47) return regrid_operator from scipy.sparse import issparse @@ -1998,7 +2001,7 @@ def create_esmpy_grid(grid, mask=None, grid_partitions=1): if partition is not None: if debug: logger.debug( - f"Partition {i} index: {partition}" + f"Partition {i}: Index: {partition}" ) # pragma: no cover ndim = coords[-1].ndim @@ -2564,8 +2567,9 @@ def create_esmpy_weights( src_esmpy_grid = src_esmpy_grids[0] if debug: + klass = src_esmpy_grid.__class__.__name__ logger.debug( - f"Source ESMF Grid:\n{src_esmpy_grid}\n\n" + f"Source ESMF {src_esmpy_grid}\n\n" ) # pragma: no cover compute_weights = True @@ -2615,14 +2619,21 @@ def create_esmpy_weights( # each destination grid partition w = [] + + if debug: + start_time0 = time() + + # Loop round destination grid partitions for i, dst_esmpy_grid in enumerate(dst_esmpy_grids): if debug: - start_time = time() + klass = dst_esmpy_grid.__class__.__name__ logger.debug( - f"Destination ESMF Grid (partition {i}):\n" - f"{dst_esmpy_grid}\n" + f"Partition {i}: Time taken to create destination ESMF " + f"{klass}: {time() - start_time0} s\n" + f"Partition {i}: Destination ESMF {dst_esmpy_grid}" ) # pragma: no cover - + start_time = time() + dst_esmpy_field = esmpy.Field( dst_esmpy_grid, name="dst", meshloc=dst_meshloc ) @@ -2650,6 +2661,12 @@ def create_esmpy_weights( col = weights["col_src"] weights = weights["weights"] + if debug: + logger.debug( + f"Partition {i}: Time taken by esmpy.Regrid: " + f"{time() - start_time} s" + ) # pragma: no cover + ESMF_unmapped_action = r.unmapped_action ESMF_ignore_degenerate = int(r.ignore_degenerate) @@ -2689,46 +2706,57 @@ def create_esmpy_weights( # the sparse weights array for this partition. row -= 1 col -= 1 - w.append( - csr_array( - (weights, (row, col)), shape=[dst_size, src_size] - ) + weights = csr_array( + (weights, (row, col)), shape=[dst_size, src_size] ) + w.append(weights) + row = None col = None + if debug: logger.debug( - f"Sparse weights array for partition {i}: {w[-1]!r}\n" + f"Partition {i}: Sparse weights array: {weights!r}" ) # pragma: no cover - + print (weights.indptr.dtype,weights.indices.dtype) + if debug: logger.debug( - f"Time taken to calculate weights for partition {i}: " - f"{time() - start_time} s\n" - f"Free memory: {free_memory()/(2**30)} GiB\n" + f"Partition {i}: Total time taken to calculate weights: " + f"{time() - start_time0} s\n" + f"Partition {i}: Free memory after weights calculation: " + f"{free_memory()/(2**30)} GiB\n" ) # pragma: no cover - + start_time0 = time() + print (11) if esmpy_regrid_operator is None: # Destroy esmpy objects that are no longer needed src_esmpy_grid.destroy() + print (12) src_esmpy_field.destroy() + print (13) r.srcfield.grid.destroy() + print (14) r.srcfield.destroy() - + print (15) + print (22) if partitioned_dst_grid: # The destination grid has been partitioned, so # concatenate the sparse weights arrays for all # destination grid partitions. - weights = vstack(w) - - # Cast indptr and indices as 32-bit integers, if possible. 
- weights.indptr = weights.indptr.astype( - integer_dtype(dst_size), copy=False - ) - weights.indices = weights.indices.astype( - integer_dtype(src_size), copy=False - ) - + if debug: + start_time = time() + logger.debug( + "Starting concatenation of sparse weights arrays for " + "all partitions ..." + ) # pragma: no cover + + weights = vstack(w, format="csr") + if debug: + logger.debug( + f"... finished in {time() - start_time} s" + ) # pragma: no cover + dst_size = weights.shape[0] del w From 2c127f89b8a34ea9bf85cf943a675c19f4b48657 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Thu, 17 Jul 2025 14:03:48 +0100 Subject: [PATCH 13/54] dev --- cf/regrid/regrid.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cf/regrid/regrid.py b/cf/regrid/regrid.py index 634981dbf2..f90c05f867 100644 --- a/cf/regrid/regrid.py +++ b/cf/regrid/regrid.py @@ -2569,7 +2569,7 @@ def create_esmpy_weights( if debug: klass = src_esmpy_grid.__class__.__name__ logger.debug( - f"Source ESMF {src_esmpy_grid}\n\n" + f"Source ESMF {src_esmpy_grid}\n" ) # pragma: no cover compute_weights = True @@ -2628,8 +2628,8 @@ def create_esmpy_weights( if debug: klass = dst_esmpy_grid.__class__.__name__ logger.debug( - f"Partition {i}: Time taken to create destination ESMF " - f"{klass}: {time() - start_time0} s\n" + f"Partition {i}: Time taken to create ESMF {klass}: " + f"{time() - start_time0} s\n" f"Partition {i}: Destination ESMF {dst_esmpy_grid}" ) # pragma: no cover start_time = time() From b7cac3f381387dfa96087967bac47efbe41e2e89 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Fri, 18 Jul 2025 09:14:34 +0100 Subject: [PATCH 14/54] dev --- cf/regrid/regrid.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/cf/regrid/regrid.py b/cf/regrid/regrid.py index f90c05f867..0a53b6eb44 100644 --- a/cf/regrid/regrid.py +++ b/cf/regrid/regrid.py @@ -2770,18 +2770,20 @@ def create_esmpy_weights( # Write the weights to a netCDF file (copying the # dimension and variable names and structure of a weights # file created by ESMF). + print (0) from cfdm.data.locks import netcdf_lock from netCDF4 import Dataset from .. import __version__ from_file = True - + print (1) if partitioned_dst_grid: # 'weights' is a CSR sparse array, so we have to infer # the row and column arrays from it. weights_data = weights.data row, col = weights.tocoo().coords + print(2) if start_index: # 'row' and 'col' come out of `tocoo().coords` as # zero-based values @@ -2790,6 +2792,7 @@ def create_esmpy_weights( else: weights_data = weights + print(3) regrid_method = f"{src_grid.coord_sys} {src_grid.method}" if src_grid.ln_z: regrid_method += f", ln {src_grid.method} in vertical" @@ -2827,21 +2830,23 @@ def create_esmpy_weights( v = nc.createVariable( "S", weights_data.dtype, ("n_s",), zlib=True ) + print(4) v.long_name = "Weights values" v[...] = weights_data - + print(5) v = nc.createVariable("row", row.dtype, ("n_s",), zlib=True) v.long_name = "Destination/row indices" v.start_index = start_index v[...] = row + print(6) v = nc.createVariable("col", col.dtype, ("n_s",), zlib=True) v.long_name = "Source/col indices" v.start_index = start_index v[...] = col - + print(7) nc.close() - + print(8) if partitioned_dst_grid: # Reset 'row' and 'col' to None, because 'weights' is # already a sparse array. 
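The weights file written above uses the flat ESMF-style layout: the non-zero weight values in "S" and their destination/source indices in "row" and "col", both offset by start_index (1-based here). A standalone sketch of that layout being written and then read back into a sparse matrix; the file name, sizes and values are made up for illustration, and only the variable names and the start_index convention follow the code above:

import numpy as np
from netCDF4 import Dataset
from scipy.sparse import csr_array

S = np.array([0.5, 0.5, 1.0])             # weight values
row = np.array([1, 1, 2], dtype="int32")  # 1-based destination indices
col = np.array([1, 2, 3], dtype="int32")  # 1-based source indices

with Dataset("weights_example.nc", "w") as nc:
    nc.createDimension("n_s", S.size)
    v = nc.createVariable("S", S.dtype, ("n_s",), zlib=True)
    v[...] = S
    v = nc.createVariable("row", row.dtype, ("n_s",), zlib=True)
    v.start_index = 1
    v[...] = row
    v = nc.createVariable("col", col.dtype, ("n_s",), zlib=True)
    v.start_index = 1
    v[...] = col

with Dataset("weights_example.nc", "r") as nc:
    S = np.asarray(nc.variables["S"][...])
    r = np.asarray(nc.variables["row"][...]) - 1  # back to 0-based
    c = np.asarray(nc.variables["col"][...]) - 1

dst_size, src_size = 2, 4
weights = csr_array((S, (r, c)), shape=(dst_size, src_size))
print(weights.toarray())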
@@ -2852,7 +2857,7 @@ def create_esmpy_weights( # Make the Regrid instance available via the # 'esmpy_regrid_operator' list esmpy_regrid_operator.append(r) - + print(99) return weights, row, col, start_index, from_file From a13bdcaaafd89bfae0eb6f167988a28cad245ba4 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Fri, 18 Jul 2025 09:19:37 +0100 Subject: [PATCH 15/54] dev --- cf/regrid/regrid.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cf/regrid/regrid.py b/cf/regrid/regrid.py index 0a53b6eb44..d40575b0ef 100644 --- a/cf/regrid/regrid.py +++ b/cf/regrid/regrid.py @@ -731,9 +731,9 @@ def regrid( # Note: The `RegridOperator.tosparse` method will also set # 'dst_mask' to False for destination points with all # zero weights. - if regrid_operator.weights_file is None: - print (45) - regrid_operator.tosparse() +# if regrid_operator.weights_file is None: + print (45) + regrid_operator.tosparse() print (46) if debug: From 140e57d43a81b85a96af75a3c8a7aae09685dd5d Mon Sep 17 00:00:00 2001 From: David Hassell Date: Fri, 18 Jul 2025 09:25:08 +0100 Subject: [PATCH 16/54] dev --- cf/regrid/regrid.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/cf/regrid/regrid.py b/cf/regrid/regrid.py index d40575b0ef..961c8f731a 100644 --- a/cf/regrid/regrid.py +++ b/cf/regrid/regrid.py @@ -731,18 +731,19 @@ def regrid( # Note: The `RegridOperator.tosparse` method will also set # 'dst_mask' to False for destination points with all # zero weights. -# if regrid_operator.weights_file is None: - print (45) - regrid_operator.tosparse() - - print (46) - if debug: - logger.debug( - "Sparse weights array for all partitions:\n" - f"{regrid_operator.weights!r}\n" - f"{regrid_operator.weights.__dict__}" - ) # pragma: no cover + if regrid_operator.weights is not None: + print (45) + regrid_operator.tosparse() + + print (46) + if debug: + logger.debug( + "Sparse weights array for all partitions:\n" + f"{regrid_operator.weights!r}\n" + f"{regrid_operator.weights.__dict__}" + ) # pragma: no cover print (47) + return regrid_operator from scipy.sparse import issparse From 81bd4c00d4eb1c4d53e65d609ddac3d4a926bbfa Mon Sep 17 00:00:00 2001 From: David Hassell Date: Fri, 18 Jul 2025 09:32:43 +0100 Subject: [PATCH 17/54] dev --- cf/regrid/regrid.py | 45 ++++++++++++++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 15 deletions(-) diff --git a/cf/regrid/regrid.py b/cf/regrid/regrid.py index 961c8f731a..d72ec70210 100644 --- a/cf/regrid/regrid.py +++ b/cf/regrid/regrid.py @@ -2558,21 +2558,7 @@ def create_esmpy_weights( debug = is_log_level_debug(logger) start_index = 1 - - src_esmpy_grids = tuple(src_esmpy_grids) - if len(src_esmpy_grids) != 1: - raise ValueError( - "There must be exactly one source grid partition. 
" - f"Got {len(src_esmpy_grids)}" - ) - - src_esmpy_grid = src_esmpy_grids[0] - if debug: - klass = src_esmpy_grid.__class__.__name__ - logger.debug( - f"Source ESMF {src_esmpy_grid}\n" - ) # pragma: no cover - + compute_weights = True if esmpy_regrid_operator is None and weights_file is not None: from os.path import isfile @@ -2585,6 +2571,19 @@ def create_esmpy_weights( row = None col = None +# +# compute_weights = True +# if esmpy_regrid_operator is None and weights_file is not None: +# from os.path import isfile +# +# if isfile(weights_file): +# # The regridding weights and indices will be read from a +# # file +# compute_weights = False +# weights = None +# row = None +# col = None +# from_file = True if compute_weights or esmpy_regrid_operator is not None: # Create the weights using ESMF @@ -2606,6 +2605,21 @@ def create_esmpy_weights( elif not dst_mesh_location: dst_meshloc = None + src_esmpy_grids = tuple(src_esmpy_grids) + if len(src_esmpy_grids) != 1: + raise ValueError( + "There must be exactly one source grid partition. " + f"Got {len(src_esmpy_grids)}" + ) + + # Create source esmpy field + src_esmpy_grid = src_esmpy_grids[0] + if debug: + klass = src_esmpy_grid.__class__.__name__ + logger.debug( + f"Source ESMF {src_esmpy_grid}\n" + ) # pragma: no cover + src_esmpy_field = esmpy.Field( src_esmpy_grid, name="src", meshloc=src_meshloc ) @@ -2635,6 +2649,7 @@ def create_esmpy_weights( ) # pragma: no cover start_time = time() + # Create destination esmpy field dst_esmpy_field = esmpy.Field( dst_esmpy_grid, name="dst", meshloc=dst_meshloc ) From 3a6fae6fcdaa0ba6c44ac48ec0385da4bc6f9f54 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Fri, 18 Jul 2025 10:04:04 +0100 Subject: [PATCH 18/54] dev --- cf/regrid/regrid.py | 82 ++++++++++++++++++++++++--------------------- 1 file changed, 44 insertions(+), 38 deletions(-) diff --git a/cf/regrid/regrid.py b/cf/regrid/regrid.py index d72ec70210..db0de78be7 100644 --- a/cf/regrid/regrid.py +++ b/cf/regrid/regrid.py @@ -2571,21 +2571,20 @@ def create_esmpy_weights( row = None col = None -# -# compute_weights = True -# if esmpy_regrid_operator is None and weights_file is not None: -# from os.path import isfile -# -# if isfile(weights_file): -# # The regridding weights and indices will be read from a -# # file -# compute_weights = False -# weights = None -# row = None -# col = None -# + if esmpy_regrid_operator is not None: + # If we're returning the esmpy regrid operator, we need to + # create it by computing the weights. 
+ compute_weights = True + from_file = True - if compute_weights or esmpy_regrid_operator is not None: + if compute_weights: # or esmpy_regrid_operator is not None: + if debug: + start_time0 = time() + logger.debug( + "Free memory before calculation of all weights: " + f"{free_memory()/(2**30)} GiB\n" + ) # pragma: no cover + # Create the weights using ESMF from_file = False @@ -2634,17 +2633,16 @@ def create_esmpy_weights( # each destination grid partition w = [] - - if debug: - start_time0 = time() - # Loop round destination grid partitions + if debug: + start_time = time() + for i, dst_esmpy_grid in enumerate(dst_esmpy_grids): if debug: klass = dst_esmpy_grid.__class__.__name__ logger.debug( f"Partition {i}: Time taken to create ESMF {klass}: " - f"{time() - start_time0} s\n" + f"{time() - start_time} s\n" f"Partition {i}: Destination ESMF {dst_esmpy_grid}" ) # pragma: no cover start_time = time() @@ -2732,18 +2730,18 @@ def create_esmpy_weights( if debug: logger.debug( + f"Partition {i}: Time taken by create sparse weights " + f"array: {time() - start_time} s\n" f"Partition {i}: Sparse weights array: {weights!r}" ) # pragma: no cover - print (weights.indptr.dtype,weights.indices.dtype) + start_time = time() if debug: logger.debug( - f"Partition {i}: Total time taken to calculate weights: " - f"{time() - start_time0} s\n" f"Partition {i}: Free memory after weights calculation: " f"{free_memory()/(2**30)} GiB\n" ) # pragma: no cover - start_time0 = time() + start_time = time() print (11) if esmpy_regrid_operator is None: # Destroy esmpy objects that are no longer needed @@ -2760,27 +2758,26 @@ def create_esmpy_weights( # The destination grid has been partitioned, so # concatenate the sparse weights arrays for all # destination grid partitions. - if debug: - start_time = time() - logger.debug( - "Starting concatenation of sparse weights arrays for " - "all partitions ..." - ) # pragma: no cover - weights = vstack(w, format="csr") if debug: logger.debug( - f"... finished in {time() - start_time} s" - ) # pragma: no cover - + f"Time taken to concatenate sparse weights arrays: " + f"{time() - start_time} s\n" + f"Free memory after concatenation of sparse weights " + f"arrays: {free_memory()/(2**30)} GiB\n" + ) # pragma: no + start_time = time() + dst_size = weights.shape[0] del w - if debug: - logger.debug( - "Free memory after final weights matrix creation: " - f"{free_memory()/(2**30)} GiB\n" - ) # pragma: no cover + if debug: + logger.debug( + "Total time taken to calculate all weights: " + f"{time() - start_time0} s\n" + "Free memory after calculation of all weights: " + f"{free_memory()/(2**30)} GiB\n" + ) # pragma: no cover if weights_file is not None: # Write the weights to a netCDF file (copying the @@ -2863,6 +2860,15 @@ def create_esmpy_weights( print(7) nc.close() print(8) + if debug: + logger.debug( + f"Time taken to create weights file {weights_file}: " + f"{time() - start_time} s\n" + f"Free memory after creation of weights file: " + f"{free_memory()/(2**30)} GiB\n" + ) # pragma: no cover + start_time = time() + if partitioned_dst_grid: # Reset 'row' and 'col' to None, because 'weights' is # already a sparse array. 
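The pattern above, together with the partitions() helper earlier in this series, amounts to: split the trailing destination axis into roughly equal chunks, compute one CSR weights block of shape (n_partition_rows, src_size) per partition, then stack the blocks row-wise into the full (dst_size, src_size) matrix. A toy standalone sketch of that pattern (made-up sizes, with random dense blocks standing in for real ESMF output):

from math import ceil

import numpy as np
from dask.array.core import normalize_chunks
from scipy.sparse import csr_array, vstack

dst_size, src_size, n_partitions = 5, 6, 2

# Split the destination axis into roughly equal chunks, mirroring partitions()
chunks = normalize_chunks((ceil(dst_size / n_partitions),), shape=(dst_size,))[0]
print(chunks)  # (3, 2)

# One weights block per destination partition
rng = np.random.default_rng(0)
blocks = [csr_array(rng.random((n, src_size))) for n in chunks]

weights = vstack(blocks, format="csr")
print(weights.shape)  # (5, 6) == (dst_size, src_size)

Stacking per-partition blocks row-wise is cheap in CSR form, which fits the choice of partitioning the destination grid: each partition owns complete rows of the weights matrix.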
From 14e28a73a0bde19773c01bb50cadfa89c6572b05 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Fri, 18 Jul 2025 11:03:01 +0100 Subject: [PATCH 19/54] dev --- cf/regrid/regrid.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/cf/regrid/regrid.py b/cf/regrid/regrid.py index db0de78be7..b2053b5cc2 100644 --- a/cf/regrid/regrid.py +++ b/cf/regrid/regrid.py @@ -746,6 +746,9 @@ def regrid( return regrid_operator + # ---------------------------------------------------------------- + # Still here? Then do the regridding + # ---------------------------------------------------------------- from scipy.sparse import issparse if debug and issparse(regrid_operator.weights): @@ -754,9 +757,6 @@ def regrid( f" {regrid_operator.weights.__dict__}" ) # pragma: no cover - # ---------------------------------------------------------------- - # Still here? Then do the regridding - # ---------------------------------------------------------------- if src_grid.n_regrid_axes == dst_grid.n_regrid_axes: regridded_axis_sizes = { src_iaxis: (dst_size,) @@ -764,6 +764,7 @@ def regrid( src_grid.axis_indices, dst_grid.shape ) } + elif src_grid.n_regrid_axes == 1: # Fewer source grid axes than destination grid axes (e.g. mesh # regridded to lat/lon). @@ -2581,7 +2582,8 @@ def create_esmpy_weights( if debug: start_time0 = time() logger.debug( - "Free memory before calculation of all weights: " + "Calculating weights ...\n\n" + "Free memory before calculation of weights: " f"{free_memory()/(2**30)} GiB\n" ) # pragma: no cover @@ -2677,9 +2679,10 @@ def create_esmpy_weights( if debug: logger.debug( - f"Partition {i}: Time taken by esmpy.Regrid: " + f"Partition {i}: Time taken by ESMF to create weights: " f"{time() - start_time} s" ) # pragma: no cover + start_time = time() ESMF_unmapped_action = r.unmapped_action ESMF_ignore_degenerate = int(r.ignore_degenerate) @@ -2730,7 +2733,7 @@ def create_esmpy_weights( if debug: logger.debug( - f"Partition {i}: Time taken by create sparse weights " + f"Partition {i}: Time taken to create sparse weights " f"array: {time() - start_time} s\n" f"Partition {i}: Sparse weights array: {weights!r}" ) # pragma: no cover @@ -2742,17 +2745,14 @@ def create_esmpy_weights( f"{free_memory()/(2**30)} GiB\n" ) # pragma: no cover start_time = time() + print (11) if esmpy_regrid_operator is None: # Destroy esmpy objects that are no longer needed src_esmpy_grid.destroy() - print (12) src_esmpy_field.destroy() - print (13) r.srcfield.grid.destroy() - print (14) r.srcfield.destroy() - print (15) print (22) if partitioned_dst_grid: # The destination grid has been partitioned, so @@ -2765,6 +2765,7 @@ def create_esmpy_weights( f"{time() - start_time} s\n" f"Free memory after concatenation of sparse weights " f"arrays: {free_memory()/(2**30)} GiB\n" + f"Sparse weights array for all partitions: {weights!r}\n" ) # pragma: no start_time = time() From 75d38b3a4601fe9e3b0cdadb405054b3c0f06943 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Fri, 18 Jul 2025 17:15:47 +0100 Subject: [PATCH 20/54] dev --- cf/regrid/regrid.py | 55 +++++++++++++++++-------------------- cf/regrid/regridoperator.py | 8 +++--- 2 files changed, 29 insertions(+), 34 deletions(-) diff --git a/cf/regrid/regrid.py b/cf/regrid/regrid.py index b2053b5cc2..cb3049f039 100644 --- a/cf/regrid/regrid.py +++ b/cf/regrid/regrid.py @@ -14,21 +14,11 @@ from ..units import Units from .regridoperator import RegridOperator -# ESMF renamed its Python module to `esmpy` at ESMF version 8.4.0. 
Allow -# either for now for backwards compatibility. -esmpy_imported = False +esmpy_imported = True try: import esmpy - - esmpy_imported = True except ImportError: - try: - # Take the new name to use in preference to the old one. - import ESMF as esmpy - - esmpy_imported = True - except ImportError: - pass + esmpy_imported = False logger = logging.getLogger(__name__) @@ -2630,13 +2620,14 @@ def create_esmpy_weights( if partitioned_dst_grid: from scipy.sparse import csr_array, vstack - + # Initialise the list of the sparse weights arrays for # each destination grid partition w = [] # Loop round destination grid partitions if debug: + import tracemalloc start_time = time() for i, dst_esmpy_grid in enumerate(dst_esmpy_grids): @@ -2680,7 +2671,7 @@ def create_esmpy_weights( if debug: logger.debug( f"Partition {i}: Time taken by ESMF to create weights: " - f"{time() - start_time} s" + f"{time() - start_time} s\n" ) # pragma: no cover start_time = time() @@ -2727,9 +2718,7 @@ def create_esmpy_weights( (weights, (row, col)), shape=[dst_size, src_size] ) w.append(weights) - - row = None - col = None + del row, col if debug: logger.debug( @@ -2795,16 +2784,15 @@ def create_esmpy_weights( if partitioned_dst_grid: # 'weights' is a CSR sparse array, so we have to infer # the row and column arrays from it. - weights_data = weights.data - row, col = weights.tocoo().coords - print(2) + row, col = weights.tocoo(copy=False).coords + print(2.0) + weights = weights.data + print(2.1) if start_index: # 'row' and 'col' come out of `tocoo().coords` as # zero-based values row += start_index col += start_index - else: - weights_data = weights print(3) regrid_method = f"{src_grid.coord_sys} {src_grid.method}" @@ -2825,7 +2813,7 @@ def create_esmpy_weights( nc.ESMF_unmapped_action = ESMF_unmapped_action nc.ESMF_ignore_degenerate = ESMF_ignore_degenerate - nc.createDimension("n_s", weights_data.size) + nc.createDimension("n_s", weights.size) nc.createDimension("src_grid_rank", src_rank) nc.createDimension("dst_grid_rank", dst_rank) @@ -2842,22 +2830,28 @@ def create_esmpy_weights( v[...] = dst_grid.esmpy_shape v = nc.createVariable( - "S", weights_data.dtype, ("n_s",), zlib=True + "S", weights.dtype, ("n_s",), zlib=True ) print(4) v.long_name = "Weights values" - v[...] = weights_data + v[...] = weights + nc.sync() + del weights print(5) v = nc.createVariable("row", row.dtype, ("n_s",), zlib=True) v.long_name = "Destination/row indices" v.start_index = start_index v[...] = row + nc.sync() + del row print(6) v = nc.createVariable("col", col.dtype, ("n_s",), zlib=True) v.long_name = "Source/col indices" v.start_index = start_index v[...] = col + nc.sync() + del col print(7) nc.close() print(8) @@ -2870,11 +2864,12 @@ def create_esmpy_weights( ) # pragma: no cover start_time = time() - if partitioned_dst_grid: - # Reset 'row' and 'col' to None, because 'weights' is - # already a sparse array. - row = None - col = None +# if partitioned_dst_grid: +# # Reset 'row' and 'col' to None, because 'weights' is +# # already a sparse array. 
+ weights = None + row = None + col = None if esmpy_regrid_operator is not None: # Make the Regrid instance available via the diff --git a/cf/regrid/regridoperator.py b/cf/regrid/regridoperator.py index 1c0dc27803..0c1a20723e 100644 --- a/cf/regrid/regridoperator.py +++ b/cf/regrid/regridoperator.py @@ -850,13 +850,13 @@ def tosparse(self): ) # Convert to sparse array format - if col_start_index: - col = col - col_start_index + if col_start_index is not None: + col -= col_start_index elif start_index: col = col - start_index - if row_start_index: - row = row - row_start_index + if row_start_index is not None: + row -= row_start_index elif start_index: row = row - start_index From adff1d749dfc531fc6e3a374407addbc070f8cbb Mon Sep 17 00:00:00 2001 From: David Hassell Date: Mon, 21 Jul 2025 08:59:36 +0100 Subject: [PATCH 21/54] dev --- cf/regrid/regrid.py | 42 ++++++++++++++++-------------------------- cf/test/test_regrid.py | 3 ++- 2 files changed, 18 insertions(+), 27 deletions(-) diff --git a/cf/regrid/regrid.py b/cf/regrid/regrid.py index cb3049f039..0989f79ef6 100644 --- a/cf/regrid/regrid.py +++ b/cf/regrid/regrid.py @@ -674,7 +674,7 @@ def regrid( src_grid.coords.pop() if src_grid.bounds: src_grid.bounds.pop() - print (33) + # Create regrid operator regrid_operator = RegridOperator( weights, @@ -716,23 +716,16 @@ def regrid( src, src_grid, regrid_operator, check_coordinates=check_coordinates ) - print (44) if return_operator: - # Note: The `RegridOperator.tosparse` method will also set - # 'dst_mask' to False for destination points with all - # zero weights. if regrid_operator.weights is not None: - print (45) regrid_operator.tosparse() - print (46) if debug: logger.debug( "Sparse weights array for all partitions:\n" f"{regrid_operator.weights!r}\n" f"{regrid_operator.weights.__dict__}" ) # pragma: no cover - print (47) return regrid_operator @@ -2735,19 +2728,22 @@ def create_esmpy_weights( ) # pragma: no cover start_time = time() - print (11) if esmpy_regrid_operator is None: # Destroy esmpy objects that are no longer needed src_esmpy_grid.destroy() src_esmpy_field.destroy() r.srcfield.grid.destroy() r.srcfield.destroy() - print (22) + if partitioned_dst_grid: # The destination grid has been partitioned, so # concatenate the sparse weights arrays for all # destination grid partitions. weights = vstack(w, format="csr") + dst_size = weights.shape[0] + row = None + col = None + del w if debug: logger.debug( f"Time taken to concatenate sparse weights arrays: " @@ -2758,9 +2754,6 @@ def create_esmpy_weights( ) # pragma: no start_time = time() - dst_size = weights.shape[0] - del w - if debug: logger.debug( "Total time taken to calculate all weights: " @@ -2773,6 +2766,10 @@ def create_esmpy_weights( # Write the weights to a netCDF file (copying the # dimension and variable names and structure of a weights # file created by ESMF). + # + # Be careful with memory, by using `netCDF4.Dataset.sync` + # and `del`, because the weights may be large, and keeping + # copies of them in memory may not be possible. print (0) from cfdm.data.locks import netcdf_lock from netCDF4 import Dataset @@ -2785,16 +2782,13 @@ def create_esmpy_weights( # 'weights' is a CSR sparse array, so we have to infer # the row and column arrays from it. 
row, col = weights.tocoo(copy=False).coords - print(2.0) weights = weights.data - print(2.1) if start_index: # 'row' and 'col' come out of `tocoo().coords` as # zero-based values row += start_index col += start_index - print(3) regrid_method = f"{src_grid.coord_sys} {src_grid.method}" if src_grid.ln_z: regrid_method += f", ln {src_grid.method} in vertical" @@ -2832,19 +2826,18 @@ def create_esmpy_weights( v = nc.createVariable( "S", weights.dtype, ("n_s",), zlib=True ) - print(4) + v.long_name = "Weights values" v[...] = weights nc.sync() del weights - print(5) + v = nc.createVariable("row", row.dtype, ("n_s",), zlib=True) v.long_name = "Destination/row indices" v.start_index = start_index v[...] = row nc.sync() del row - print(6) v = nc.createVariable("col", col.dtype, ("n_s",), zlib=True) v.long_name = "Source/col indices" @@ -2852,9 +2845,9 @@ def create_esmpy_weights( v[...] = col nc.sync() del col - print(7) + nc.close() - print(8) + if debug: logger.debug( f"Time taken to create weights file {weights_file}: " @@ -2863,10 +2856,7 @@ def create_esmpy_weights( f"{free_memory()/(2**30)} GiB\n" ) # pragma: no cover start_time = time() - -# if partitioned_dst_grid: -# # Reset 'row' and 'col' to None, because 'weights' is -# # already a sparse array. + weights = None row = None col = None @@ -2875,7 +2865,7 @@ def create_esmpy_weights( # Make the Regrid instance available via the # 'esmpy_regrid_operator' list esmpy_regrid_operator.append(r) - print(99) + return weights, row, col, start_index, from_file diff --git a/cf/test/test_regrid.py b/cf/test/test_regrid.py index e5d28ab918..43c24ecd2b 100644 --- a/cf/test/test_regrid.py +++ b/cf/test/test_regrid.py @@ -798,11 +798,12 @@ def test_Field_regrid_weights_file(self): ) self.assertTrue(os.path.isfile(tmpfile)) self.assertEqual(r.weights_file, tmpfile) - self.assertIsNotNone(r.weights) + self.assertIsNone(r.weights) r = src.regrids( dst, method="linear", return_operator=True, weights_file=tmpfile ) + r.dump() self.assertEqual(r.weights_file, tmpfile) self.assertIsNone(r.weights) From 05a19360a21ae88572bd5a7b1aa0d7b79b99ee33 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Mon, 21 Jul 2025 09:00:41 +0100 Subject: [PATCH 22/54] dev --- cf/data/dask_regrid.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cf/data/dask_regrid.py b/cf/data/dask_regrid.py index 99b546400f..0680fcd1a2 100644 --- a/cf/data/dask_regrid.py +++ b/cf/data/dask_regrid.py @@ -658,9 +658,9 @@ def regrid_weights(operator, dst_dtype=None): """ from math import prod - + print('IN regrid_weights') operator.tosparse() - + print('done tosprase()') weights = operator.weights if dst_dtype is not None and weights.dtype != dst_dtype: # Convert weights to have the same dtype as the regridded data @@ -670,5 +670,5 @@ def regrid_weights(operator, dst_dtype=None): if dst_mask is not None: # Convert dst_mask to a 1-d array dst_mask = dst_mask.reshape((prod(operator.dst_shape),)) - + print (112) return weights, dst_mask From 72626866b8cce144f7488b9aa648b8d2fd93c461 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Mon, 21 Jul 2025 09:04:28 +0100 Subject: [PATCH 23/54] dev --- cf/regrid/regridoperator.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/cf/regrid/regridoperator.py b/cf/regrid/regridoperator.py index 0c1a20723e..b3b8f856ac 100644 --- a/cf/regrid/regridoperator.py +++ b/cf/regrid/regridoperator.py @@ -817,10 +817,11 @@ def tosparse(self): start_index = self.start_index col_start_index = None row_start_index = None - - if 
weights is None: + + if weights is None: weights_file = self.weights_file if weights_file is not None: + print('reading file ....') # Read the weights from the weights file from cfdm.data.locks import netcdf_lock from netCDF4 import Dataset @@ -848,7 +849,7 @@ def tosparse(self): "one of the 'weights' or 'weights_file' attributes to " "be set" ) - + print ('file read into RAM') # Convert to sparse array format if col_start_index is not None: col -= col_start_index @@ -861,10 +862,11 @@ def tosparse(self): row = row - start_index src_size = prod(self.src_shape) + print('starting csr_array') weights = csr_array( (weights, (row, col)), shape=[dst_size, src_size] ) - + print('done csr_array') self._set_component("weights", weights, copy=False) self._set_component("row", None, copy=False) self._set_component("col", None, copy=False) From 04edbcabaf3cfb7eabe1d83aa14c429784d54fee Mon Sep 17 00:00:00 2001 From: David Hassell Date: Mon, 21 Jul 2025 09:18:24 +0100 Subject: [PATCH 24/54] dev --- cf/data/dask_regrid.py | 15 +++++++++------ cf/regrid/regridoperator.py | 9 ++++----- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/cf/data/dask_regrid.py b/cf/data/dask_regrid.py index 0680fcd1a2..febd22ac90 100644 --- a/cf/data/dask_regrid.py +++ b/cf/data/dask_regrid.py @@ -532,6 +532,7 @@ def _regrid( del indptr elif method in ("linear", "bilinear"): + print (11) # 2) Linear methods: # # Mask out any row j that contains at least one positive @@ -544,7 +545,7 @@ def _regrid( dst_mask = np.zeros((dst_size,), dtype=bool) else: dst_mask = dst_mask.copy() - + print(22) # Note: It is much more efficient to access # 'weights.indptr', 'weights.indices', and # 'weights.data' directly, rather than iterating @@ -563,7 +564,7 @@ def _regrid( if where((mask) & (pos_data[i0:i1]))[0].size: dst_mask[j] = True - + print(33) del indptr, pos_data elif method == "nearest_dtos": @@ -618,13 +619,15 @@ def _regrid( # Regrid the data by calculating the dot product of the weights # matrix with the source data # ---------------------------------------------------------------- + print(44) a = np.ma.getdata(a) + print(55) a = weights.dot(a) - + print(66) if dst_mask is not None: a = np.ma.array(a) a[dst_mask] = np.ma.masked - + print(77) return a, src_mask, dst_mask, weights @@ -660,7 +663,7 @@ def regrid_weights(operator, dst_dtype=None): from math import prod print('IN regrid_weights') operator.tosparse() - print('done tosprase()') + weights = operator.weights if dst_dtype is not None and weights.dtype != dst_dtype: # Convert weights to have the same dtype as the regridded data @@ -670,5 +673,5 @@ def regrid_weights(operator, dst_dtype=None): if dst_mask is not None: # Convert dst_mask to a 1-d array dst_mask = dst_mask.reshape((prod(operator.dst_shape),)) - print (112) + print ('DONE regrid_weights') return weights, dst_mask diff --git a/cf/regrid/regridoperator.py b/cf/regrid/regridoperator.py index b3b8f856ac..0654cbc00f 100644 --- a/cf/regrid/regridoperator.py +++ b/cf/regrid/regridoperator.py @@ -821,7 +821,6 @@ def tosparse(self): if weights is None: weights_file = self.weights_file if weights_file is not None: - print('reading file ....') # Read the weights from the weights file from cfdm.data.locks import netcdf_lock from netCDF4 import Dataset @@ -849,7 +848,7 @@ def tosparse(self): "one of the 'weights' or 'weights_file' attributes to " "be set" ) - print ('file read into RAM') + # Convert to sparse array format if col_start_index is not None: col -= col_start_index @@ -862,15 +861,15 @@ def 
tosparse(self): row = row - start_index src_size = prod(self.src_shape) - print('starting csr_array') + weights = csr_array( (weights, (row, col)), shape=[dst_size, src_size] ) - print('done csr_array') + del row, col + self._set_component("weights", weights, copy=False) self._set_component("row", None, copy=False) self._set_component("col", None, copy=False) - del row, col if self._dst_mask_adjusted: # The destination grid mask has already been adjusted From 150b1c6b7f05a819aebf1738d5dd218e53b62ec1 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Mon, 21 Jul 2025 09:40:25 +0100 Subject: [PATCH 25/54] dev --- cf/data/dask_regrid.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/cf/data/dask_regrid.py b/cf/data/dask_regrid.py index febd22ac90..1e277ddb29 100644 --- a/cf/data/dask_regrid.py +++ b/cf/data/dask_regrid.py @@ -540,11 +540,11 @@ def _regrid( # corresponds to a masked source grid cell i. Such a row # corresponds to a destination grid cell that intersects # at least one masked source grid cell. - dst_size = weights.shape[0] - if dst_mask is None: - dst_mask = np.zeros((dst_size,), dtype=bool) - else: - dst_mask = dst_mask.copy() +# dst_size = weights.shape[0] +# if dst_mask is None: +# dst_mask = np.zeros((dst_size,), dtype=bool) +# else: +# dst_mask = dst_mask.copy() print(22) # Note: It is much more efficient to access # 'weights.indptr', 'weights.indices', and @@ -555,14 +555,25 @@ def _regrid( count_nonzero = np.count_nonzero where = np.where indptr = weights.indptr.tolist() + print (22.1) indices = weights.indices pos_data = weights.data >= min_weight + print (22.2) + dst_mask_copied=False for j, (i0, i1) in enumerate(zip(indptr[:-1], indptr[1:])): mask = src_mask[indices[i0:i1]] if not count_nonzero(mask): continue if where((mask) & (pos_data[i0:i1]))[0].size: + if not dst_mask_copied: + dst_mask_copied = True + if dst_mask is None: + dst_size = weights.shape[0] + dst_mask = np.zeros((dst_size,), dtype=bool) + else: + dst_mask = dst_mask.copy() + dst_mask[j] = True print(33) del indptr, pos_data From 56d4a6bcd446c1ce2ec45bc1325ff0f5a9729729 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Mon, 21 Jul 2025 10:20:04 +0100 Subject: [PATCH 26/54] dev --- cf/data/dask_regrid.py | 41 ++++++++++++++++++++++++----------------- 1 file changed, 24 insertions(+), 17 deletions(-) diff --git a/cf/data/dask_regrid.py b/cf/data/dask_regrid.py index 1e277ddb29..f2ac1e1212 100644 --- a/cf/data/dask_regrid.py +++ b/cf/data/dask_regrid.py @@ -540,11 +540,11 @@ def _regrid( # corresponds to a masked source grid cell i. Such a row # corresponds to a destination grid cell that intersects # at least one masked source grid cell. 
-# dst_size = weights.shape[0] -# if dst_mask is None: -# dst_mask = np.zeros((dst_size,), dtype=bool) -# else: -# dst_mask = dst_mask.copy() + dst_size = weights.shape[0] + if dst_mask is None: + dst_mask = np.zeros((dst_size,), dtype=bool) + else: + dst_mask = dst_mask.copy() print(22) # Note: It is much more efficient to access # 'weights.indptr', 'weights.indices', and @@ -557,26 +557,33 @@ def _regrid( indptr = weights.indptr.tolist() print (22.1) indices = weights.indices - pos_data = weights.data >= min_weight - print (22.2) + data = weights.data +# pos_data = weights.data >= min_weight + print (22.2, indptr.size - 1) dst_mask_copied=False + import time + s = time.time() for j, (i0, i1) in enumerate(zip(indptr[:-1], indptr[1:])): + if not divmod (j, 1000)[1]: + print (f"{j}: {time.time() - s} s") + s = time.time() mask = src_mask[indices[i0:i1]] if not count_nonzero(mask): continue - if where((mask) & (pos_data[i0:i1]))[0].size: - if not dst_mask_copied: - dst_mask_copied = True - if dst_mask is None: - dst_size = weights.shape[0] - dst_mask = np.zeros((dst_size,), dtype=bool) - else: - dst_mask = dst_mask.copy() - +# if where( (mask) & (pos_data[i0:i1]) )[0].size: + if where( data[i0:i1][mask] >= min_weight )[0].size: +# if not dst_mask_copied: +# dst_mask_copied = True +# if dst_mask is None: +# dst_size = weights.shape[0] +# dst_mask = np.zeros((dst_size,), dtype=bool) +# else: +# dst_mask = dst_mask.copy() + print ('MASKING j=', j) dst_mask[j] = True print(33) - del indptr, pos_data + del indptr #, pos_data elif method == "nearest_dtos": # 3) Nearest neighbour dtos method: From 12aa457f430b6ce73b882152e13a1e7649e22bea Mon Sep 17 00:00:00 2001 From: David Hassell Date: Mon, 21 Jul 2025 10:21:41 +0100 Subject: [PATCH 27/54] dev --- cf/data/dask_regrid.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cf/data/dask_regrid.py b/cf/data/dask_regrid.py index f2ac1e1212..27af551ba0 100644 --- a/cf/data/dask_regrid.py +++ b/cf/data/dask_regrid.py @@ -559,7 +559,7 @@ def _regrid( indices = weights.indices data = weights.data # pos_data = weights.data >= min_weight - print (22.2, indptr.size - 1) + print (22.2, len(indptr) - 1) dst_mask_copied=False import time s = time.time() From df3a4664f3a9778f134c19d27116365cc53fd2c7 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Mon, 21 Jul 2025 10:51:28 +0100 Subject: [PATCH 28/54] dev --- cf/data/dask_regrid.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/cf/data/dask_regrid.py b/cf/data/dask_regrid.py index 27af551ba0..761cdd27f7 100644 --- a/cf/data/dask_regrid.py +++ b/cf/data/dask_regrid.py @@ -564,9 +564,10 @@ def _regrid( import time s = time.time() for j, (i0, i1) in enumerate(zip(indptr[:-1], indptr[1:])): - if not divmod (j, 1000)[1]: + if not divmod (j, 100000)[1]: print (f"{j}: {time.time() - s} s") s = time.time() + mask = src_mask[indices[i0:i1]] if not count_nonzero(mask): continue @@ -637,11 +638,8 @@ def _regrid( # Regrid the data by calculating the dot product of the weights # matrix with the source data # ---------------------------------------------------------------- - print(44) a = np.ma.getdata(a) - print(55) a = weights.dot(a) - print(66) if dst_mask is not None: a = np.ma.array(a) a[dst_mask] = np.ma.masked From 54ea811edb0b4e31edcdec57dec6859685a77b8b Mon Sep 17 00:00:00 2001 From: David Hassell Date: Mon, 21 Jul 2025 11:09:09 +0100 Subject: [PATCH 29/54] dev --- cf/data/dask_regrid.py | 16 +++------------- cf/test/test_regrid.py | 1 - 2 files changed, 3 insertions(+), 14 
deletions(-) diff --git a/cf/data/dask_regrid.py b/cf/data/dask_regrid.py index 761cdd27f7..9642d577ab 100644 --- a/cf/data/dask_regrid.py +++ b/cf/data/dask_regrid.py @@ -558,9 +558,7 @@ def _regrid( print (22.1) indices = weights.indices data = weights.data -# pos_data = weights.data >= min_weight - print (22.2, len(indptr) - 1) - dst_mask_copied=False + pos_data = weights.data >= min_weight import time s = time.time() for j, (i0, i1) in enumerate(zip(indptr[:-1], indptr[1:])): @@ -572,16 +570,8 @@ def _regrid( if not count_nonzero(mask): continue -# if where( (mask) & (pos_data[i0:i1]) )[0].size: - if where( data[i0:i1][mask] >= min_weight )[0].size: -# if not dst_mask_copied: -# dst_mask_copied = True -# if dst_mask is None: -# dst_size = weights.shape[0] -# dst_mask = np.zeros((dst_size,), dtype=bool) -# else: -# dst_mask = dst_mask.copy() - print ('MASKING j=', j) + if where( (mask) & (pos_data[i0:i1]) )[0].size: +# if where( data[i0:i1][mask] >= min_weight )[0].size: dst_mask[j] = True print(33) del indptr #, pos_data diff --git a/cf/test/test_regrid.py b/cf/test/test_regrid.py index 43c24ecd2b..839915227c 100644 --- a/cf/test/test_regrid.py +++ b/cf/test/test_regrid.py @@ -803,7 +803,6 @@ def test_Field_regrid_weights_file(self): r = src.regrids( dst, method="linear", return_operator=True, weights_file=tmpfile ) - r.dump() self.assertEqual(r.weights_file, tmpfile) self.assertIsNone(r.weights) From 1566d621f06eb83f9bc513907381a08e3aa3b57d Mon Sep 17 00:00:00 2001 From: David Hassell Date: Mon, 21 Jul 2025 11:13:38 +0100 Subject: [PATCH 30/54] dev --- cf/data/dask_regrid.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/cf/data/dask_regrid.py b/cf/data/dask_regrid.py index 9642d577ab..2caf42f1f1 100644 --- a/cf/data/dask_regrid.py +++ b/cf/data/dask_regrid.py @@ -532,7 +532,6 @@ def _regrid( del indptr elif method in ("linear", "bilinear"): - print (11) # 2) Linear methods: # # Mask out any row j that contains at least one positive @@ -545,7 +544,7 @@ def _regrid( dst_mask = np.zeros((dst_size,), dtype=bool) else: dst_mask = dst_mask.copy() - print(22) + # Note: It is much more efficient to access # 'weights.indptr', 'weights.indices', and # 'weights.data' directly, rather than iterating @@ -555,7 +554,6 @@ def _regrid( count_nonzero = np.count_nonzero where = np.where indptr = weights.indptr.tolist() - print (22.1) indices = weights.indices data = weights.data pos_data = weights.data >= min_weight @@ -573,7 +571,7 @@ def _regrid( if where( (mask) & (pos_data[i0:i1]) )[0].size: # if where( data[i0:i1][mask] >= min_weight )[0].size: dst_mask[j] = True - print(33) + del indptr #, pos_data elif method == "nearest_dtos": @@ -633,7 +631,7 @@ def _regrid( if dst_mask is not None: a = np.ma.array(a) a[dst_mask] = np.ma.masked - print(77) + return a, src_mask, dst_mask, weights @@ -667,7 +665,7 @@ def regrid_weights(operator, dst_dtype=None): """ from math import prod - print('IN regrid_weights') + operator.tosparse() weights = operator.weights @@ -679,5 +677,5 @@ def regrid_weights(operator, dst_dtype=None): if dst_mask is not None: # Convert dst_mask to a 1-d array dst_mask = dst_mask.reshape((prod(operator.dst_shape),)) - print ('DONE regrid_weights') + return weights, dst_mask From 68c88eb7fe815d881d41003e995c16303d87a9ca Mon Sep 17 00:00:00 2001 From: David Hassell Date: Mon, 21 Jul 2025 11:36:45 +0100 Subject: [PATCH 31/54] dev --- cf/data/dask_regrid.py | 20 ++++++++------------ cf/regrid/regrid.py | 3 +-- 2 files changed, 9 insertions(+), 14 
deletions(-) diff --git a/cf/data/dask_regrid.py b/cf/data/dask_regrid.py index 2caf42f1f1..336de05bb2 100644 --- a/cf/data/dask_regrid.py +++ b/cf/data/dask_regrid.py @@ -532,6 +532,7 @@ def _regrid( del indptr elif method in ("linear", "bilinear"): + print ('LINEAR MASK') # 2) Linear methods: # # Mask out any row j that contains at least one positive @@ -553,27 +554,22 @@ def _regrid( # faster than 'np.any' and 'np.all'. count_nonzero = np.count_nonzero where = np.where - indptr = weights.indptr.tolist() + indptr = weights.indptr indices = weights.indices data = weights.data - pos_data = weights.data >= min_weight - import time - s = time.time() +# import time +# s = time.time() for j, (i0, i1) in enumerate(zip(indptr[:-1], indptr[1:])): - if not divmod (j, 100000)[1]: - print (f"{j}: {time.time() - s} s") - s = time.time() - +# if not divmod (j, 1000)[1]: +# print (f"{j}: {time.time() - s} s") +# s = time.time() mask = src_mask[indices[i0:i1]] if not count_nonzero(mask): continue - if where( (mask) & (pos_data[i0:i1]) )[0].size: -# if where( data[i0:i1][mask] >= min_weight )[0].size: + if where(data[i0:i1][mask] >= min_weight)[0].size: dst_mask[j] = True - del indptr #, pos_data - elif method == "nearest_dtos": # 3) Nearest neighbour dtos method: # diff --git a/cf/regrid/regrid.py b/cf/regrid/regrid.py index 0989f79ef6..45fe6816ed 100644 --- a/cf/regrid/regrid.py +++ b/cf/regrid/regrid.py @@ -2770,14 +2770,13 @@ def create_esmpy_weights( # Be careful with memory, by using `netCDF4.Dataset.sync` # and `del`, because the weights may be large, and keeping # copies of them in memory may not be possible. - print (0) from cfdm.data.locks import netcdf_lock from netCDF4 import Dataset from .. import __version__ from_file = True - print (1) + if partitioned_dst_grid: # 'weights' is a CSR sparse array, so we have to infer # the row and column arrays from it. From f569a3c60bfe4ac551b79493f80cc0f790f67dcd Mon Sep 17 00:00:00 2001 From: David Hassell Date: Mon, 21 Jul 2025 14:30:47 +0100 Subject: [PATCH 32/54] dev --- cf/data/dask_regrid.py | 26 ++----- cf/docstring/docstring.py | 15 ++-- cf/field.py | 9 +++ cf/mixin/propertiesdatabounds.py | 4 +- cf/regrid/regrid.py | 130 ++++++++++++++++--------------- cf/regrid/regridoperator.py | 6 +- 6 files changed, 98 insertions(+), 92 deletions(-) diff --git a/cf/data/dask_regrid.py b/cf/data/dask_regrid.py index 336de05bb2..6b642d0f79 100644 --- a/cf/data/dask_regrid.py +++ b/cf/data/dask_regrid.py @@ -507,10 +507,10 @@ def _regrid( # 'weights.indptr', 'weights.indices', and # 'weights.data' directly, rather than iterating # over rows of 'weights' and using - # 'weights.getrow'. Also, 'np.count_nonzero' is much - # faster than 'np.any' and 'np.all'. + # 'weights.getrow'. Also, `np.count_nonzero` is much + # faster than `np.any` and `np.all`. count_nonzero = np.count_nonzero - indptr = weights.indptr.tolist() + indptr = weights.indptr indices = weights.indices data = weights.data for j, (i0, i1) in enumerate(zip(indptr[:-1], indptr[1:])): @@ -529,10 +529,7 @@ def _regrid( w[mask] = 0 data[i0:i1] = w - del indptr - elif method in ("linear", "bilinear"): - print ('LINEAR MASK') # 2) Linear methods: # # Mask out any row j that contains at least one positive @@ -550,19 +547,14 @@ def _regrid( # 'weights.indptr', 'weights.indices', and # 'weights.data' directly, rather than iterating # over rows of 'weights' and using - # 'weights.getrow'. Also, 'np.count_nonzero' is much - # faster than 'np.any' and 'np.all'. + # 'weights.getrow'. 
Also, `np.count_nonzero` is much + # faster than `np.any` and `np.all`. count_nonzero = np.count_nonzero where = np.where indptr = weights.indptr indices = weights.indices data = weights.data -# import time -# s = time.time() for j, (i0, i1) in enumerate(zip(indptr[:-1], indptr[1:])): -# if not divmod (j, 1000)[1]: -# print (f"{j}: {time.time() - s} s") -# s = time.time() mask = src_mask[indices[i0:i1]] if not count_nonzero(mask): continue @@ -588,10 +580,10 @@ def _regrid( # 'weights.indptr', 'weights.indices', and # 'weights.data' directly, rather than iterating # over rows of 'weights' and using - # 'weights.getrow'. Also, 'np.count_nonzero' is much - # faster than 'np.any' and 'np.all'. + # 'weights.getrow'. Also, `np.count_nonzero` is much + # faster than `np.any` and `np.all`. count_nonzero = np.count_nonzero - indptr = weights.indptr.tolist() + indptr = weights.indptr indices = weights.indices for j, (i0, i1) in enumerate(zip(indptr[:-1], indptr[1:])): mask = src_mask[indices[i0:i1]] @@ -601,8 +593,6 @@ def _regrid( elif n_masked: weights.data[np.arange(i0, i1)[mask]] = 0 - del indptr - elif method in ( "patch", "conservative_2nd", diff --git a/cf/docstring/docstring.py b/cf/docstring/docstring.py index d1996696bc..1844884224 100644 --- a/cf/docstring/docstring.py +++ b/cf/docstring/docstring.py @@ -569,17 +569,18 @@ return the `esmpy.Regrid` instance that defines the regridding operation.""", # dst_grid_partitions - "{{dst_grid_partitions: `int` or `str`, optional}}": """rdst_grid_partitions: `int` or `str`, optional - Calculating the weights matrix for grids with very large - numbers of grid points can potentially require more memory - than is available. However, this memory requirement can be - greatly reduced by calculating weights separately for + "{{dst_grid_partitions: `int` or `str`, optional}}": """dst_grid_partitions: `int` or `str`, optional + Calculating the weights matrix for grids with a very large + number of source and or destination grid points can + potentially require more memory than is + available. However, the memory requirement can be greatly + reduced by calculating weights separately for non-overlapping partitions of the destination grid, and then combining the weights from each partition to create the final weights matrix. The more partitions there are, the smaller the memory requirement for the weights - calculations will be, at the expense of the weights - calculations taking longer. + calculations, at the expense of the weights calculations + taking longer. The *dst_grid_partitions* parameter sets the number of destination grid partitions for the weights diff --git a/cf/field.py b/cf/field.py index 5c0f0b41bb..d37e11f3c0 100644 --- a/cf/field.py +++ b/cf/field.py @@ -13198,6 +13198,10 @@ def regrids( {{dst_grid_partitions: `int` or `str`, optional}} + The maximum possible number of partitions is equal to + the size of the destination grid Y axis for 2-d + regridding, or the Z axis for 3-d regridding. + .. versionadded:: NEXTVERSION axis_order: sequence, optional @@ -13502,6 +13506,11 @@ def regridc( {{dst_grid_partitions: `int` or `str`, optional}} + Partitioning is only available for 2-d or 3-d + regridding. The maximum possible number of partitions + is equal to the size of the first of the destination + grid axes specified by the *axes* parameter. + .. 
versionadded:: NEXTVERSION {{verbose: `int` or `str` or `None`, optional}} diff --git a/cf/mixin/propertiesdatabounds.py b/cf/mixin/propertiesdatabounds.py index 546f19f8f7..d08c04438b 100644 --- a/cf/mixin/propertiesdatabounds.py +++ b/cf/mixin/propertiesdatabounds.py @@ -1,7 +1,7 @@ import logging import numpy as np -from cfdm import is_log_level_debug, is_log_level_info +from cfdm import is_log_level_info from ..data import Data from ..decorators import ( @@ -81,8 +81,6 @@ def __getitem__(self, indices): else: findices = tuple(indices) - cname = self.__class__.__name__ - data = self.get_data(None, _fill_value=False) if data is not None: new_data = data[findices] diff --git a/cf/regrid/regrid.py b/cf/regrid/regrid.py index 45fe6816ed..b26c3b4a76 100644 --- a/cf/regrid/regrid.py +++ b/cf/regrid/regrid.py @@ -418,7 +418,7 @@ def regrid( if method in ("conservative_2nd", "nearest_dtos"): raise ValueError( f"Can't use destination grid partitioning for {method!r} " - "regridding. Got dst_grid_partitions={dst_grid_partitions!r}" + f"regridding. Got dst_grid_partitions={dst_grid_partitions!r}" ) if return_esmpy_regrid_operator: @@ -638,8 +638,8 @@ def regrid( esmpy_regrid_operator = [] if return_esmpy_regrid_operator else None - partitioned_dst_grid = ( - partitions(dst_grid, dst_grid_partitions, return_n=True) > 1 + dst_grid_partitions = partitions( + dst_grid, dst_grid_partitions, return_n=True ) # Create regrid weights @@ -653,7 +653,7 @@ def regrid( quarter=src_grid.dummy_size_2_dimension, esmpy_regrid_operator=esmpy_regrid_operator, weights_file=weights_file, - partitioned_dst_grid=partitioned_dst_grid, + dst_grid_partitions=dst_grid_partitions, ) if return_esmpy_regrid_operator: @@ -719,14 +719,14 @@ def regrid( if return_operator: if regrid_operator.weights is not None: regrid_operator.tosparse() - + if debug: logger.debug( "Sparse weights array for all partitions:\n" f"{regrid_operator.weights!r}\n" f"{regrid_operator.weights.__dict__}" ) # pragma: no cover - + return regrid_operator # ---------------------------------------------------------------- @@ -747,7 +747,7 @@ def regrid( src_grid.axis_indices, dst_grid.shape ) } - + elif src_grid.n_regrid_axes == 1: # Fewer source grid axes than destination grid axes (e.g. mesh # regridded to lat/lon). @@ -2453,7 +2453,7 @@ def create_esmpy_weights( quarter=False, esmpy_regrid_operator=None, weights_file=None, - partitioned_dst_grid=False, + dst_grid_partitions=1, ): """Create the `esmpy` regridding weights. @@ -2508,33 +2508,41 @@ def create_esmpy_weights( .. versionadded:: 3.15.2 - partitioned_dst_grid: `bool`, optional - Whether or not the destination grid has been partitioned. + dst_grid_partitions: `int`, optional + The number of destination grid partitions. .. versionadded:: NEXTVERSION :Returns: 5-`tuple` + * weights: Either the 1-d `numpy` array of the regridding - weights. Or `None` if the regridding weights are to - be read from a file. - * row: The 1-d `numpy` array of the row indices of the - regridding weights in the dense weights matrix, - which has J rows and I columns, where J and I are - the total number of cells in the destination and - source grids respectively. The start index is 1. Or - `None` if the indices are to be read from a file. - * col: The 1-d `numpy` array of column indices of the - regridding weights in the dense weights matrix, + weights. Or a CSR sparse array of the regridding + weights. Or `None` if the weights are to be read + from, or written to, a file. 
+ + * row: The 1-d `numpy` array of the destination/row + indices of the weights in the dense weights matrix, which has J rows and I columns, where J and I are the total number of cells in the destination and - source grids respectively. The start index is 1. Or - `None` if the indices are to be read from a file. + source grids respectively. Or `None` if *weights* + is a CSR sparse array, or the weights are to be + read from a file. + + * col: The 1-d `numpy` array of source/column indices of + the weights in the dense weights matrix, which has + J rows and I columns, where J and I are the total + number of cells in the destination and source grids + respectively. Or `None` if *weights* is a CSR + sparse array, or the weights are to be read from a + file. + * start_index: The non-negative integer start index of the row and column indices. - * from_file: `True` if the weights were read from a file, - otherwise `False`. + + * from_file: `True` if the weights were read from, or + written to, a file. Otherwise `False`. """ from cfdm import integer_dtype @@ -2542,7 +2550,7 @@ def create_esmpy_weights( debug = is_log_level_debug(logger) start_index = 1 - + compute_weights = True if esmpy_regrid_operator is None and weights_file is not None: from os.path import isfile @@ -2559,17 +2567,20 @@ def create_esmpy_weights( # If we're returning the esmpy regrid operator, we need to # create it by computing the weights. compute_weights = True - + from_file = True - if compute_weights: # or esmpy_regrid_operator is not None: + if compute_weights: # or esmpy_regrid_operator is not None: + partitioned_dst_grid = dst_grid_partitions > 1 if debug: start_time0 = time() logger.debug( "Calculating weights ...\n\n" "Free memory before calculation of weights: " - f"{free_memory()/(2**30)} GiB\n" + f"{free_memory() / (2**30)} GiB\n\n" + "Number of destination grid partitions: " + f"{dst_grid_partitions}\n" ) # pragma: no cover - + # Create the weights using ESMF from_file = False @@ -2599,9 +2610,8 @@ def create_esmpy_weights( # Create source esmpy field src_esmpy_grid = src_esmpy_grids[0] if debug: - klass = src_esmpy_grid.__class__.__name__ logger.debug( - f"Source ESMF {src_esmpy_grid}\n" + f"Source ESMF {src_esmpy_grid.__class__.__name__}\n" ) # pragma: no cover src_esmpy_field = esmpy.Field( @@ -2613,26 +2623,25 @@ def create_esmpy_weights( if partitioned_dst_grid: from scipy.sparse import csr_array, vstack - + # Initialise the list of the sparse weights arrays for # each destination grid partition w = [] # Loop round destination grid partitions if debug: - import tracemalloc - start_time = time() - + start_time = time() # pragma: no cover + for i, dst_esmpy_grid in enumerate(dst_esmpy_grids): if debug: - klass = dst_esmpy_grid.__class__.__name__ logger.debug( - f"Partition {i}: Time taken to create ESMF {klass}: " + f"Partition {i}: Time taken to create ESMF " + f"{dst_esmpy_grid.__class__.__name__}: " f"{time() - start_time} s\n" f"Partition {i}: Destination ESMF {dst_esmpy_grid}" ) # pragma: no cover - start_time = time() - + start_time = time() # pragma: no cover + print(dst_esmpy_grid.size[0]) # Create destination esmpy field dst_esmpy_field = esmpy.Field( dst_esmpy_grid, name="dst", meshloc=dst_meshloc @@ -2666,7 +2675,7 @@ def create_esmpy_weights( f"Partition {i}: Time taken by ESMF to create weights: " f"{time() - start_time} s\n" ) # pragma: no cover - start_time = time() + start_time = time() # pragma: no cover ESMF_unmapped_action = r.unmapped_action ESMF_ignore_degenerate = 
int(r.ignore_degenerate) @@ -2712,7 +2721,7 @@ def create_esmpy_weights( ) w.append(weights) del row, col - + if debug: logger.debug( f"Partition {i}: Time taken to create sparse weights " @@ -2720,14 +2729,14 @@ def create_esmpy_weights( f"Partition {i}: Sparse weights array: {weights!r}" ) # pragma: no cover start_time = time() - + if debug: logger.debug( f"Partition {i}: Free memory after weights calculation: " - f"{free_memory()/(2**30)} GiB\n" + f"{free_memory() / (2**30)} GiB\n" ) # pragma: no cover - start_time = time() - + start_time = time() # pragma: no cover + if esmpy_regrid_operator is None: # Destroy esmpy objects that are no longer needed src_esmpy_grid.destroy() @@ -2749,17 +2758,17 @@ def create_esmpy_weights( f"Time taken to concatenate sparse weights arrays: " f"{time() - start_time} s\n" f"Free memory after concatenation of sparse weights " - f"arrays: {free_memory()/(2**30)} GiB\n" + f"arrays: {free_memory() / (2**30)} GiB\n" f"Sparse weights array for all partitions: {weights!r}\n" - ) # pragma: no - start_time = time() + ) # pragma: no + start_time = time() # pragma: no cover if debug: logger.debug( "Total time taken to calculate all weights: " f"{time() - start_time0} s\n" "Free memory after calculation of all weights: " - f"{free_memory()/(2**30)} GiB\n" + f"{free_memory() / (2**30)} GiB\n" ) # pragma: no cover if weights_file is not None: @@ -2767,7 +2776,7 @@ def create_esmpy_weights( # dimension and variable names and structure of a weights # file created by ESMF). # - # Be careful with memory, by using `netCDF4.Dataset.sync` + # Be careful with memory by using `netCDF4.Dataset.sync` # and `del`, because the weights may be large, and keeping # copies of them in memory may not be possible. from cfdm.data.locks import netcdf_lock @@ -2778,10 +2787,10 @@ def create_esmpy_weights( from_file = True if partitioned_dst_grid: - # 'weights' is a CSR sparse array, so we have to infer - # the row and column arrays from it. + # 'weights' is a CSR sparse array, so we have to get + # from it 'row', 'col', and weights as a numpy array. row, col = weights.tocoo(copy=False).coords - weights = weights.data + weights = weights.data if start_index: # 'row' and 'col' come out of `tocoo().coords` as # zero-based values @@ -2822,9 +2831,7 @@ def create_esmpy_weights( v.long_name = "Destination grid shape" v[...] = dst_grid.esmpy_shape - v = nc.createVariable( - "S", weights.dtype, ("n_s",), zlib=True - ) + v = nc.createVariable("S", weights.dtype, ("n_s",), zlib=True) v.long_name = "Weights values" v[...] 
= weights @@ -2852,16 +2859,16 @@ def create_esmpy_weights( f"Time taken to create weights file {weights_file}: " f"{time() - start_time} s\n" f"Free memory after creation of weights file: " - f"{free_memory()/(2**30)} GiB\n" + f"{free_memory() / (2**30)} GiB\n" ) # pragma: no cover - start_time = time() - + start_time = time() # pragma: no cover + weights = None row = None col = None if esmpy_regrid_operator is not None: - # Make the Regrid instance available via the + # Make the `esmpy.Regrid` instance available via the # 'esmpy_regrid_operator' list esmpy_regrid_operator.append(r) @@ -3506,7 +3513,8 @@ def partitions(grid, grid_partitions, return_n=False): return (None,) - # Somewhere in the range [1, maximum] number of partitions + # Partition size: Somewhere in the range [1, maximum number of + # partitions] size = ceil(shape[-1] / grid_partitions) if return_n: diff --git a/cf/regrid/regridoperator.py b/cf/regrid/regridoperator.py index 0654cbc00f..173e53192f 100644 --- a/cf/regrid/regridoperator.py +++ b/cf/regrid/regridoperator.py @@ -817,8 +817,8 @@ def tosparse(self): start_index = self.start_index col_start_index = None row_start_index = None - - if weights is None: + + if weights is None: weights_file = self.weights_file if weights_file is not None: # Read the weights from the weights file @@ -866,7 +866,7 @@ def tosparse(self): (weights, (row, col)), shape=[dst_size, src_size] ) del row, col - + self._set_component("weights", weights, copy=False) self._set_component("row", None, copy=False) self._set_component("col", None, copy=False) From eca9fb8ee5d6b472e621d41f36a060dbe3db5638 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Mon, 21 Jul 2025 15:57:55 +0100 Subject: [PATCH 33/54] dev --- Changelog.rst | 11 +++++++++++ README.md | 10 +++------- cf/docstring/docstring.py | 14 ++++++++++---- cf/field.py | 2 +- cf/regrid/regrid.py | 5 +++++ setup.py | 12 ++++-------- 6 files changed, 34 insertions(+), 20 deletions(-) diff --git a/Changelog.rst b/Changelog.rst index a1589f3cfb..424debf5c2 100644 --- a/Changelog.rst +++ b/Changelog.rst @@ -1,3 +1,14 @@ +Version 3.NEXTVERSION +-------------- + +**2025-??-??** + +* Allow regridding for very large grid. New keyword parameter to + `cf.Field.regrids` and `cf.Field.regridc`: ``dst_grid_partitions`` + (https://github.com/NCAS-CMS/cf-python/issues/878) + +---- + Version 3.18.0 -------------- diff --git a/README.md b/README.md index ce752fd0af..d422dc0a07 100644 --- a/README.md +++ b/README.md @@ -111,7 +111,7 @@ of its array manipulation and can: * regrid structured grid, mesh and DSG field constructs with (multi-)linear, nearest neighbour, first- and second-order conservative and higher order patch recovery methods, including 3-d - regridding, + regridding, and large-grid support, * apply convolution filters to field constructs, * create running means from field constructs, * apply differential operators to field constructs, @@ -125,12 +125,8 @@ Visualization Powerful and flexible visualizations of `cf` field constructs, designed to be produced and configured in as few lines of code as possible, are available with the [cf-plot -package](https://ncas-cms.github.io/cf-plot/build/index.html), which -needs to be installed separately to the `cf` package. - -See the [cf-plot -gallery](https://ncas-cms.github.io/cf-plot/build/gallery.html) for a -range of plotting possibilities with example code. +package](https://ncas-cms.github.io/cf-plot), which needs to be +installed separately to the `cf` package. 
![Example outputs of cf-plot displaying selected aspects of `cf` field constructs](https://raw.githubusercontent.com/NCAS-CMS/cf-plot/master/docs/source/images/cf_gallery_image.png) diff --git a/cf/docstring/docstring.py b/cf/docstring/docstring.py index 1844884224..3f89e8ccc8 100644 --- a/cf/docstring/docstring.py +++ b/cf/docstring/docstring.py @@ -440,9 +440,10 @@ **Performance** - The computation of the weights can be much more costly - than the regridding itself, in which case reading - pre-calculated weights can improve performance. + The computation of the weights can take much longer, + and take much more memory, than the regridding itself, + in which case reading pre-calculated weights can + improve performance. Ignored if *dst* is a `RegridOperator`.""", # aggregated_units @@ -590,7 +591,12 @@ ``'maximum'`` is given then the largest possible number of partitions of the destination grid will be used. A positive integer specifies the exact number of partitions, - capped by the maximum allowed.""", + capped by the maximum allowed. + + Note that if setting *dst_grid_partitions* is a necessity + in allow the regridding to work, then it is worth + considering storing the weights in file via the + *weights_file* parameter.""", # ---------------------------------------------------------------- # Method description substitutions (4 levels of indentation) # ---------------------------------------------------------------- diff --git a/cf/field.py b/cf/field.py index d37e11f3c0..a8405d8509 100644 --- a/cf/field.py +++ b/cf/field.py @@ -13597,7 +13597,7 @@ def regridc( src_cyclic=False, dst_cyclic=False, use_src_mask=use_src_mask, - use_dst_mask=use_dst_mask, + use_dst_mask=use_dst_mask, axes=axes, ignore_degenerate=ignore_degenerate, return_operator=return_operator, diff --git a/cf/regrid/regrid.py b/cf/regrid/regrid.py index b26c3b4a76..429a6b535f 100644 --- a/cf/regrid/regrid.py +++ b/cf/regrid/regrid.py @@ -726,6 +726,11 @@ def regrid( f"{regrid_operator.weights!r}\n" f"{regrid_operator.weights.__dict__}" ) # pragma: no cover + print( + regrid_operator.weights.data.size, + regrid_operator.weights.indptr.size, + regrid_operator.weights.indices.size, + ) return regrid_operator diff --git a/setup.py b/setup.py index 495dfa08a8..aef989624a 100755 --- a/setup.py +++ b/setup.py @@ -156,13 +156,9 @@ def compile(): ============= Powerful, flexible, and very simple to produce visualizations of field -constructs are available with the -`cf-plot `_ package, that -needs to be installed seprately to the ``cf`` package. - -See the `cfplot gallery -`_ for the full range -of plotting possibilities with example code. +constructs are available with the `cf-plot +`_ package, that needs to be +installed seprately to the ``cf`` package. 
Functionality ============= @@ -219,7 +215,7 @@ def compile(): * regrid structured grid, mesh and DSG field constructs with (multi-)linear, nearest neighbour, first- and second-order conservative and higher order patch recovery methods, including 3-d - regridding, + regridding, and large-grid support, * apply convolution filters to field constructs, From 4af2ba4cbb42fb3060189fb7b07542e32a00ae2b Mon Sep 17 00:00:00 2001 From: David Hassell Date: Mon, 21 Jul 2025 16:28:34 +0100 Subject: [PATCH 34/54] dev --- Changelog.rst | 1 + cf/test/test_functions.py | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/Changelog.rst b/Changelog.rst index 31ecc60888..7b6d1b878f 100644 --- a/Changelog.rst +++ b/Changelog.rst @@ -1,4 +1,5 @@ Version NEXTVERSION +-------------- **2025-??-??** diff --git a/cf/test/test_functions.py b/cf/test/test_functions.py index 7ddf71d0e0..1a26a1afdc 100644 --- a/cf/test/test_functions.py +++ b/cf/test/test_functions.py @@ -38,7 +38,6 @@ def test_keyword_deprecation(self): def test_aliases(self): self.assertEqual(cf.log_level(), cf.LOG_LEVEL()) - self.assertEqual(cf.free_memory(), cf.FREE_MEMORY()) self.assertEqual(cf.total_memory(), cf.TOTAL_MEMORY()) self.assertEqual(cf.regrid_logging(), cf.REGRID_LOGGING()) self.assertEqual(cf.relaxed_identities(), cf.RELAXED_IDENTITIES()) From 89b048358f9580558fb551737d440e7f5fa49dfb Mon Sep 17 00:00:00 2001 From: David Hassell Date: Wed, 23 Jul 2025 12:02:53 +0100 Subject: [PATCH 35/54] dev --- cf/data/dask_regrid.py | 3 +++ cf/field.py | 3 ++- cf/mixin/propertiesdatabounds.py | 1 + cf/test/test_regrid_partitions.py | 2 ++ 4 files changed, 8 insertions(+), 1 deletion(-) diff --git a/cf/data/dask_regrid.py b/cf/data/dask_regrid.py index 6b642d0f79..1efe0d0d6a 100644 --- a/cf/data/dask_regrid.py +++ b/cf/data/dask_regrid.py @@ -513,6 +513,7 @@ def _regrid( indptr = weights.indptr indices = weights.indices data = weights.data + # REVIEW: The above change saves a lot of memory, and only slow things down a little bit for j, (i0, i1) in enumerate(zip(indptr[:-1], indptr[1:])): mask = src_mask[indices[i0:i1]] n_masked = count_nonzero(mask) @@ -554,6 +555,7 @@ def _regrid( indptr = weights.indptr indices = weights.indices data = weights.data + # REVIEW: The above changes save a lot of memory, and only slow things down a little bit for j, (i0, i1) in enumerate(zip(indptr[:-1], indptr[1:])): mask = src_mask[indices[i0:i1]] if not count_nonzero(mask): @@ -585,6 +587,7 @@ def _regrid( count_nonzero = np.count_nonzero indptr = weights.indptr indices = weights.indices + # REVIEW: The above change saves a lot of memory, and only slow things down a little bit for j, (i0, i1) in enumerate(zip(indptr[:-1], indptr[1:])): mask = src_mask[indices[i0:i1]] n_masked = count_nonzero(mask) diff --git a/cf/field.py b/cf/field.py index a8405d8509..ddc3abdae7 100644 --- a/cf/field.py +++ b/cf/field.py @@ -382,6 +382,7 @@ def __getitem__(self, indices): (6, 4, 3) """ + # REVIEW: the debug statements don't seem necessary anymore (the code has worked for years!), and they just clutter things up when verbose=-1 if indices is Ellipsis: return self.copy() @@ -13597,7 +13598,7 @@ def regridc( src_cyclic=False, dst_cyclic=False, use_src_mask=use_src_mask, - use_dst_mask=use_dst_mask, + use_dst_mask=use_dst_mask, axes=axes, ignore_degenerate=ignore_degenerate, return_operator=return_operator, diff --git a/cf/mixin/propertiesdatabounds.py b/cf/mixin/propertiesdatabounds.py index cafb8bb3f7..50fbee5759 100644 --- a/cf/mixin/propertiesdatabounds.py +++ 
b/cf/mixin/propertiesdatabounds.py @@ -42,6 +42,7 @@ def __getitem__(self, indices): x.__getitem__(indices) <==> x[indices] """ + # REVIEW: the debug statements don't seem necessary anymore (the code has worked for years!), and they just clutter things up when verbose=-1 if indices is Ellipsis: return self.copy() diff --git a/cf/test/test_regrid_partitions.py b/cf/test/test_regrid_partitions.py index 6aa7afd0f4..f3d400704d 100644 --- a/cf/test/test_regrid_partitions.py +++ b/cf/test/test_regrid_partitions.py @@ -133,10 +133,12 @@ def test_Field_regrid_partitions_cannot_partition(self): src = self.src_grid dst = self.dst_grid for n in (2, "maximum"): + # Can't partition for particular regrid methods for method in invalid_methods: with self.assertRaises(ValueError): src.regrids(dst, method=method, dst_grid_partitions=n) + # Can't partition when return_esmpy_regrid_operator=True with self.assertRaises(ValueError): src.regrids( dst, From 027c4d331c8d6c64079fe0f65dda7ca3446edb11 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Thu, 24 Jul 2025 15:30:22 +0100 Subject: [PATCH 36/54] dev --- cf/docstring/docstring.py | 30 +++++++++----- cf/field.py | 17 ++++---- cf/regrid/regrid.py | 68 ++++++++++++++++--------------- cf/test/test_regrid_partitions.py | 4 ++ 4 files changed, 68 insertions(+), 51 deletions(-) diff --git a/cf/docstring/docstring.py b/cf/docstring/docstring.py index 3f89e8ccc8..8c656901c8 100644 --- a/cf/docstring/docstring.py +++ b/cf/docstring/docstring.py @@ -586,17 +586,25 @@ The *dst_grid_partitions* parameter sets the number of destination grid partitions for the weights calculations. The default value is ``1``, i.e. one - partition for the entire grid, that maximises memory usage - and minimises the weights calculation time. If the string - ``'maximum'`` is given then the largest possible number of - partitions of the destination grid will be used. A - positive integer specifies the exact number of partitions, - capped by the maximum allowed. - - Note that if setting *dst_grid_partitions* is a necessity - in allow the regridding to work, then it is worth - considering storing the weights in file via the - *weights_file* parameter.""", + partition for the entire destination grid, maximising + memory usage and minimising the calculation time. If the + string ``'maximum'`` is given then the largest possible + number of partitions of the destination grid will be used, + minimising memory usage and maximising the calculation + time. A positive integer specifies the exact number of + partitions, capped by the maximum allowed, allowing the + balance between memory usage and calculation time to be + adjusted. + + The actual number of destination grid partitions, and + their sizes and shapes, is displayed when ``'DEBUG'`` + logging is activated. See *verbose* for details. + + .. 
note:: If setting *dst_grid_partitions* is required for + the regridding to work, then it is worth + considering storing the weights in a file for + fast future access, via the *weights_file* + parameter.""", # ---------------------------------------------------------------- # Method description substitutions (4 levels of indentation) # ---------------------------------------------------------------- diff --git a/cf/field.py b/cf/field.py index a8405d8509..857ad5aeee 100644 --- a/cf/field.py +++ b/cf/field.py @@ -13198,9 +13198,12 @@ def regrids( {{dst_grid_partitions: `int` or `str`, optional}} - The maximum possible number of partitions is equal to - the size of the destination grid Y axis for 2-d - regridding, or the Z axis for 3-d regridding. + The maximum number of partitions, Nmax, depends on the + nature of the destination grid: If the Z axis is being + regridded, Nmax = the size of the Z axis. For a 2-d + structured grid, Nmax = the size of the Y axis. For a + UGRID, HEALPix, or DSG grid, Nmax = the size of the + horizontal discrete axis. .. versionadded:: NEXTVERSION @@ -13507,9 +13510,9 @@ def regridc( {{dst_grid_partitions: `int` or `str`, optional}} Partitioning is only available for 2-d or 3-d - regridding. The maximum possible number of partitions - is equal to the size of the first of the destination - grid axes specified by the *axes* parameter. + regridding. The maximum number of partitions is the + size of the first of the destination grid axes + specified by the *axes* parameter. .. versionadded:: NEXTVERSION @@ -13597,7 +13600,7 @@ def regridc( src_cyclic=False, dst_cyclic=False, use_src_mask=use_src_mask, - use_dst_mask=use_dst_mask, + use_dst_mask=use_dst_mask, axes=axes, ignore_degenerate=ignore_degenerate, return_operator=return_operator, diff --git a/cf/regrid/regrid.py b/cf/regrid/regrid.py index 429a6b535f..440a50763c 100644 --- a/cf/regrid/regrid.py +++ b/cf/regrid/regrid.py @@ -726,11 +726,6 @@ def regrid( f"{regrid_operator.weights!r}\n" f"{regrid_operator.weights.__dict__}" ) # pragma: no cover - print( - regrid_operator.weights.data.size, - regrid_operator.weights.indptr.size, - regrid_operator.weights.indices.size, - ) return regrid_operator @@ -1984,11 +1979,13 @@ def create_esmpy_grid(grid, mask=None, grid_partitions=1): spherical = False coord_sys = esmpy.CoordSys.CART + # Create the esmpy.Grid for each partition for i, partition in enumerate(partitions(grid, grid_partitions)): coords = grid.coords[:] bounds = grid.bounds[:] if partition is not None: + # Subspace the coordinates for this partition if debug: logger.debug( f"Partition {i}: Index: {partition}" @@ -2262,33 +2259,37 @@ def create_esmpy_mesh(grid, mask=None, grid_partitions=1): # Cartesian coord_sys = esmpy.CoordSys.CART + # Create the esmpy.Mesh for each partition for i, partition in enumerate(partitions(grid, grid_partitions)): - # Create an empty esmpy.Mesh for this partition + # Initialise the esmpy.Mesh esmpy_mesh = esmpy.Mesh( parametric_dim=2, spatial_dim=2, coord_sys=coord_sys ) + grid_coords = grid.coords + grid_bounds = grid.bounds domain_topology = grid.domain_topology if partition is not None: + # Subspace the coordinates and domain topology for this + # partition if debug: logger.debug( f"Partition {i} index: {partition}" ) # pragma: no cover + # All coordinates and the domain topology span the same + # discrete axis + grid_coords = [c[partition] for c in grid_coords] + grid_bounds = [b[partition] for b in grid_bounds] domain_topology = domain_topology[partition] - element_conn = 
domain_topology.normalise().array + element_conn = domain_topology.normalise(start_index=0).array element_count = element_conn.shape[0] element_types = np.ma.count(element_conn, axis=1) element_conn = np.ma.compressed(element_conn) # Element coordinates - grid_coords = grid.coords if grid_coords: - if partition is not None: - # All coordinates span the same discrete axis - grid_coords = [c[partition] for c in grid_coords] - try: element_coords = [c.array for c in grid_coords] except AttributeError: @@ -2299,22 +2300,15 @@ def create_esmpy_mesh(grid, mask=None, grid_partitions=1): else: element_coords = None - grid_bounds = grid.bounds - if partition is not None: - # All coordinates span the same discrete axis - grid_bounds = [b[partition] for b in grid_bounds] - node_ids, index = np.unique(element_conn, return_index=True) node_coords = [b.data.compressed().array[index] for b in grid_bounds] node_coords = np.stack(node_coords, axis=-1) node_count = node_ids.size - node_owners = np.zeros(node_count) + node_owners = np.zeros(node_count, "int32") - # Make sure that node IDs are >= 1, as needed by newer versions of - # esmpy. - min_id = node_ids.min() - if min_id < 1: - node_ids = node_ids + min_id + 1 + # esmpy requires that node IDs are >= 1 (they're currently >= + # 0 due to using 'start_index=0' above) + node_ids += 1 # Add nodes. This must be done before `add_elements`. esmpy_mesh.add_nodes( @@ -2399,9 +2393,11 @@ def create_esmpy_locstream(grid, mask=None, grid_partitions=1): coord_sys = esmpy.CoordSys.CART keys = ("ESMF:X", "ESMF:Y", "ESMF:Z") + # Create the esmpy.LocStream for each partition for i, partition in enumerate(partitions(grid, grid_partitions)): coords = grid.coords if partition is not None: + # Subspace the coordinates for this partition if debug: logger.debug( f"Partition {i} index: {partition}" @@ -2633,10 +2629,10 @@ def create_esmpy_weights( # each destination grid partition w = [] - # Loop round destination grid partitions if debug: start_time = time() # pragma: no cover + # Loop round destination grid partitions for i, dst_esmpy_grid in enumerate(dst_esmpy_grids): if debug: logger.debug( @@ -2646,7 +2642,7 @@ def create_esmpy_weights( f"Partition {i}: Destination ESMF {dst_esmpy_grid}" ) # pragma: no cover start_time = time() # pragma: no cover - print(dst_esmpy_grid.size[0]) + # Create destination esmpy field dst_esmpy_field = esmpy.Field( dst_esmpy_grid, name="dst", meshloc=dst_meshloc @@ -3451,8 +3447,7 @@ def set_grid_type(grid): def partitions(grid, grid_partitions, return_n=False): """Partitions of the grid. - Each partition is defined as an index to cell coordinates, which - may be used to create the actual partition of the grid. + Each partition is defined as an index to cell coordinates. Only a destinaton grid without a dummy size 2 dimension can be partitioned. @@ -3480,10 +3475,17 @@ def partitions(grid, grid_partitions, return_n=False): generator or `tuple` or `int` The partition specifications. Each partition specification - is a tuple of `slice` objects. When there is a single - partition that spans the entire grid, then the special - value of ``(None,)`` is returned. If *return_n* is True - then the number of partitions will be returned. + is a tuple of `slice` objects. Each of these tuples + contains a slice for each axis of the coordinate + constructs, i.e. for N-d coordinates, each tuple has N + elements. + + When there is a single partition that spans the entire + grid, then the special value of ``(None,)`` is + returned. 
+ + If *return_n* is True then the integer number of + partitions will be returned, instead. """ if ( @@ -3518,8 +3520,8 @@ def partitions(grid, grid_partitions, return_n=False): return (None,) - # Partition size: Somewhere in the range [1, maximum number of - # partitions] + # Set the partition size to somewhere in the range [1, maximum + # number of partitions] size = ceil(shape[-1] / grid_partitions) if return_n: diff --git a/cf/test/test_regrid_partitions.py b/cf/test/test_regrid_partitions.py index 6aa7afd0f4..00cf6933c5 100644 --- a/cf/test/test_regrid_partitions.py +++ b/cf/test/test_regrid_partitions.py @@ -128,6 +128,10 @@ def test_Field_regrid_partitions_2d_grid_to_grid(self): self.assertTrue(r0.equal_weights(r1)) self.assertTrue(r0.equal_dst_mask(r1)) + @unittest.skipUnless(esmpy_imported, "Requires esmpy/ESMF package.") + def test_Field_regrids_partitions_3d_grid_to_grid(self): + self.assertFalse(cf.regrid_logging()) + @unittest.skipUnless(esmpy_imported, "Requires esmpy/ESMF package.") def test_Field_regrid_partitions_cannot_partition(self): src = self.src_grid From 7bacca45c96d33bf0e5a56d830071a25f7c44cab Mon Sep 17 00:00:00 2001 From: David Hassell Date: Fri, 25 Jul 2025 11:55:08 +0100 Subject: [PATCH 37/54] dev --- cf/data/dask_regrid.py | 1 + cf/regrid/regrid.py | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/cf/data/dask_regrid.py b/cf/data/dask_regrid.py index 1efe0d0d6a..d1b45f08a0 100644 --- a/cf/data/dask_regrid.py +++ b/cf/data/dask_regrid.py @@ -617,6 +617,7 @@ def _regrid( # ---------------------------------------------------------------- a = np.ma.getdata(a) a = weights.dot(a) + if dst_mask is not None: a = np.ma.array(a) a[dst_mask] = np.ma.masked diff --git a/cf/regrid/regrid.py b/cf/regrid/regrid.py index 440a50763c..5c164a7fdf 100644 --- a/cf/regrid/regrid.py +++ b/cf/regrid/regrid.py @@ -638,6 +638,7 @@ def regrid( esmpy_regrid_operator = [] if return_esmpy_regrid_operator else None + # Convert dst_grid_partitions to a number dst_grid_partitions = partitions( dst_grid, dst_grid_partitions, return_n=True ) @@ -3447,7 +3448,9 @@ def set_grid_type(grid): def partitions(grid, grid_partitions, return_n=False): """Partitions of the grid. - Each partition is defined as an index to cell coordinates. + Each partition is defined as an index to cell coordinates. Only + the last (left-most) dimension in esmpy order is partitioned. Note + that the coordinates storded in `grid.coords` are in esmpy order. Only a destinaton grid without a dummy size 2 dimension can be partitioned. 
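Note on the partitioning scheme touched by the patches above: a separate CSR
weights block is computed for each destination-grid partition and the blocks
are then concatenated along the destination (row) axis (`create_esmpy_weights`
imports `csr_array` and `vstack` from `scipy.sparse` for this), so the
regridding itself remains a single sparse dot product (`weights.dot(a)` in
`_regrid`). The following is a minimal, self-contained sketch of that idea
only; the sizes, weight values and variable names (`w0`, `w1`, `src_size`) are
made up for illustration and are not code from the patch series::

    import numpy as np
    from scipy.sparse import csr_array, vstack

    src_size = 6

    # Weights for two destination-grid partitions, holding 2 and 3
    # destination cells respectively. Each block has one row per
    # destination cell in its partition and src_size columns.
    w0 = csr_array(
        (np.array([1.0, 0.5, 0.5]),
         (np.array([0, 1, 1]), np.array([0, 1, 2]))),
        shape=(2, src_size),
    )
    w1 = csr_array(
        (np.array([1.0, 1.0, 0.25, 0.75]),
         (np.array([0, 1, 2, 2]), np.array([3, 4, 4, 5]))),
        shape=(3, src_size),
    )

    # Stack the per-partition blocks along the destination axis to get
    # the full (dst_size, src_size) weights matrix
    weights = vstack([w0, w1]).tocsr()

    # Regrid flattened source data with a sparse dot product
    src = np.arange(1.0, src_size + 1)
    dst = weights.dot(src)
    print(weights.shape)  # (5, 6)
    print(dst)            # approximately [1.  2.5  4.  5.  5.75]

Because each partition contributes a distinct block of rows, the blocks can be
computed one at a time, keeping only one partition's ESMF objects in memory,
and concatenated afterwards without changing the final weights matrix.
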
From ba7dc31ead4eb476e9d05c37ebf35ce988189474 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Fri, 25 Jul 2025 15:18:25 +0100 Subject: [PATCH 38/54] dev --- cf/regrid/regrid.py | 25 ++++++++++++++++++++----- cf/regrid/regridoperator.py | 5 +++-- 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/cf/regrid/regrid.py b/cf/regrid/regrid.py index 5c164a7fdf..18e62ac6d6 100644 --- a/cf/regrid/regrid.py +++ b/cf/regrid/regrid.py @@ -2811,12 +2811,20 @@ def create_esmpy_weights( f"with shape {src_grid.esmpy_shape} to destination " f"{dst_grid.type} with shape {dst_grid.esmpy_shape}" ) + nc.comment = "See https://earthsystemmodeling.org/docs/release/latest/ESMF_refdoc for information on the dataset structure" nc.source = f"cf v{__version__}, esmpy v{esmpy.__version__}" nc.history = f"Created at {datetime.now()}" - nc.regrid_method = regrid_method + nc.map_method = regrid_method + nc.domain_a = src_grid.type + nc.domain_b = dst_grid.type nc.ESMF_unmapped_action = ESMF_unmapped_action nc.ESMF_ignore_degenerate = ESMF_ignore_degenerate + # The number of source cells + nc.createDimension("n_a", src_size) + # The number of destination cells + nc.createDimension("n_b", dst_size) + # The number of entries in the regridding matrix nc.createDimension("n_s", weights.size) nc.createDimension("src_grid_rank", src_rank) nc.createDimension("dst_grid_rank", dst_rank) @@ -2834,21 +2842,28 @@ def create_esmpy_weights( v[...] = dst_grid.esmpy_shape v = nc.createVariable("S", weights.dtype, ("n_s",), zlib=True) - - v.long_name = "Weights values" + v.long_name = ( + "The weight for each entry in the regridding matrix" + ) v[...] = weights nc.sync() del weights v = nc.createVariable("row", row.dtype, ("n_s",), zlib=True) - v.long_name = "Destination/row indices" + v.long_name = ( + "The position in the destination grid for each entry " + "in the weight matrix" + ) v.start_index = start_index v[...] = row nc.sync() del row v = nc.createVariable("col", col.dtype, ("n_s",), zlib=True) - v.long_name = "Source/col indices" + v.long_name = ( + "The position in the source grid for each entry " + "in the regridding matrix" + ) v.start_index = start_index v[...] 
= col nc.sync() diff --git a/cf/regrid/regridoperator.py b/cf/regrid/regridoperator.py index 173e53192f..6852fecb24 100644 --- a/cf/regrid/regridoperator.py +++ b/cf/regrid/regridoperator.py @@ -687,16 +687,17 @@ def equal_weights(self, other, rtol=None, atol=None): """ if atol is None: - atol = cf_atol() + atol = float(cf_atol()) if rtol is None: - rtol = cf_rtol() + rtol = float(cf_rtol()) self.tosparse() other.tosparse() w0 = self.weights w1 = other.weights + return ( w0.shape == w1.shape and w0.data.shape == w1.data.shape From 292ee380f139d11d76440f41bedfae32aa3efd7f Mon Sep 17 00:00:00 2001 From: David Hassell Date: Tue, 29 Jul 2025 12:52:07 +0100 Subject: [PATCH 39/54] dev --- cf/data/dask_regrid.py | 3 --- cf/field.py | 1 - cf/mixin/propertiesdatabounds.py | 1 - 3 files changed, 5 deletions(-) diff --git a/cf/data/dask_regrid.py b/cf/data/dask_regrid.py index d1b45f08a0..2536c4c606 100644 --- a/cf/data/dask_regrid.py +++ b/cf/data/dask_regrid.py @@ -513,7 +513,6 @@ def _regrid( indptr = weights.indptr indices = weights.indices data = weights.data - # REVIEW: The above change saves a lot of memory, and only slow things down a little bit for j, (i0, i1) in enumerate(zip(indptr[:-1], indptr[1:])): mask = src_mask[indices[i0:i1]] n_masked = count_nonzero(mask) @@ -555,7 +554,6 @@ def _regrid( indptr = weights.indptr indices = weights.indices data = weights.data - # REVIEW: The above changes save a lot of memory, and only slow things down a little bit for j, (i0, i1) in enumerate(zip(indptr[:-1], indptr[1:])): mask = src_mask[indices[i0:i1]] if not count_nonzero(mask): @@ -587,7 +585,6 @@ def _regrid( count_nonzero = np.count_nonzero indptr = weights.indptr indices = weights.indices - # REVIEW: The above change saves a lot of memory, and only slow things down a little bit for j, (i0, i1) in enumerate(zip(indptr[:-1], indptr[1:])): mask = src_mask[indices[i0:i1]] n_masked = count_nonzero(mask) diff --git a/cf/field.py b/cf/field.py index 6c3e0f0d4b..857ad5aeee 100644 --- a/cf/field.py +++ b/cf/field.py @@ -382,7 +382,6 @@ def __getitem__(self, indices): (6, 4, 3) """ - # REVIEW: the debug statements don't seem necessary anymore (the code has worked for years!), and they just clutter things up when verbose=-1 if indices is Ellipsis: return self.copy() diff --git a/cf/mixin/propertiesdatabounds.py b/cf/mixin/propertiesdatabounds.py index 50fbee5759..cafb8bb3f7 100644 --- a/cf/mixin/propertiesdatabounds.py +++ b/cf/mixin/propertiesdatabounds.py @@ -42,7 +42,6 @@ def __getitem__(self, indices): x.__getitem__(indices) <==> x[indices] """ - # REVIEW: the debug statements don't seem necessary anymore (the code has worked for years!), and they just clutter things up when verbose=-1 if indices is Ellipsis: return self.copy() From 60ed1283388d07ac8f87434dac2bfe8ff6dcd3e1 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Mon, 4 Aug 2025 13:51:24 +0100 Subject: [PATCH 40/54] dev --- cf/docstring/docstring.py | 7 ++++--- cf/regrid/regrid.py | 33 ++++++++++++++++++++++++++------- cf/test/test_Data.py | 4 +++- 3 files changed, 33 insertions(+), 11 deletions(-) diff --git a/cf/docstring/docstring.py b/cf/docstring/docstring.py index 55f3de560e..93293880dc 100644 --- a/cf/docstring/docstring.py +++ b/cf/docstring/docstring.py @@ -596,9 +596,10 @@ balance between memory usage and calculation time to be adjusted. - The actual number of destination grid partitions, and - their sizes and shapes, is displayed when ``'DEBUG'`` - logging is activated. See *verbose* for details. 
+ The actual number of destination grid partitions, their + sizes and shapes, and the each partition's time and memory + requirement are displayed when ``'DEBUG'`` logging is + activated. See *verbose* for details. .. note:: If setting *dst_grid_partitions* is required for the regridding to work, then it is worth diff --git a/cf/regrid/regrid.py b/cf/regrid/regrid.py index 18e62ac6d6..2dab8497da 100644 --- a/cf/regrid/regrid.py +++ b/cf/regrid/regrid.py @@ -638,7 +638,7 @@ def regrid( esmpy_regrid_operator = [] if return_esmpy_regrid_operator else None - # Convert dst_grid_partitions to a number + # Find the actual number of partitions dst_grid_partitions = partitions( dst_grid, dst_grid_partitions, return_n=True ) @@ -2574,6 +2574,8 @@ def create_esmpy_weights( if compute_weights: # or esmpy_regrid_operator is not None: partitioned_dst_grid = dst_grid_partitions > 1 if debug: + from resource import getrusage, RUSAGE_SELF + start_time0 = time() logger.debug( "Calculating weights ...\n\n" @@ -2655,6 +2657,9 @@ def create_esmpy_weights( mask_values = np.array([0], dtype="int32") # Create the esmpy.Regrid operator + if debug: + ru_maxrss0 = getrusage(RUSAGE_SELF).ru_maxrss + r = esmpy.Regrid( src_esmpy_field, dst_esmpy_field, @@ -2672,12 +2677,15 @@ def create_esmpy_weights( col = weights["col_src"] weights = weights["weights"] - if debug: - logger.debug( - f"Partition {i}: Time taken by ESMF to create weights: " - f"{time() - start_time} s\n" - ) # pragma: no cover - start_time = time() # pragma: no cover +# if debug: +# ru_maxrss1 = getrusage(RUSAGE_SELF).ru_maxrss +# logger.debug( +# f"Partition {i}: Time taken by ESMF to create weights: " +# f"{time() - start_time} s\n" +# f"Partition {i}: Memory used by ESMF to create weights: " +# f"{(ru_maxrss1 - ru_maxrss0) * 1000/(2**30)} GiB" +# ) # pragma: no cover +# start_time = time() # pragma: no cover ESMF_unmapped_action = r.unmapped_action ESMF_ignore_degenerate = int(r.ignore_degenerate) @@ -2690,6 +2698,17 @@ def create_esmpy_weights( r.dstfield.destroy() r.destroy() + if debug: + ru_maxrss1 = getrusage(RUSAGE_SELF).ru_maxrss + logger.debug( + f"Partition {i}: Time taken by ESMF to create weights: " + f"{time() - start_time} s\n" + f"Partition {i}: Memory used by ESMF to create weights: " + f"{(ru_maxrss1 - ru_maxrss0) * 1000/(2**30)} GiB" + ) # pragma: no cover + start_time = time() # pragma: no cover + + if quarter: # The weights were created with a dummy size 2 # dimension such that the weights for each dummy axis diff --git a/cf/test/test_Data.py b/cf/test/test_Data.py index 5918fb79b6..2300a62e47 100644 --- a/cf/test/test_Data.py +++ b/cf/test/test_Data.py @@ -4592,7 +4592,9 @@ def test_Data_masked_values(self): d = cf.Data(array) e = d.masked_values(1.1) ea = e.array - a = np.ma.masked_values(array, 1.1, rtol=cf.rtol(), atol=cf.atol()) + a = np.ma.masked_values( + array, 1.1, rtol=float(cf.rtol()), atol=float(cf.atol()) + ) self.assertTrue(np.isclose(ea, a).all()) self.assertTrue((ea.mask == a.mask).all()) self.assertIsNone(d.masked_values(1.1, inplace=True)) From 07791080b390959572f75e4696499c9a616c84b3 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Mon, 4 Aug 2025 14:06:31 +0100 Subject: [PATCH 41/54] dev --- cf/regrid/regrid.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/cf/regrid/regrid.py b/cf/regrid/regrid.py index 2dab8497da..841906bc35 100644 --- a/cf/regrid/regrid.py +++ b/cf/regrid/regrid.py @@ -2634,6 +2634,7 @@ def create_esmpy_weights( if debug: start_time = time() # pragma: no cover 
+ ru_maxrss1 =None # Loop round destination grid partitions for i, dst_esmpy_grid in enumerate(dst_esmpy_grids): @@ -2707,8 +2708,16 @@ def create_esmpy_weights( f"{(ru_maxrss1 - ru_maxrss0) * 1000/(2**30)} GiB" ) # pragma: no cover start_time = time() # pragma: no cover + if ru_maxrss1 is None: + ru_maxrss1 = getrusage(RUSAGE_SELF).ru_maxrss + logger.debug( + f"Partition {i}: Memory used by ESMF to create " + f"weights: {(ru_maxrss1 - ru_maxrss0) * 1000/(2**30)} " + "GiB" + ) # pragma: no cover + + start_time = time() # pragma: no cover - if quarter: # The weights were created with a dummy size 2 # dimension such that the weights for each dummy axis From ffe2761f369e7bb94490dc718ad53d959360df0e Mon Sep 17 00:00:00 2001 From: David Hassell Date: Mon, 4 Aug 2025 14:08:16 +0100 Subject: [PATCH 42/54] dev --- cf/regrid/regrid.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cf/regrid/regrid.py b/cf/regrid/regrid.py index 841906bc35..2082d3b171 100644 --- a/cf/regrid/regrid.py +++ b/cf/regrid/regrid.py @@ -2700,7 +2700,6 @@ def create_esmpy_weights( r.destroy() if debug: - ru_maxrss1 = getrusage(RUSAGE_SELF).ru_maxrss logger.debug( f"Partition {i}: Time taken by ESMF to create weights: " f"{time() - start_time} s\n" @@ -2708,7 +2707,7 @@ def create_esmpy_weights( f"{(ru_maxrss1 - ru_maxrss0) * 1000/(2**30)} GiB" ) # pragma: no cover start_time = time() # pragma: no cover - if ru_maxrss1 is None: + if ru_maxrss1 is None: ru_maxrss1 = getrusage(RUSAGE_SELF).ru_maxrss logger.debug( f"Partition {i}: Memory used by ESMF to create " From ca1f80847a56948ba416752d77e0ff219aebd632 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Mon, 4 Aug 2025 14:14:25 +0100 Subject: [PATCH 43/54] dev --- cf/regrid/regrid.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/cf/regrid/regrid.py b/cf/regrid/regrid.py index 2082d3b171..51b7c21553 100644 --- a/cf/regrid/regrid.py +++ b/cf/regrid/regrid.py @@ -2703,8 +2703,6 @@ def create_esmpy_weights( logger.debug( f"Partition {i}: Time taken by ESMF to create weights: " f"{time() - start_time} s\n" - f"Partition {i}: Memory used by ESMF to create weights: " - f"{(ru_maxrss1 - ru_maxrss0) * 1000/(2**30)} GiB" ) # pragma: no cover start_time = time() # pragma: no cover if ru_maxrss1 is None: From 77eeecd717c160070e6fb97ac08fa33744b70755 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Mon, 4 Aug 2025 14:18:54 +0100 Subject: [PATCH 44/54] dev --- cf/regrid/regrid.py | 33 +++++++++++---------------------- 1 file changed, 11 insertions(+), 22 deletions(-) diff --git a/cf/regrid/regrid.py b/cf/regrid/regrid.py index 51b7c21553..88822f5c39 100644 --- a/cf/regrid/regrid.py +++ b/cf/regrid/regrid.py @@ -2580,7 +2580,7 @@ def create_esmpy_weights( logger.debug( "Calculating weights ...\n\n" "Free memory before calculation of weights: " - f"{free_memory() / (2**30)} GiB\n\n" + f"{free_memory() / 2**30} GiB\n\n" "Number of destination grid partitions: " f"{dst_grid_partitions}\n" ) # pragma: no cover @@ -2634,7 +2634,7 @@ def create_esmpy_weights( if debug: start_time = time() # pragma: no cover - ru_maxrss1 =None + maxrss1 =None # Loop round destination grid partitions for i, dst_esmpy_grid in enumerate(dst_esmpy_grids): @@ -2659,7 +2659,7 @@ def create_esmpy_weights( # Create the esmpy.Regrid operator if debug: - ru_maxrss0 = getrusage(RUSAGE_SELF).ru_maxrss + maxrss0 = getrusage(RUSAGE_SELF).ru_maxrss r = esmpy.Regrid( src_esmpy_field, @@ -2678,16 +2678,6 @@ def create_esmpy_weights( col = weights["col_src"] weights = weights["weights"] -# if debug: -# 
ru_maxrss1 = getrusage(RUSAGE_SELF).ru_maxrss -# logger.debug( -# f"Partition {i}: Time taken by ESMF to create weights: " -# f"{time() - start_time} s\n" -# f"Partition {i}: Memory used by ESMF to create weights: " -# f"{(ru_maxrss1 - ru_maxrss0) * 1000/(2**30)} GiB" -# ) # pragma: no cover -# start_time = time() # pragma: no cover - ESMF_unmapped_action = r.unmapped_action ESMF_ignore_degenerate = int(r.ignore_degenerate) @@ -2702,15 +2692,14 @@ def create_esmpy_weights( if debug: logger.debug( f"Partition {i}: Time taken by ESMF to create weights: " - f"{time() - start_time} s\n" + f"{time() - start_time} s" ) # pragma: no cover start_time = time() # pragma: no cover - if ru_maxrss1 is None: - ru_maxrss1 = getrusage(RUSAGE_SELF).ru_maxrss + if maxrss1 is None: + maxrss1 = getrusage(RUSAGE_SELF).ru_maxrss logger.debug( f"Partition {i}: Memory used by ESMF to create " - f"weights: {(ru_maxrss1 - ru_maxrss0) * 1000/(2**30)} " - "GiB" + f"weights: {(maxrss1 - maxrss0) * 1000/2**30} GiB" ) # pragma: no cover start_time = time() # pragma: no cover @@ -2760,7 +2749,7 @@ def create_esmpy_weights( if debug: logger.debug( f"Partition {i}: Free memory after weights calculation: " - f"{free_memory() / (2**30)} GiB\n" + f"{free_memory() / 2**30} GiB\n" ) # pragma: no cover start_time = time() # pragma: no cover @@ -2785,7 +2774,7 @@ def create_esmpy_weights( f"Time taken to concatenate sparse weights arrays: " f"{time() - start_time} s\n" f"Free memory after concatenation of sparse weights " - f"arrays: {free_memory() / (2**30)} GiB\n" + f"arrays: {free_memory() / 2**30} GiB\n" f"Sparse weights array for all partitions: {weights!r}\n" ) # pragma: no start_time = time() # pragma: no cover @@ -2795,7 +2784,7 @@ def create_esmpy_weights( "Total time taken to calculate all weights: " f"{time() - start_time0} s\n" "Free memory after calculation of all weights: " - f"{free_memory() / (2**30)} GiB\n" + f"{free_memory() / 2**30} GiB\n" ) # pragma: no cover if weights_file is not None: @@ -2901,7 +2890,7 @@ def create_esmpy_weights( f"Time taken to create weights file {weights_file}: " f"{time() - start_time} s\n" f"Free memory after creation of weights file: " - f"{free_memory() / (2**30)} GiB\n" + f"{free_memory() / 2**30} GiB\n" ) # pragma: no cover start_time = time() # pragma: no cover From 3758f5f4fd97ed23a74dc9ac832868fb7f6bb602 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Mon, 4 Aug 2025 14:21:01 +0100 Subject: [PATCH 45/54] dev --- cf/regrid/regrid.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/cf/regrid/regrid.py b/cf/regrid/regrid.py index 88822f5c39..b134b76c23 100644 --- a/cf/regrid/regrid.py +++ b/cf/regrid/regrid.py @@ -2574,8 +2574,8 @@ def create_esmpy_weights( if compute_weights: # or esmpy_regrid_operator is not None: partitioned_dst_grid = dst_grid_partitions > 1 if debug: - from resource import getrusage, RUSAGE_SELF - + from resource import RUSAGE_SELF, getrusage + start_time0 = time() logger.debug( "Calculating weights ...\n\n" @@ -2634,7 +2634,7 @@ def create_esmpy_weights( if debug: start_time = time() # pragma: no cover - maxrss1 =None + maxrss1 = None # Loop round destination grid partitions for i, dst_esmpy_grid in enumerate(dst_esmpy_grids): @@ -2660,7 +2660,7 @@ def create_esmpy_weights( # Create the esmpy.Regrid operator if debug: maxrss0 = getrusage(RUSAGE_SELF).ru_maxrss - + r = esmpy.Regrid( src_esmpy_field, dst_esmpy_field, @@ -2696,12 +2696,12 @@ def create_esmpy_weights( ) # pragma: no cover start_time = time() # pragma: no cover 
if maxrss1 is None: - maxrss1 = getrusage(RUSAGE_SELF).ru_maxrss + maxrss1 = getrusage(RUSAGE_SELF).ru_maxrss logger.debug( f"Partition {i}: Memory used by ESMF to create " - f"weights: {(maxrss1 - maxrss0) * 1000/2**30} GiB" + f"weights: {(maxrss1 - maxrss0) * 1000 / 2**30} GiB" ) # pragma: no cover - + start_time = time() # pragma: no cover if quarter: From 72444308363ed55d46f8da338321cf445693c804 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Mon, 4 Aug 2025 15:38:55 +0100 Subject: [PATCH 46/54] dev --- cf/regrid/regrid.py | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/cf/regrid/regrid.py b/cf/regrid/regrid.py index b134b76c23..51dcf5b999 100644 --- a/cf/regrid/regrid.py +++ b/cf/regrid/regrid.py @@ -2577,12 +2577,16 @@ def create_esmpy_weights( from resource import RUSAGE_SELF, getrusage start_time0 = time() + maxrss = getrusage(RUSAGE_SELF).ru_maxrss logger.debug( "Calculating weights ...\n\n" - "Free memory before calculation of weights: " - f"{free_memory() / 2**30} GiB\n\n" "Number of destination grid partitions: " f"{dst_grid_partitions}\n" + "Free memory before calculation of weights: " + f"{free_memory() / 2**30} GiB\n" + "Maximum RSS before weights creation: " + f"{maxrss * 1000/ 2**30} GiB\n" + ) # pragma: no cover # Create the weights using ESMF @@ -2633,8 +2637,7 @@ def create_esmpy_weights( w = [] if debug: - start_time = time() # pragma: no cover - maxrss1 = None + start_time = time() # Loop round destination grid partitions for i, dst_esmpy_grid in enumerate(dst_esmpy_grids): @@ -2646,7 +2649,7 @@ def create_esmpy_weights( f"Partition {i}: Destination ESMF {dst_esmpy_grid}" ) # pragma: no cover start_time = time() # pragma: no cover - + # Create destination esmpy field dst_esmpy_field = esmpy.Field( dst_esmpy_grid, name="dst", meshloc=dst_meshloc @@ -2658,9 +2661,6 @@ def create_esmpy_weights( mask_values = np.array([0], dtype="int32") # Create the esmpy.Regrid operator - if debug: - maxrss0 = getrusage(RUSAGE_SELF).ru_maxrss - r = esmpy.Regrid( src_esmpy_field, dst_esmpy_field, @@ -2690,19 +2690,14 @@ def create_esmpy_weights( r.destroy() if debug: + maxrss = getrusage(RUSAGE_SELF).ru_maxrss logger.debug( f"Partition {i}: Time taken by ESMF to create weights: " - f"{time() - start_time} s" + f"{time() - start_time} s\n" + f"Partition {i}: Maximum RSS after ESMF weights creation: " + f"creation: {maxrss * 1000/ 2**30} GiB" ) # pragma: no cover start_time = time() # pragma: no cover - if maxrss1 is None: - maxrss1 = getrusage(RUSAGE_SELF).ru_maxrss - logger.debug( - f"Partition {i}: Memory used by ESMF to create " - f"weights: {(maxrss1 - maxrss0) * 1000 / 2**30} GiB" - ) # pragma: no cover - - start_time = time() # pragma: no cover if quarter: # The weights were created with a dummy size 2 From 339b6c3ed39c770463bc9738f84ec83884a52e81 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Mon, 4 Aug 2025 15:55:37 +0100 Subject: [PATCH 47/54] dev --- cf/regrid/regrid.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/cf/regrid/regrid.py b/cf/regrid/regrid.py index 51dcf5b999..8bbe8758d8 100644 --- a/cf/regrid/regrid.py +++ b/cf/regrid/regrid.py @@ -3,7 +3,6 @@ import logging from dataclasses import dataclass, field from datetime import datetime -from time import time from typing import Any import dask.array as da @@ -2575,18 +2574,18 @@ def create_esmpy_weights( partitioned_dst_grid = dst_grid_partitions > 1 if debug: from resource import RUSAGE_SELF, getrusage + from time import time - 
start_time0 = time() - maxrss = getrusage(RUSAGE_SELF).ru_maxrss + maxrss = getrusage(RUSAGE_SELF).ru_maxrss # pragma: no cover + start_time0 = time() # pragma: no cover logger.debug( "Calculating weights ...\n\n" "Number of destination grid partitions: " f"{dst_grid_partitions}\n" - "Free memory before calculation of weights: " + "Free memory before weights creation: " f"{free_memory() / 2**30} GiB\n" "Maximum RSS before weights creation: " f"{maxrss * 1000/ 2**30} GiB\n" - ) # pragma: no cover # Create the weights using ESMF @@ -2618,9 +2617,7 @@ def create_esmpy_weights( # Create source esmpy field src_esmpy_grid = src_esmpy_grids[0] if debug: - logger.debug( - f"Source ESMF {src_esmpy_grid.__class__.__name__}\n" - ) # pragma: no cover + logger.debug(f"Source ESMF {src_esmpy_grid}\n") # pragma: no cover src_esmpy_field = esmpy.Field( src_esmpy_grid, name="src", meshloc=src_meshloc @@ -2649,7 +2646,7 @@ def create_esmpy_weights( f"Partition {i}: Destination ESMF {dst_esmpy_grid}" ) # pragma: no cover start_time = time() # pragma: no cover - + # Create destination esmpy field dst_esmpy_field = esmpy.Field( dst_esmpy_grid, name="dst", meshloc=dst_meshloc @@ -2730,8 +2727,8 @@ def create_esmpy_weights( weights = csr_array( (weights, (row, col)), shape=[dst_size, src_size] ) - w.append(weights) del row, col + w.append(weights) if debug: logger.debug( From f2ca2d346f7bec4dfc6c71a830f6196bb3d5ce15 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Mon, 22 Sep 2025 12:55:09 +0100 Subject: [PATCH 48/54] dev --- docs/source/conf.py | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index 294b46dca0..945eec8118 100755 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -22,7 +22,6 @@ import cf - print("\ncf environment:") print("-----------------") cf.environment() From 3f2388609442c499e3fb6ec4376f179d9ccf1c9a Mon Sep 17 00:00:00 2001 From: David Hassell Date: Thu, 16 Oct 2025 14:12:03 +0100 Subject: [PATCH 49/54] grammar Co-authored-by: Sadie L. Bartholomew --- cf/docstring/docstring.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cf/docstring/docstring.py b/cf/docstring/docstring.py index bf19988105..dfd65e28d1 100644 --- a/cf/docstring/docstring.py +++ b/cf/docstring/docstring.py @@ -596,7 +596,7 @@ balance between memory usage and calculation time to be adjusted. - The actual number of destination grid partitions; and each + The actual number of destination grid partitions and each partition's shape, and weights calculation time and memory requirement are displayed when ``'DEBUG'`` logging is activated. See *verbose* for details. From 58d058bf4f6f19dea8bb1c440ed07a95112fd281 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Thu, 16 Oct 2025 14:15:26 +0100 Subject: [PATCH 50/54] Typos Co-authored-by: Sadie L. Bartholomew --- cf/regrid/regrid.py | 4 ++-- cf/regrid/regridoperator.py | 4 ++-- setup.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/cf/regrid/regrid.py b/cf/regrid/regrid.py index 8bbe8758d8..4279ea0272 100644 --- a/cf/regrid/regrid.py +++ b/cf/regrid/regrid.py @@ -2000,7 +2000,7 @@ def create_esmpy_grid(grid, mask=None, grid_partitions=1): if bounds: bounds[-1] = bounds[-1][partition] else: - # All coordinates spans the same axes + # All coordinates span the same axes coords = [c[partition] for c in coords] if bounds: bounds = [b[partition] for b in bounds] @@ -3471,7 +3471,7 @@ def partitions(grid, grid_partitions, return_n=False): Each partition is defined as an index to cell coordinates. 
Only the last (left-most) dimension in esmpy order is partitioned. Note - that the coordinates storded in `grid.coords` are in esmpy order. + that the coordinates stored in `grid.coords` are in esmpy order. Only a destinaton grid without a dummy size 2 dimension can be partitioned. diff --git a/cf/regrid/regridoperator.py b/cf/regrid/regridoperator.py index 6852fecb24..fe6387acb9 100644 --- a/cf/regrid/regridoperator.py +++ b/cf/regrid/regridoperator.py @@ -649,8 +649,8 @@ def equal_dst_mask(self, other): :Returns: `bool` - True if the regrid operators have destination masks, - otherwise False. + True if the regrid operators have identical destination + masks, otherwise False. """ self.tosparse() diff --git a/setup.py b/setup.py index a1ff4f12e2..4779cc8219 100755 --- a/setup.py +++ b/setup.py @@ -158,7 +158,7 @@ def compile(): Powerful, flexible, and very simple to produce visualizations of field constructs are available with the `cf-plot `_ package, that needs to be -installed seprately to the ``cf`` package. +installed separately to the ``cf`` package. Functionality ============= From 3a97a1f346e9b91e61ecd4b2858d17981a9ed96e Mon Sep 17 00:00:00 2001 From: David Hassell Date: Thu, 16 Oct 2025 14:17:51 +0100 Subject: [PATCH 51/54] remove old development comment --- cf/regrid/regrid.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cf/regrid/regrid.py b/cf/regrid/regrid.py index 4279ea0272..2ccfee2291 100644 --- a/cf/regrid/regrid.py +++ b/cf/regrid/regrid.py @@ -2570,7 +2570,7 @@ def create_esmpy_weights( compute_weights = True from_file = True - if compute_weights: # or esmpy_regrid_operator is not None: + if compute_weights: partitioned_dst_grid = dst_grid_partitions > 1 if debug: from resource import RUSAGE_SELF, getrusage From 5decf3fe1152fdf916cc949567cc22bb54f777dd Mon Sep 17 00:00:00 2001 From: David Hassell Date: Thu, 16 Oct 2025 15:42:48 +0100 Subject: [PATCH 52/54] report requested dst_grid_partitions --- cf/regrid/regrid.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/cf/regrid/regrid.py b/cf/regrid/regrid.py index 2ccfee2291..95597e1c68 100644 --- a/cf/regrid/regrid.py +++ b/cf/regrid/regrid.py @@ -638,8 +638,9 @@ def regrid( esmpy_regrid_operator = [] if return_esmpy_regrid_operator else None # Find the actual number of partitions + requested_dst_grid_partitions = dst_grid_partitions dst_grid_partitions = partitions( - dst_grid, dst_grid_partitions, return_n=True + dst_grid, requested_dst_grid_partitions, return_n=True ) # Create regrid weights @@ -654,6 +655,7 @@ def regrid( esmpy_regrid_operator=esmpy_regrid_operator, weights_file=weights_file, dst_grid_partitions=dst_grid_partitions, + requested_dst_grid_partitions=requested_dst_grid_partitions, ) if return_esmpy_regrid_operator: @@ -2455,6 +2457,7 @@ def create_esmpy_weights( esmpy_regrid_operator=None, weights_file=None, dst_grid_partitions=1, + requested_dst_grid_partitions=1, ): """Create the `esmpy` regridding weights. @@ -2510,7 +2513,14 @@ def create_esmpy_weights( .. versionadded:: 3.15.2 dst_grid_partitions: `int`, optional - The number of destination grid partitions. + The actual number of destination grid partitions to be + used. + + .. versionadded:: NEXTVERSION + + requested_dst_grid_partitions: `int` or `str`, optional + The requested number of destination grid + partitions. Either an integer, or ``'maximum'``. .. 
versionadded:: NEXTVERSION
 
@@ -2581,11 +2591,12 @@ def create_esmpy_weights(
             logger.debug(
                 "Calculating weights ...\n\n"
                 "Number of destination grid partitions: "
-                f"{dst_grid_partitions}\n"
+                f"{dst_grid_partitions} "
+                f"({requested_dst_grid_partitions!r} requested)\n"
                 "Free memory before weights creation: "
                 f"{free_memory() / 2**30} GiB\n"
                 "Maximum RSS before weights creation: "
-                f"{maxrss * 1000/ 2**30} GiB\n"
+                f"{maxrss * 1000 / 2**30} GiB\n"
             )  # pragma: no cover
 
         # Create the weights using ESMF
@@ -2692,7 +2703,7 @@ def create_esmpy_weights(
                     f"Partition {i}: Time taken by ESMF to create weights: "
                     f"{time() - start_time} s\n"
                     f"Partition {i}: Maximum RSS after ESMF weights creation: "
-                    f"creation: {maxrss * 1000/ 2**30} GiB"
+                    f"{maxrss * 1000 / 2**30} GiB"
                 )  # pragma: no cover
                 start_time = time()  # pragma: no cover
 

From 730c47a4a2bf5f363dfceb97d5c05746ee7cc3a6 Mon Sep 17 00:00:00 2001
From: David Hassell
Date: Thu, 16 Oct 2025 15:54:14 +0100
Subject: [PATCH 53/54] tidy

---
 cf/test/test_Field.py          |  5 +++--
 cf/test/test_RegridOperator.py |  3 +--
 cf/test/test_read_write.py     | 10 ++++++----
 cf/test/test_style.py          |  5 +++--
 4 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/cf/test/test_Field.py b/cf/test/test_Field.py
index 92a72ff8a8..52fc191a3b 100644
--- a/cf/test/test_Field.py
+++ b/cf/test/test_Field.py
@@ -1,12 +1,12 @@
 import atexit
 import datetime
 import faulthandler
-from importlib.util import find_spec
 import itertools
 import os
 import re
 import tempfile
 import unittest
+from importlib.util import find_spec
 
 import numpy
 import numpy as np
@@ -1154,7 +1154,8 @@ def test_Field_insert_dimension(self):
             f.insert_dimension(1, "qwerty")
 
     @unittest.skipUnless(
-        find_spec("matplotlib"), "matplotlib required but not installed")
+        find_spec("matplotlib"), "matplotlib required but not installed"
+    )
     def test_Field_indices(self):
         f = cf.read(self.filename)[0]
 
diff --git a/cf/test/test_RegridOperator.py b/cf/test/test_RegridOperator.py
index 25581e74aa..28cf79f215 100644
--- a/cf/test/test_RegridOperator.py
+++ b/cf/test/test_RegridOperator.py
@@ -1,13 +1,12 @@
 import datetime
 import faulthandler
-from importlib.util import find_spec
 import unittest
+from importlib.util import find_spec
 
 faulthandler.enable()  # to debug seg faults and timeouts
 
 import cf
 
-
 # ESMF renamed its Python module to `esmpy` at ESMF version 8.4.0. Allow
 # either for now for backwards compatibility.
esmpy_imported = False diff --git a/cf/test/test_read_write.py b/cf/test/test_read_write.py index 2810c335ed..93d58c1d2e 100644 --- a/cf/test/test_read_write.py +++ b/cf/test/test_read_write.py @@ -4,7 +4,6 @@ import inspect import os import shutil -import shutil import subprocess import tempfile import unittest @@ -644,7 +643,8 @@ def test_read_write_unlimited(self): self.assertTrue(domain_axes["domainaxis2"].nc_is_unlimited()) @unittest.skipUnless( - shutil.which("ncdump"), "ncdump required - install nco") + shutil.which("ncdump"), "ncdump required - install nco" + ) def test_read_CDL(self): subprocess.run( " ".join(["ncdump", self.filename, ">", tmpfile]), @@ -707,7 +707,8 @@ def test_read_CDL(self): cf.read("test_read_write.py") @unittest.skipUnless( - shutil.which("ncdump"), "ncdump required - install nco") + shutil.which("ncdump"), "ncdump required - install nco" + ) def test_read_cdl_string(self): """Test the cf.read 'cdl_string' keyword.""" f = cf.read("example_field_0.nc")[0] @@ -882,7 +883,8 @@ def test_read_url(self): self.assertEqual(len(f), 1) @unittest.skipUnless( - shutil.which("ncdump"), "ncdump required - install nco") + shutil.which("ncdump"), "ncdump required - install nco" + ) def test_read_dataset_type(self): """Test the cf.read 'dataset_type' keyword.""" # netCDF dataset diff --git a/cf/test/test_style.py b/cf/test/test_style.py index 907cfd892b..e6ea9ed894 100644 --- a/cf/test/test_style.py +++ b/cf/test/test_style.py @@ -1,8 +1,8 @@ import datetime import faulthandler -from importlib.util import find_spec import os import unittest +from importlib.util import find_spec faulthandler.enable() # to debug seg faults and timeouts @@ -31,7 +31,8 @@ def setUp(self): ] @unittest.skipUnless( - find_spec("pycodestyle"), "pycodestyle required but not installed") + find_spec("pycodestyle"), "pycodestyle required but not installed" + ) def test_pep8_compliance(self): import pycodestyle From 3acbdf0c72e70eee9bd74f1b4bb72def9d3f6840 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Thu, 16 Oct 2025 16:05:12 +0100 Subject: [PATCH 54/54] dummy_size_1_dimension --- cf/regrid/regrid.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/cf/regrid/regrid.py b/cf/regrid/regrid.py index 95597e1c68..3feae9687c 100644 --- a/cf/regrid/regrid.py +++ b/cf/regrid/regrid.py @@ -85,9 +85,13 @@ class Grid: cyclic: Any = None # The regridding method. method: str = "" - # If True then, for 1-d regridding, the esmpy weights are generated - # for a 2-d grid for which one of the dimensions is a size 2 dummy - # dimension. + # If True then, for Cartesian 1-d conservative regridding, the + # esmpy weights are generated for a 2-d grid for which one of the + # dimensions is a size 1 dummy dimension. + dummy_size_1_dimension: bool = False + # If True then, for Cartesian 1-d non-conservative regridding, the + # esmpy weights are generated for a 2-d grid for which one of the + # dimensions is a size 2 dummy dimension. dummy_size_2_dimension: bool = False # Whether or not the grid is a structured grid. 
is_grid: bool = False @@ -1668,6 +1672,7 @@ def Cartesian_grid(f, name=None, method=None, axes=None, z=None, ln_z=None): bounds = get_bounds(method, coords, mesh_location) + dummy_size_1_dimension = False dummy_size_2_dimension = False if not (mesh_location or featureType) and len(coords) == 1: # Create a dummy axis because esmpy doesn't like creating @@ -1678,6 +1683,7 @@ def Cartesian_grid(f, name=None, method=None, axes=None, z=None, ln_z=None): # size 1 coords.append(np.array([0.0])) bounds.append(np.array([data])) + dummy_size_1_dimension = True else: # For linear regridding the extra dimension must be size 2 coords.append(data) @@ -1708,6 +1714,7 @@ def Cartesian_grid(f, name=None, method=None, axes=None, z=None, ln_z=None): coords=coords, bounds=bounds, cyclic=cyclic, + dummy_size_1_dimension=dummy_size_1_dimension, dummy_size_2_dimension=dummy_size_2_dimension, is_mesh=is_mesh, is_locstream=is_locstream, @@ -3526,6 +3533,7 @@ def partitions(grid, grid_partitions, return_n=False): if ( grid_partitions == 1 or grid.name == "source" + or grid.dummy_size_1_dimension or grid.dummy_size_2_dimension ): # One partition
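
A note on the RSS figures reported by the debug logging introduced in the
patches above: `getrusage(RUSAGE_SELF).ru_maxrss` gives the peak resident
set size of the process, and the patches scale it with ``* 1000 / 2**30``
to print GiB, which presumes the Linux convention that `ru_maxrss` is
reported in kilobytes (on macOS it is in bytes, so the scaling would
differ). The following minimal, standalone sketch of that measurement
pattern is illustrative only and is not part of the patch series; the
`maxrss_gib` helper and the stand-in workload are invented for the
example.

    # Illustrative sketch: bracket an expensive step with wall-clock and
    # peak-RSS measurements, mirroring the debug logging added to
    # create_esmpy_weights().  Unix only.
    import logging
    from resource import RUSAGE_SELF, getrusage
    from time import time

    logging.basicConfig(level=logging.DEBUG)
    logger = logging.getLogger(__name__)


    def maxrss_gib():
        """Return this process's peak resident set size in GiB."""
        return getrusage(RUSAGE_SELF).ru_maxrss * 1000 / 2**30


    start_time = time()
    logger.debug(f"Maximum RSS before weights creation: {maxrss_gib()} GiB")

    # Stand-in for the expensive step (e.g. building the esmpy.Regrid
    # operator for one destination grid partition)
    _ = [i**2 for i in range(10**6)]

    logger.debug(
        f"Time taken: {time() - start_time} s\n"
        f"Maximum RSS after weights creation: {maxrss_gib()} GiB"
    )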