diff --git a/imod/common/constants.py b/imod/common/constants.py new file mode 100644 index 000000000..91866c107 --- /dev/null +++ b/imod/common/constants.py @@ -0,0 +1,21 @@ +""" +Store constants here that are used across the package. This is to avoid circular +imports and to have a single source of truth for these values. +""" + +from dataclasses import dataclass + +import numpy as np + + +@dataclass +class MaskValues: + """ + Stores mask values for nodata. Special sentinel values can be stored in + here, such as the -9999.0 for MetaSWAP. + """ + + bool = False + float = np.nan + integer = 0 + msw_default = -9999.0 diff --git a/imod/common/utilities/clip.py b/imod/common/utilities/clip.py index c742312b5..da1862a5c 100644 --- a/imod/common/utilities/clip.py +++ b/imod/common/utilities/clip.py @@ -19,6 +19,7 @@ clipped_zbound_linestrings_to_vertical_polygons, vertical_polygons_to_zbound_linestrings, ) +from imod.common.utilities.mask import mask_da from imod.common.utilities.value_filters import is_valid from imod.typing import GeoDataFrameType, GridDataArray, GridDataset from imod.typing.grid import bounding_polygon, is_spatial_grid @@ -128,18 +129,18 @@ def _filter_inactive_cells(package: IPackage, active: GridDataArray): return package_vars = package.dataset.data_vars - for var in package_vars: - if package_vars[var].shape != (): - if is_spatial_grid(package.dataset[var]): - other = ( - 0 - if np.issubdtype(package.dataset[var].dtype, np.integer) - else np.nan - ) - - package.dataset[var] = package.dataset[var].where( - active > 0, other=other - ) + to_mask = [ + var + for var in package_vars + if (package_vars[var].shape != () and is_spatial_grid(package.dataset[var])) + ] + # Shortcut if nothing to mask to avoid computing mask unnecessarily. + if not to_mask: + return + + mask = active > 0 + for var in to_mask: + package.dataset[var] = mask_da(package.dataset[var], mask) def _clip_linestring( diff --git a/imod/common/utilities/dtype.py b/imod/common/utilities/dtype.py new file mode 100644 index 000000000..c2482919d --- /dev/null +++ b/imod/common/utilities/dtype.py @@ -0,0 +1,36 @@ +""" +Module for utilities related to data types of different kinds of origins. +np.issubdtype unfortunately is too restrictive for some of our use cases, as it +does not for example consider pandas extension dtypes. +""" + +import numbers + +import numpy as np +from numpy.typing import DTypeLike + + +def is_float(dtype: DTypeLike) -> bool: + try: + return np.issubdtype(dtype, np.floating) + except TypeError: + # Catch cases where dtype is not a numpy dtype and check if subclass is + # not an integer. As numpy-style integers are also considered real + # numbers. + return issubclass(dtype.type, numbers.Real) and not issubclass( + dtype.type, numbers.Integral + ) + + +def is_integer(dtype: DTypeLike) -> bool: + try: + return np.issubdtype(dtype, np.integer) + except TypeError: + return issubclass(dtype.type, numbers.Integral) + + +def is_bool(dtype: DTypeLike) -> bool: + try: + return np.issubdtype(dtype, np.bool_) + except TypeError: + return issubclass(dtype.type, np.bool) diff --git a/imod/common/utilities/mask.py b/imod/common/utilities/mask.py index b0199cb30..acfdeb77d 100644 --- a/imod/common/utilities/mask.py +++ b/imod/common/utilities/mask.py @@ -1,13 +1,13 @@ -import numbers - import xarray as xr from plum import Dispatcher from xarray.core.utils import is_scalar +from imod.common.constants import MaskValues from imod.common.interfaces.imaskingsettings import IMaskingSettings from imod.common.interfaces.imodel import IModel from imod.common.interfaces.ipackage import IPackage from imod.common.interfaces.isimulation import ISimulation +from imod.common.utilities.dtype import is_bool, is_float, is_integer from imod.typing.grid import ( GridDataArray, concat, @@ -80,7 +80,7 @@ def mask_package(package: IPackage, mask: GridDataArray) -> IPackage: if _skip_dataarray(package.dataset[var]) or _skip_variable(package, var): masked[var] = package.dataset[var] else: - masked[var] = _mask_spatial_var(package, var, mask) + masked[var] = _mask_spatial_var_pkg(package, var, mask) return type(package)(**masked) @@ -108,22 +108,43 @@ def _skip_variable(package: IMaskingSettings, var: str) -> bool: return var in package.skip_variables -def _mask_spatial_var(self, var: str, mask: GridDataArray) -> GridDataArray: - da = self.dataset[var] - array_mask = _adjust_mask_for_unlayered_data(da, mask) - active = array_mask > 0 +def mask_da(da: GridDataArray, mask: GridDataArray) -> GridDataArray: + """ + Mask a DataArray with a boolean mask. Function attempts to preserve the + dtype of the original DataArray. It will set the + value to 0 for integers, np.nan for floats, and False for booleans. + """ - if issubclass(da.dtype.type, numbers.Integral): - if var == "idomain": - return da.where(active, other=array_mask) - else: - return da.where(active, other=0) - elif issubclass(da.dtype.type, numbers.Real): - return da.where(active) + if is_integer(da.dtype): + other = MaskValues.integer + elif is_float(da.dtype): + other = MaskValues.float + elif is_bool(da.dtype): + other = MaskValues.bool else: raise TypeError( - f"Expected dtype float or integer. Received instead: {da.dtype}" + f"Expected dtype float, integer, or bool. Received instead: {da.dtype}" ) + # Align the mask, as calling where with "other" specified does not + # automatically align the mask to the DataArray. + _, mask = xr.align(da, mask, join="left", copy=False) + return da.where(mask, other=other) + + +def _mask_spatial_var_pkg( + package: IPackage, var: str, mask: GridDataArray +) -> GridDataArray: + """ + Mask a spatial variable in a package. There is some additional logic for the + MF6 DIS/DISV packages to work with unlayered grids for the "top" value. + """ + da = package.dataset[var] + array_mask = _adjust_mask_for_unlayered_data(da, mask) + active = array_mask > 0 + + if var == "idomain": + return da.where(active, other=array_mask) + return mask_da(da, active) def _adjust_mask_for_unlayered_data( @@ -145,17 +166,37 @@ def _adjust_mask_for_unlayered_data( return array_mask +def make_mask(da: GridDataArray): + """ + Make a boolean mask from a DataArray. The mask is True where the values are + not equal to the nodata value. The nodata value is determined by the dtype + of the DataArray. For integers, the nodata value is 0. For floats, the + nodata value is np.nan. For booleans, the nodata value is False. + """ + if is_integer(da.dtype): + return da != MaskValues.integer + elif is_float(da.dtype): + return notnull(da) + elif is_bool(da.dtype): + return da != MaskValues.bool + else: + raise TypeError( + f"Expected dtype float, integer, or bool. Received instead: {da.dtype}" + ) + + def mask_arrays(arrays: dict[str, GridDataArray]) -> dict[str, GridDataArray]: """ This function takes a dictionary of xr.DataArrays. The arrays are assumed to have the same coordinates. When a np.nan value is found in any array, the other arrays are also set to np.nan at the same coordinates. """ - masks = [notnull(array) for array in arrays.values()] - # Get total mask across all arrays + + masks = [make_mask(array) for array in arrays.values()] + # Get total mask across all arrays. total_mask = concat(masks, dim="arrays").all("arrays") # Mask arrays with total mask - arrays_masked = {key: array.where(total_mask) for key, array in arrays.items()} + arrays_masked = {key: mask_da(array, total_mask) for key, array in arrays.items()} return arrays_masked diff --git a/imod/common/utilities/regrid.py b/imod/common/utilities/regrid.py index c38354110..4fbea0a31 100644 --- a/imod/common/utilities/regrid.py +++ b/imod/common/utilities/regrid.py @@ -9,6 +9,7 @@ from xarray.core.utils import is_scalar from xugrid.regrid.regridder import BaseRegridder +from imod.common.constants import MaskValues from imod.common.interfaces.ilinedatapackage import ILineDataPackage from imod.common.interfaces.imodel import IModel from imod.common.interfaces.ipackage import IPackage @@ -17,6 +18,7 @@ from imod.common.interfaces.isimulation import ISimulation from imod.common.utilities.clip import clip_by_grid from imod.common.utilities.dataclass_type import DataclassType, EmptyRegridMethod +from imod.common.utilities.dtype import is_integer from imod.common.utilities.value_filters import is_valid from imod.typing.grid import ( GridDataArray, @@ -107,8 +109,8 @@ def _regrid_array( # Nans can be introduced when the source data has a nan value, or when the # target grid has a larger domain. Fill nans with 0 for integer, as this is # mainly important for the idomain array where 0 indicates an inactive cell. - if np.issubdtype(original_dtype, np.integer): - regridded_array = regridded_array.fillna(0) + if is_integer(original_dtype): + regridded_array = regridded_array.fillna(MaskValues.integer) return regridded_array.astype(original_dtype) @@ -438,7 +440,7 @@ def _get_regridding_domain( # to track nodata cells and verify that the regridding process does not # inadvertently affect them. The use of np.abs() simplifies the logic by # avoiding additional conditional checks for -1 and 1 separately. - is_active = np.abs(idomain.where(idomain != 0, other=np.nan)) + is_active = np.abs(idomain.where(idomain != 0, other=MaskValues.float)) included_in_all = ones_like(target_grid) # Take the first regridder function, as each regridder type handles nans # consistently amongst methods. @@ -452,6 +454,8 @@ def _get_regridding_domain( idomain_regridder_type = regridder.regrid(is_active) included_in_all = included_in_all.where(idomain_regridder_type.notnull()) - new_idomain = regridded_domain.where(included_in_all.notnull(), other=0).astype(int) + new_idomain = regridded_domain.where( + included_in_all.notnull(), other=MaskValues.integer + ).astype(int) return new_idomain diff --git a/imod/msw/infiltration.py b/imod/msw/infiltration.py index d9258f70e..483a9bbcc 100644 --- a/imod/msw/infiltration.py +++ b/imod/msw/infiltration.py @@ -2,13 +2,13 @@ import xarray as xr +from imod.common.constants import MaskValues from imod.common.interfaces.iregridpackage import IRegridPackage from imod.logging import LogLevel, logger from imod.msw.fixed_format import VariableMetaData from imod.msw.pkgbase import MetaSwapPackage from imod.msw.regrid.regrid_schemes import InfiltrationRegridMethod from imod.msw.utilities.common import concat_imod5 -from imod.msw.utilities.mask import MaskValues from imod.typing import GridDataDict, Imod5DataDict from imod.typing.grid import ones_like @@ -29,7 +29,7 @@ def deactivate_small_resistances_in_data(data: GridDataDict): message=message.format(var=var), additional_depth=1, ) - data[var] = data[var].where(~to_deactivate, MaskValues.default) + data[var] = data[var].where(~to_deactivate, MaskValues.msw_default) return data @@ -142,7 +142,7 @@ def from_imod5_data(cls, imod5_data: Imod5DataDict) -> "Infiltration": data = deactivate_small_resistances_in_data(data) like = data["downward_resistance"].isel(subunit=0, drop=True) - data["bottom_resistance"] = ones_like(like) * MaskValues.default + data["bottom_resistance"] = ones_like(like) * MaskValues.msw_default data["extra_storage_coefficient"] = ones_like(like) return cls(**data) diff --git a/imod/msw/meteo_grid.py b/imod/msw/meteo_grid.py index d4c080412..151d9943e 100644 --- a/imod/msw/meteo_grid.py +++ b/imod/msw/meteo_grid.py @@ -8,13 +8,13 @@ import xarray as xr import imod +from imod.common.constants import MaskValues from imod.common.interfaces.iregridpackage import IRegridPackage from imod.common.utilities.dataclass_type import DataclassType, EmptyRegridMethod from imod.msw.pkgbase import MetaSwapPackage from imod.msw.regrid.regrid_schemes import MeteoGridRegridMethod from imod.msw.timeutil import to_metaswap_timeformat from imod.msw.utilities.common import find_in_file_list -from imod.msw.utilities.mask import MaskValues from imod.typing import Imod5DataDict @@ -178,7 +178,7 @@ def write(self, directory: Union[str, Path], *args): ".asc" ) imod.rasterio.save( - path, self.dataset[str(varname)], nodata=MaskValues.default + path, self.dataset[str(varname)], nodata=MaskValues.msw_default ) def _pkgcheck(self): diff --git a/imod/msw/model.py b/imod/msw/model.py index ce12c6ff0..eaf81fdbf 100644 --- a/imod/msw/model.py +++ b/imod/msw/model.py @@ -10,6 +10,7 @@ import numpy as np import xarray as xr +from imod.common.constants import MaskValues from imod.common.utilities.clip import clip_by_grid from imod.common.utilities.partitioninfo import create_partition_info from imod.common.utilities.value_filters import enforce_scalar @@ -42,7 +43,7 @@ from imod.msw.timeutil import to_metaswap_timeformat from imod.msw.utilities.common import find_in_file_list from imod.msw.utilities.imod5_converter import has_active_scaling_factor -from imod.msw.utilities.mask import MaskValues, mask_and_broadcast_cap_data +from imod.msw.utilities.mask import mask_and_broadcast_cap_data from imod.msw.utilities.parse import read_para_sim from imod.msw.vegetation import AnnualCropFactors from imod.typing import GridDataArray, Imod5DataDict @@ -652,7 +653,7 @@ def from_imod5_data( if has_active_scaling_factor(imod5_cap_no_layer["cap"]): model["scaling_factor"] = ScalingFactors.from_imod5_data(imod5_masked) area = model["grid"]["area"].isel(subunit=0, drop=True) - model["idf_mapping"] = IdfMapping(area, MaskValues.default) + model["idf_mapping"] = IdfMapping(area, MaskValues.msw_default) model["coupling"] = CouplerMapping() model["extra_files"] = FileCopier.from_imod5_data(imod5_masked) diff --git a/imod/msw/utilities/imod5_converter.py b/imod/msw/utilities/imod5_converter.py index 07436d5c5..58fcce04c 100644 --- a/imod/msw/utilities/imod5_converter.py +++ b/imod/msw/utilities/imod5_converter.py @@ -1,9 +1,10 @@ from xarray.core.utils import is_scalar +from imod.common.constants import MaskValues from imod.logging import LogLevel, logger from imod.mf6 import StructuredDiscretization from imod.msw.utilities.common import concat_imod5 -from imod.msw.utilities.mask import MaskValues, MetaSwapActive +from imod.msw.utilities.mask import MetaSwapActive from imod.typing import GridDataArray, GridDataDict from imod.typing.grid import ones_like from imod.util.spatial import get_cell_area @@ -103,7 +104,7 @@ def has_active_scaling_factor(imod5_cap: GridDataDict): function shortcuts if data is provided as constant. """ variable_inactive_mapping = { - "perched_water_table_level": MaskValues.default, + "perched_water_table_level": MaskValues.msw_default, "soil_moisture_fraction": 1.0, "conductivitiy_factor": 1.0, } diff --git a/imod/msw/utilities/mask.py b/imod/msw/utilities/mask.py index 6a26c9b9d..e71eb4ac2 100644 --- a/imod/msw/utilities/mask.py +++ b/imod/msw/utilities/mask.py @@ -1,6 +1,6 @@ -import numbers from dataclasses import dataclass +from imod.common.utilities.mask import mask_da from imod.msw.pkgbase import MetaSwapPackage from imod.typing import GridDataArray, GridDataDict @@ -11,24 +11,13 @@ class MetaSwapActive: per_subunit: GridDataArray -@dataclass -class MaskValues: - """Stores sentinel values for nodata. Most cases use -9999.0, but exceptions - can be added here.""" - - default = -9999.0 - integer = 0 - - def mask_and_broadcast_cap_data( cap_data: GridDataDict, msw_active: MetaSwapActive ) -> GridDataDict: """ Mask and broadcast cap data, always mask with "all" of MetaSwapActive. """ - return { - key: _mask_spatial_var(grid, msw_active.all) for key, grid in cap_data.items() - } + return {key: mask_da(grid, msw_active.all) for key, grid in cap_data.items()} def mask_and_broadcast_pkg_data( @@ -41,20 +30,9 @@ def mask_and_broadcast_pkg_data( return { key: ( - _mask_spatial_var(grid, msw_active.per_subunit) + mask_da(grid, msw_active.per_subunit) if key in package._with_subunit - else _mask_spatial_var(grid, msw_active.all) + else mask_da(grid, msw_active.all) ) for key, grid in grid_data.items() } - - -def _mask_spatial_var(da: GridDataArray, active: GridDataArray) -> GridDataArray: - if issubclass(da.dtype.type, numbers.Integral): - return da.where(active, other=MaskValues.integer) - elif issubclass(da.dtype.type, numbers.Real): - return da.where(active) - else: - raise TypeError( - f"Expected dtype float or integer. Received instead: {da.dtype}" - ) diff --git a/imod/tests/test_common/test_utilities/test_dtype.py b/imod/tests/test_common/test_utilities/test_dtype.py new file mode 100644 index 000000000..e1889ebf7 --- /dev/null +++ b/imod/tests/test_common/test_utilities/test_dtype.py @@ -0,0 +1,56 @@ +from zoneinfo import ZoneInfo + +import numpy as np +import pandas as pd + +from imod.common.utilities.dtype import is_bool, is_float, is_integer + +FLOAT_DTYPES = [ + np.dtype("float16"), + np.dtype("float32"), + np.dtype("float64"), + pd.Float64Dtype(), +] +INTEGER_DTYPES = [ + np.dtype("int8"), + np.dtype("int16"), + np.dtype("int32"), + np.dtype("int64"), + pd.Int8Dtype(), + pd.Int16Dtype(), + pd.Int32Dtype(), + pd.Int64Dtype(), +] +BOOL_DTYPES = [np.dtype("bool"), pd.BooleanDtype()] +UNSUPPORTED_DTYPES = [ + np.dtype("str"), + pd.StringDtype(), + np.dtype("datetime64"), + pd.DatetimeTZDtype("s", ZoneInfo("UTC")), + pd.IntervalDtype(), + pd.PeriodDtype("20d"), +] + + +def test_is_float(): + for dtype in FLOAT_DTYPES: + assert is_float(dtype) + + for dtype in INTEGER_DTYPES + BOOL_DTYPES + UNSUPPORTED_DTYPES: + assert not is_float(dtype) + + +def test_is_integer(): + for dtype in INTEGER_DTYPES: + assert is_integer(dtype) + + for dtype in FLOAT_DTYPES + BOOL_DTYPES + UNSUPPORTED_DTYPES: + assert not is_integer(dtype) + + +def test_is_bool(): + for dtype in BOOL_DTYPES: + assert is_bool(dtype) + + for dtype in FLOAT_DTYPES + INTEGER_DTYPES + UNSUPPORTED_DTYPES: + assert not is_bool(dtype) diff --git a/imod/tests/test_common/test_utilities/test_mask_util.py b/imod/tests/test_common/test_utilities/test_mask_util.py index cde4dfbef..72a15edab 100644 --- a/imod/tests/test_common/test_utilities/test_mask_util.py +++ b/imod/tests/test_common/test_utilities/test_mask_util.py @@ -76,10 +76,16 @@ def test_array_masking(arrays): masked_arrays = mask_arrays({"array1": array1, "array2": array2}) + assert isinstance(masked_arrays["array1"], type(array1)) + assert isinstance(masked_arrays["array2"], type(array2)) + + assert masked_arrays["array1"].dtype == array1.dtype + assert masked_arrays["array2"].dtype == array2.dtype + # element first element should be nan in both arrays array1_1d = masked_arrays["array1"].values.ravel() array2_1d = masked_arrays["array2"].values.ravel() - assert np.isnan(array1_1d[0]) + assert array1_1d[0] == 0 assert np.isnan(array2_1d[0]) # there should be only 1 nan in both arrays @@ -96,7 +102,7 @@ def test_broadcast_and_mask_arrays(): # Test broadcasting and masking with two arrays with the same shape result1 = broadcast_and_mask_arrays({"array1": array1, "array2": array2}) xr.testing.assert_equal( - result1["array1"], array2 + result1["array1"], array2.fillna(0) ) # Masking turns array1 into array2 xr.testing.assert_equal(result1["array2"], array2) # Test broadcasting and masking with one array and a scalar array diff --git a/pixi.lock b/pixi.lock index 39e7316da..03d4b3985 100644 --- a/pixi.lock +++ b/pixi.lock @@ -14752,7 +14752,7 @@ packages: - pypi: ./ name: imod version: 1.0.1.dev1 - sha256: b242a1acd646ffa5410ebc6470e9fa558e81e42073304819f54be9e77949a462 + sha256: d27105972c53a243e6793668ecb915b0b24268a493441e035e214e01a8a8e389 requires_dist: - affine - cftime>=1