From a64657e7002526612bc461a6280240bbaad2f8f8 Mon Sep 17 00:00:00 2001 From: Leif Denby Date: Fri, 20 Mar 2026 11:25:08 +0100 Subject: [PATCH 1/6] Refactor load_dataset to return dataset traits, remove mxalign from CLI, and update README --- README.md | 103 ++++++++++------------ src/mlwp_data_loaders/api.py | 47 ++++++++-- src/mlwp_data_loaders/cli.py | 23 ++--- tests/test_anemoi_datasets_integration.py | 6 +- tests/test_api.py | 29 ++++++ tests/test_cli.py | 19 ++-- 6 files changed, 132 insertions(+), 95 deletions(-) diff --git a/README.md b/README.md index 9b9f9ca..bacebdc 100644 --- a/README.md +++ b/README.md @@ -7,106 +7,101 @@ Loader package for opening source datasets before validating them with `mlwp-data-loaders` is responsible for: -1. importing a loader module or script -2. opening one or more source datasets using loader-defined traits -3. optionally checking that the chosen trait profiles are compatible with the loader -4. returning an `xarray.Dataset` that can then be validated with `mlwp-data-specs` +1. Importing a loader module (or Python file). +2. Using the loader to open and normalize source datasets. +3. Extracting the appropriate validation trait profiles (`TIME_PROFILE`, `SPACE_PROFILE`, `UNCERTAINTY_PROFILE`) defined by the loader. +4. Validating the returned `xarray.Dataset` automatically via `mlwp-data-specs`. +5. Returning the `xarray.Dataset` (and optionally the trait dict) for further use or machine learning workloads. The intended split is: - -1. `mlwp-data-loaders`: source-specific loading and normalization -2. `mlwp-data-specs`: trait validation +- **`mlwp-data-loaders`**: Source-specific loading and normalization logic. +- **`mlwp-data-specs`**: General trait validation and compliance checks. ## Python API The `loader` argument is interpreted as: +- A Python file path if it ends with `.py`. +- A Python module path if it contains `.` (e.g. `mlwp_data_loaders.loaders.anemoi.anemoi_inference`). -- a Python file path if it ends with `.py` -- a Python module path if it contains `.` - -Load a dataset through a loader module: +You can load a dataset and its trait profiles natively: ```python from mlwp_data_loaders import load_dataset -from mlwp_data_specs import validate_dataset -ds = load_dataset( +ds, dataset_traits = load_dataset( [ "/path/to/anemoi-inference-20260101T00.nc", "/path/to/anemoi-inference-20260102T00.nc", ], loader="mlwp_data_loaders.loaders.anemoi.anemoi_inference", - time="forecast", - space="grid", - uncertainty="deterministic", + return_dataset_traits=True, ) -report = validate_dataset( - ds, - time="forecast", - space="grid", - uncertainty="deterministic", -) +print(f"Validation time profile used: {dataset_traits['time_profile']}") +print(f"Dataset has {len(ds.data_vars)} variables.") +``` + +If you don't need the traits dictionary returned, simply omit `return_dataset_traits` (defaults to `False`): -report.console_print() +```python +ds = load_dataset( + "s3://my-bucket/dataset.zarr", + loader="mlwp_data_loaders.loaders.anemoi.anemoi_datasets", + storage_options={"anon": True}, +) ``` ## CLI -Use the loader-aware CLI: +Use the loader-aware CLI to load and validate data from the command line: ```bash uv run mlwp.load_and_validate_dataset \ /path/to/anemoi-inference-20260101T00.nc \ /path/to/anemoi-inference-20260102T00.nc \ - --loader mlwp_data_loaders.loaders.anemoi.anemoi_inference \ - --time forecast \ - --space grid \ - --uncertainty deterministic + --loader mlwp_data_loaders.loaders.anemoi.anemoi_inference ``` -Using a user-provided loader script: +Using a user-provided custom loader script: ```bash uv run mlwp.load_and_validate_dataset \ /path/to/source-a.nc \ /path/to/source-b.nc \ - --loader ./examples/my_loader.py \ - --time forecast \ - --space grid \ - --uncertainty deterministic + --loader ./examples/my_loader.py ``` -## Loader module contract +## Loader Module Contract -The loader module may define a subset of the following: +Each loader module must define a function and optionally standard profile variables: -1. Variables defining how each provided path is opened with `xarray.open_dataset` - - `OPEN_KWARGS`: keyword arguments forwarded to `xarray.open_dataset`, including backend selection such as `{"engine": "zarr"}` or `{"engine": "h5netcdf"}` -2. Functions and variables around preprocessing, concatenation, and postprocessing - - `preprocess(ds)`: normalize each opened source dataset before combination - - `CONCAT_DIM`: dimension used when combining multiple inputs; required if more than one path is provided - - `postprocess(ds)`: finalize the combined dataset before validation -3. Variables defining valid trait profiles - - `valid_time_profiles`: allowed `time=` profile values for this loader - - `valid_space_profiles`: allowed `space=` profile values for this loader - - `valid_uncertainty_profiles`: allowed `uncertainty=` profile values for this loader +1. `load_dataset(path: str | list[str], **kwargs) -> xr.Dataset` + - **Required**. Handles opening the path(s), preprocessing, concatenating, and postprocessing, returning a single normalized `xarray.Dataset`. +2. `TIME_PROFILE`: `str` + - Defines the time trait profile for `mlwp-data-specs` validation (e.g. `"forecast"`). +3. `SPACE_PROFILE`: `str` + - Defines the space trait profile (e.g. `"grid"`). +4. `UNCERTAINTY_PROFILE`: `str` + - Defines the uncertainty trait profile (e.g. `"deterministic"`). -Example: +### Example Loader (`my_loader.py`) ```python import xarray as xr -OPEN_KWARGS = {} - - -def preprocess(ds: xr.Dataset) -> xr.Dataset | xr.DataArray: - return ds - +TIME_PROFILE = "observation" +SPACE_PROFILE = "grid" +UNCERTAINTY_PROFILE = "deterministic" -CONCAT_DIM = "valid_time" +def load_dataset(path: str | list[str], **kwargs) -> xr.Dataset: + if isinstance(path, list): + ds = xr.open_mfdataset(path, combine="by_coords", **kwargs) + else: + ds = xr.open_dataset(path, **kwargs) + # Example post-processing + if "time" in ds.dims: + ds = ds.rename({"time": "valid_time"}) -def postprocess(ds: xr.Dataset | xr.DataArray) -> xr.Dataset | xr.DataArray: return ds ``` diff --git a/src/mlwp_data_loaders/api.py b/src/mlwp_data_loaders/api.py index be5b57a..c6402dd 100644 --- a/src/mlwp_data_loaders/api.py +++ b/src/mlwp_data_loaders/api.py @@ -3,7 +3,7 @@ from __future__ import annotations import inspect -from typing import Any +from typing import Any, Literal, overload import xarray as xr from mlwp_data_specs import validate_dataset @@ -11,13 +11,38 @@ from .core import get_dataset_traits_from_loader +@overload def load_dataset( dataset_path: str | list[str], *, loader: str, storage_options: dict[str, Any] | None = None, + return_dataset_traits: Literal[False] = False, **kwargs: Any, ) -> xr.Dataset: + ... + + +@overload +def load_dataset( + dataset_path: str | list[str], + *, + loader: str, + storage_options: dict[str, Any] | None = None, + return_dataset_traits: Literal[True], + **kwargs: Any, +) -> tuple[xr.Dataset, dict[str, Any]]: + ... + + +def load_dataset( + dataset_path: str | list[str], + *, + loader: str, + storage_options: dict[str, Any] | None = None, + return_dataset_traits: bool = False, + **kwargs: Any, +) -> xr.Dataset | tuple[xr.Dataset, dict[str, Any]]: """Load a dataset through a loader module and validate it. Parameters @@ -29,18 +54,22 @@ def load_dataset( path. A value containing ``.`` is treated as a Python module path. storage_options : dict[str, Any] | None, optional Storage options forwarded to the loader's ``load_dataset`` function. + return_dataset_traits : bool, optional + If True, return a tuple containing the dataset and the loader traits. + Defaults to False. **kwargs Additional keyword arguments forwarded to the loader's ``load_dataset`` function if its signature accepts them. Returns ------- - xr.Dataset - Loaded and validated dataset. + xr.Dataset | tuple[xr.Dataset, dict[str, Any]] + Loaded and validated dataset. If `return_dataset_traits` is True, + returns a tuple of (dataset, dataset_traits). """ - traits = get_dataset_traits_from_loader(loader) + dataset_traits = get_dataset_traits_from_loader(loader) - loader_func = traits["load_dataset"] + loader_func = dataset_traits["load_dataset"] sig = inspect.signature(loader_func) loader_kwargs: dict[str, Any] = {} @@ -65,9 +94,11 @@ def load_dataset( validate_dataset( ds, - time=traits.get("time_profile"), - space=traits.get("space_profile"), - uncertainty=traits.get("uncertainty_profile"), + time=dataset_traits.get("time_profile"), + space=dataset_traits.get("space_profile"), + uncertainty=dataset_traits.get("uncertainty_profile"), ) + if return_dataset_traits: + return ds, dataset_traits return ds diff --git a/src/mlwp_data_loaders/cli.py b/src/mlwp_data_loaders/cli.py index 77501b0..c6b222e 100644 --- a/src/mlwp_data_loaders/cli.py +++ b/src/mlwp_data_loaders/cli.py @@ -10,8 +10,6 @@ from mlwp_data_specs.api import validate_dataset from .api import load_dataset -from .core import get_dataset_traits_from_loader -from .mxalign_api import validate_dataset_with_mxalign def build_parser() -> argparse.ArgumentParser: @@ -79,18 +77,18 @@ def main(argv: Sequence[str] | None = None) -> int: logger.info(f"Using mlwp-data-specs {specs_version}") # Load the dataset - ds = load_dataset( + res = load_dataset( dataset_input, loader=args.loader, storage_options=storage_options or None, + return_dataset_traits=True, ) + assert isinstance(res, tuple) + ds, dataset_traits = res - # Re-run validation to get the report for printing - traits = get_dataset_traits_from_loader(args.loader) - - time_profile = traits.get("time_profile") - space_profile = traits.get("space_profile") - uncertainty_profile = traits.get("uncertainty_profile") + time_profile = dataset_traits.get("time_profile") + space_profile = dataset_traits.get("space_profile") + uncertainty_profile = dataset_traits.get("uncertainty_profile") report = validate_dataset( ds, @@ -99,13 +97,6 @@ def main(argv: Sequence[str] | None = None) -> int: uncertainty=uncertainty_profile, ) - report += validate_dataset_with_mxalign( - ds, - time=time_profile, - space=space_profile, - uncertainty=uncertainty_profile, - ) - report.console_print() return 1 if report.has_fails() else 0 diff --git a/tests/test_anemoi_datasets_integration.py b/tests/test_anemoi_datasets_integration.py index f2d027a..9cae798 100644 --- a/tests/test_anemoi_datasets_integration.py +++ b/tests/test_anemoi_datasets_integration.py @@ -5,7 +5,6 @@ from mlwp_data_specs import validate_dataset from mlwp_data_loaders.api import load_dataset -from mlwp_data_loaders.core import get_dataset_traits_from_loader from mlwp_data_loaders.mxalign_api import validate_dataset_with_mxalign # Use small CERRA sample dataset stored on EWC (European Weather Cloud) @@ -25,15 +24,14 @@ def test_load_dataset_opens_anemoi_store_from_ewc() -> None: "anon": True, } - ds = load_dataset( + ds, traits = load_dataset( DATASET_PATH, loader=LOADER, storage_options=storage_options, chunks=None, + return_dataset_traits=True, ) - traits = get_dataset_traits_from_loader(LOADER) - report_mxalign = validate_dataset_with_mxalign( ds, time=traits.get("time_profile"), diff --git a/tests/test_api.py b/tests/test_api.py index c4e7970..5833aaa 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -82,11 +82,40 @@ def test_load_dataset_filters_kwargs(tmp_path, monkeypatch: pytest.MonkeyPatch) engine="h5netcdf", # Should be ignored because strict load_dataset doesn't take 'engine' storage_options={"anon": True}, # Should be ignored ) + assert isinstance(ds, xr.Dataset) assert ds.attrs["chunks"] == "auto" assert "engine" not in ds.attrs +def test_load_dataset_returns_traits(tmp_path, monkeypatch: pytest.MonkeyPatch) -> None: + """Check that api.load_dataset returns traits when requested.""" + loader_file = tmp_path / "loader_traits.py" + loader_file.write_text( + "def load_dataset(path, **kwargs):\n" + " from xarray import Dataset\n" + " return Dataset()\n" + "TIME_PROFILE = 'forecast'\n", + encoding="utf-8", + ) + + monkeypatch.setattr( + "mlwp_data_loaders.api.validate_dataset", lambda *args, **kwargs: None + ) + + res = load_dataset( + "dummy.nc", + loader=str(loader_file), + return_dataset_traits=True, + ) + assert isinstance(res, tuple) + + ds, dataset_traits = res + assert isinstance(ds, xr.Dataset) + assert isinstance(dataset_traits, dict) + assert dataset_traits.get("time_profile") == "forecast" + + def test_validate_dataset_with_mxalign_returns_fail_report_for_invalid_dims( monkeypatch: pytest.MonkeyPatch, ) -> None: diff --git a/tests/test_cli.py b/tests/test_cli.py index 2d070aa..be11051 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -37,15 +37,14 @@ def test_cli_accepts_multiple_dataset_paths(monkeypatch: MonkeyPatch) -> None: def _load_dataset(dataset_path, **kwargs): observed["dataset_path"] = dataset_path + if kwargs.get("return_dataset_traits"): + return _forecast_grid_ds(), { + "time_profile": "forecast", + "space_profile": "grid", + "uncertainty_profile": "deterministic", + } return _forecast_grid_ds() - def _get_dataset_traits_from_loader(loader): - return { - "time_profile": "forecast", - "space_profile": "grid", - "uncertainty_profile": "deterministic", - } - class _Report: def __init__(self): self.fails = False @@ -63,13 +62,7 @@ def _validate_dataset(ds, **kwargs): return _Report() monkeypatch.setattr(cli, "load_dataset", _load_dataset) - monkeypatch.setattr( - cli, "get_dataset_traits_from_loader", _get_dataset_traits_from_loader - ) monkeypatch.setattr(cli, "validate_dataset", _validate_dataset) - monkeypatch.setattr( - cli, "validate_dataset_with_mxalign", lambda *args, **kwargs: _Report() - ) code = cli.main( [ From 0d5967ecd2900c99e5fbda4ea435362743ca4531 Mon Sep 17 00:00:00 2001 From: Leif Denby Date: Fri, 20 Mar 2026 11:37:57 +0100 Subject: [PATCH 2/6] Simplify api.py return types and clarify mxalign validation in tests --- src/mlwp_data_loaders/api.py | 26 +---------------------- tests/test_anemoi_datasets_integration.py | 4 +++- tests/test_api.py | 2 +- tests/test_harp_obstable_integration.py | 8 +++---- 4 files changed, 9 insertions(+), 31 deletions(-) diff --git a/src/mlwp_data_loaders/api.py b/src/mlwp_data_loaders/api.py index c6402dd..4d077c0 100644 --- a/src/mlwp_data_loaders/api.py +++ b/src/mlwp_data_loaders/api.py @@ -3,7 +3,7 @@ from __future__ import annotations import inspect -from typing import Any, Literal, overload +from typing import Any import xarray as xr from mlwp_data_specs import validate_dataset @@ -11,30 +11,6 @@ from .core import get_dataset_traits_from_loader -@overload -def load_dataset( - dataset_path: str | list[str], - *, - loader: str, - storage_options: dict[str, Any] | None = None, - return_dataset_traits: Literal[False] = False, - **kwargs: Any, -) -> xr.Dataset: - ... - - -@overload -def load_dataset( - dataset_path: str | list[str], - *, - loader: str, - storage_options: dict[str, Any] | None = None, - return_dataset_traits: Literal[True], - **kwargs: Any, -) -> tuple[xr.Dataset, dict[str, Any]]: - ... - - def load_dataset( dataset_path: str | list[str], *, diff --git a/tests/test_anemoi_datasets_integration.py b/tests/test_anemoi_datasets_integration.py index 9cae798..85d0ba3 100644 --- a/tests/test_anemoi_datasets_integration.py +++ b/tests/test_anemoi_datasets_integration.py @@ -24,7 +24,7 @@ def test_load_dataset_opens_anemoi_store_from_ewc() -> None: "anon": True, } - ds, traits = load_dataset( + ds, traits = load_dataset( # type: ignore # load_dataset returns a tuple when return_dataset_traits=True DATASET_PATH, loader=LOADER, storage_options=storage_options, @@ -32,6 +32,8 @@ def test_load_dataset_opens_anemoi_store_from_ewc() -> None: return_dataset_traits=True, ) + # Note: mxalign validation is temporarily kept here during early development + # to ensure `mlwp-data-specs` behaves identically. It will eventually be removed. report_mxalign = validate_dataset_with_mxalign( ds, time=traits.get("time_profile"), diff --git a/tests/test_api.py b/tests/test_api.py index 5833aaa..64eccad 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -110,7 +110,7 @@ def test_load_dataset_returns_traits(tmp_path, monkeypatch: pytest.MonkeyPatch) ) assert isinstance(res, tuple) - ds, dataset_traits = res + ds, dataset_traits = res # type: ignore # load_dataset returns a tuple when return_dataset_traits=True assert isinstance(ds, xr.Dataset) assert isinstance(dataset_traits, dict) assert dataset_traits.get("time_profile") == "forecast" diff --git a/tests/test_harp_obstable_integration.py b/tests/test_harp_obstable_integration.py index 43d6616..989cdb9 100644 --- a/tests/test_harp_obstable_integration.py +++ b/tests/test_harp_obstable_integration.py @@ -7,7 +7,6 @@ from mlwp_data_specs import validate_dataset from mlwp_data_loaders.api import load_dataset -from mlwp_data_loaders.core import get_dataset_traits_from_loader from mlwp_data_loaders.mxalign_api import validate_dataset_with_mxalign HARP_DATA_URL = "https://raw.githubusercontent.com/harphub/harpData/master/inst/OBSTABLE/OBSTABLE_2019.sqlite" @@ -26,13 +25,14 @@ def obstable_path() -> str: def test_load_dataset_opens_harp_obstable(obstable_path: str) -> None: """The harp.obstable loader can open and validate the sample SQLite file.""" - ds = load_dataset( + ds, traits = load_dataset( # type: ignore # load_dataset returns a tuple when return_dataset_traits=True obstable_path, loader=LOADER, + return_dataset_traits=True, ) - traits = get_dataset_traits_from_loader(LOADER) - + # Note: mxalign validation is temporarily kept here during early development + # to ensure `mlwp-data-specs` behaves identically. It will eventually be removed. report_mxalign = validate_dataset_with_mxalign( ds, time=traits.get("time_profile"), From 5f556f49cd63069327524392936c9ae88336a779 Mon Sep 17 00:00:00 2001 From: Leif Denby Date: Fri, 20 Mar 2026 11:40:33 +0100 Subject: [PATCH 3/6] docs: Restore validate_dataset execution in README python API example --- README.md | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index bacebdc..fbf5436 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,9 @@ You can load a dataset and its trait profiles natively: ```python from mlwp_data_loaders import load_dataset +from mlwp_data_specs import validate_dataset +# 1. Load the dataset and extract the trait profiles defined by the loader ds, dataset_traits = load_dataset( [ "/path/to/anemoi-inference-20260101T00.nc", @@ -37,8 +39,16 @@ ds, dataset_traits = load_dataset( return_dataset_traits=True, ) -print(f"Validation time profile used: {dataset_traits['time_profile']}") -print(f"Dataset has {len(ds.data_vars)} variables.") +# 2. Get a detailed validation report by passing the extracted traits +report = validate_dataset( + ds, + time=dataset_traits.get("time_profile"), + space=dataset_traits.get("space_profile"), + uncertainty=dataset_traits.get("uncertainty_profile"), +) + +# 3. Print the validation results to the console +report.console_print() ``` If you don't need the traits dictionary returned, simply omit `return_dataset_traits` (defaults to `False`): From 3b40ce5edf4d072ca914ee6a5849494429f8186d Mon Sep 17 00:00:00 2001 From: Leif Denby Date: Fri, 20 Mar 2026 11:50:45 +0100 Subject: [PATCH 4/6] Refactor cli.py to directly unpack load_dataset results --- src/mlwp_data_loaders/cli.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/mlwp_data_loaders/cli.py b/src/mlwp_data_loaders/cli.py index c6b222e..949f69b 100644 --- a/src/mlwp_data_loaders/cli.py +++ b/src/mlwp_data_loaders/cli.py @@ -77,14 +77,12 @@ def main(argv: Sequence[str] | None = None) -> int: logger.info(f"Using mlwp-data-specs {specs_version}") # Load the dataset - res = load_dataset( + ds, dataset_traits = load_dataset( # type: ignore # load_dataset returns a tuple when return_dataset_traits=True dataset_input, loader=args.loader, storage_options=storage_options or None, return_dataset_traits=True, ) - assert isinstance(res, tuple) - ds, dataset_traits = res time_profile = dataset_traits.get("time_profile") space_profile = dataset_traits.get("space_profile") From 8cb9e70b935e55e611a5613ebc800ac6a7866dda Mon Sep 17 00:00:00 2001 From: Leif Denby Date: Fri, 20 Mar 2026 12:05:59 +0100 Subject: [PATCH 5/6] Rename traits to dataset_traits in anemoi integration test --- tests/test_anemoi_datasets_integration.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/test_anemoi_datasets_integration.py b/tests/test_anemoi_datasets_integration.py index 85d0ba3..4a33c00 100644 --- a/tests/test_anemoi_datasets_integration.py +++ b/tests/test_anemoi_datasets_integration.py @@ -24,7 +24,7 @@ def test_load_dataset_opens_anemoi_store_from_ewc() -> None: "anon": True, } - ds, traits = load_dataset( # type: ignore # load_dataset returns a tuple when return_dataset_traits=True + ds, dataset_traits = load_dataset( # type: ignore # load_dataset returns a tuple when return_dataset_traits=True DATASET_PATH, loader=LOADER, storage_options=storage_options, @@ -36,9 +36,9 @@ def test_load_dataset_opens_anemoi_store_from_ewc() -> None: # to ensure `mlwp-data-specs` behaves identically. It will eventually be removed. report_mxalign = validate_dataset_with_mxalign( ds, - time=traits.get("time_profile"), - space=traits.get("space_profile"), - uncertainty=traits.get("uncertainty_profile"), + time=dataset_traits.get("time_profile"), + space=dataset_traits.get("space_profile"), + uncertainty=dataset_traits.get("uncertainty_profile"), ) if report_mxalign.has_fails(): report_mxalign.console_print() @@ -46,9 +46,9 @@ def test_load_dataset_opens_anemoi_store_from_ewc() -> None: report_specs = validate_dataset( ds, - time=traits.get("time_profile"), - space=traits.get("space_profile"), - uncertainty=traits.get("uncertainty_profile"), + time=dataset_traits.get("time_profile"), + space=dataset_traits.get("space_profile"), + uncertainty=dataset_traits.get("uncertainty_profile"), ) if report_specs.has_fails(): report_specs.console_print() From d13c6fbad99292caef2ede561f6dd5f42da82401 Mon Sep 17 00:00:00 2001 From: Leif Denby Date: Fri, 20 Mar 2026 12:06:23 +0100 Subject: [PATCH 6/6] Rename traits to dataset_traits in harp integration test --- tests/test_harp_obstable_integration.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/test_harp_obstable_integration.py b/tests/test_harp_obstable_integration.py index 989cdb9..744ad02 100644 --- a/tests/test_harp_obstable_integration.py +++ b/tests/test_harp_obstable_integration.py @@ -25,7 +25,7 @@ def obstable_path() -> str: def test_load_dataset_opens_harp_obstable(obstable_path: str) -> None: """The harp.obstable loader can open and validate the sample SQLite file.""" - ds, traits = load_dataset( # type: ignore # load_dataset returns a tuple when return_dataset_traits=True + ds, dataset_traits = load_dataset( # type: ignore # load_dataset returns a tuple when return_dataset_traits=True obstable_path, loader=LOADER, return_dataset_traits=True, @@ -35,9 +35,9 @@ def test_load_dataset_opens_harp_obstable(obstable_path: str) -> None: # to ensure `mlwp-data-specs` behaves identically. It will eventually be removed. report_mxalign = validate_dataset_with_mxalign( ds, - time=traits.get("time_profile"), - space=traits.get("space_profile"), - uncertainty=traits.get("uncertainty_profile"), + time=dataset_traits.get("time_profile"), + space=dataset_traits.get("space_profile"), + uncertainty=dataset_traits.get("uncertainty_profile"), ) if report_mxalign.has_fails(): report_mxalign.console_print() @@ -45,9 +45,9 @@ def test_load_dataset_opens_harp_obstable(obstable_path: str) -> None: report_specs = validate_dataset( ds, - time=traits.get("time_profile"), - space=traits.get("space_profile"), - uncertainty=traits.get("uncertainty_profile"), + time=dataset_traits.get("time_profile"), + space=dataset_traits.get("space_profile"), + uncertainty=dataset_traits.get("uncertainty_profile"), ) if report_specs.has_fails(): report_specs.console_print()