From ed9b6f1c8a3ab204d9bbc0373adebdad3b13e1ba Mon Sep 17 00:00:00 2001 From: veenstrajelmer Date: Tue, 21 Apr 2026 13:57:22 +0200 Subject: [PATCH 01/14] added to_netcdf() test to show the missed testcoverage --- tests/test_ddlpy.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/test_ddlpy.py b/tests/test_ddlpy.py index 552bc37..908acda 100755 --- a/tests/test_ddlpy.py +++ b/tests/test_ddlpy.py @@ -735,6 +735,17 @@ def test_dataframe_to_xarray(measurements): assert ds_firsttime.tz is None +def test_dataframe_to_xarray_to_netcdf(measurements, tmp_path): + """ + should be in test_utils.py + """ + ds_clean = ddlpy.dataframe_to_xarray( + df=measurements, + ) + file_out = tmp_path / "test.nc" + ds_clean.to_netcdf(file_out) + + def test_dataframe_to_xarray_drop_omschrijving(measurements): """ in case of non-unique Code/Omschrijving pairs, the Omschrijving variable should be From 6f8d5755f422eeb2757eda30145f29645d44dfe5 Mon Sep 17 00:00:00 2001 From: veenstrajelmer Date: Tue, 21 Apr 2026 14:08:10 +0200 Subject: [PATCH 02/14] add netcdf4 dev dependency --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 25e9958..0abe953 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -73,6 +73,7 @@ examples = [ netcdf = [ "xarray", "h5netcdf", + "netcdf4", ] [project.scripts] From 3059dc4036bb182f497cf736d64ab7e285e6a34c Mon Sep 17 00:00:00 2001 From: veenstrajelmer Date: Tue, 21 Apr 2026 14:15:00 +0200 Subject: [PATCH 03/14] check engine h5netcdf --- pyproject.toml | 1 - tests/test_ddlpy.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 0abe953..25e9958 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -73,7 +73,6 @@ examples = [ netcdf = [ "xarray", "h5netcdf", - "netcdf4", ] [project.scripts] diff --git a/tests/test_ddlpy.py b/tests/test_ddlpy.py index 908acda..b4cc725 100755 --- a/tests/test_ddlpy.py +++ b/tests/test_ddlpy.py @@ -743,7 +743,7 @@ def test_dataframe_to_xarray_to_netcdf(measurements, tmp_path): df=measurements, ) file_out = tmp_path / "test.nc" - ds_clean.to_netcdf(file_out) + ds_clean.to_netcdf(file_out, engine="h5netcdf") def test_dataframe_to_xarray_drop_omschrijving(measurements): From 1bf88048ad6f3d17e25c87de7ad31e8b3dd2158f Mon Sep 17 00:00:00 2001 From: veenstrajelmer Date: Tue, 21 Apr 2026 14:18:39 +0200 Subject: [PATCH 04/14] add h5py --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 25e9958..5a4df17 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -73,6 +73,7 @@ examples = [ netcdf = [ "xarray", "h5netcdf", + "h5py", ] [project.scripts] From 3c53caa8e562017fa97e0fba25ee9b421c1503a5 Mon Sep 17 00:00:00 2001 From: veenstrajelmer Date: Tue, 21 Apr 2026 14:27:10 +0200 Subject: [PATCH 05/14] add py3.10 to testing workflow --- .github/workflows/pytest.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 5b313d0..a7d9fa0 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -15,7 +15,7 @@ jobs: fail-fast: false matrix: # we assume it also works for inbetween versions - python-version: ["3.9", "3.12", "3.14"] + python-version: ["3.9", "3.10", "3.12", "3.14"] os: [ubuntu-latest, windows-latest, macos-latest] runs-on: ${{ matrix.os }} From d80736ec120013f962dece9dc9b7de487729cb63 Mon Sep 17 00:00:00 2001 From: veenstrajelmer Date: Tue, 21 Apr 2026 14:50:45 +0200 Subject: [PATCH 06/14] added netcdf4 teests --- pyproject.toml | 4 ++++ tests/test_ddlpy.py | 24 +++++++++++++++++++++++- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 5a4df17..b36c3f1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -74,6 +74,7 @@ netcdf = [ "xarray", "h5netcdf", "h5py", + "netcdf4", ] [project.scripts] @@ -87,6 +88,9 @@ testpaths = ["tests"] addopts = "--durations=0" filterwarnings = [ "error", + # temporarily ignore numpy incompatibility: https://github.com/Deltares/dfm_tools/issues/1243 + # this warning broke github tests for python 3.11, 3.12 and 3.13 + "ignore:numpy.ndarray size changed:RuntimeWarning", ] [tool.flake8] diff --git a/tests/test_ddlpy.py b/tests/test_ddlpy.py index b4cc725..11dd26d 100755 --- a/tests/test_ddlpy.py +++ b/tests/test_ddlpy.py @@ -735,7 +735,7 @@ def test_dataframe_to_xarray(measurements): assert ds_firsttime.tz is None -def test_dataframe_to_xarray_to_netcdf(measurements, tmp_path): +def test_dataframe_to_xarray_to_netcdf_h5netcdf(measurements, tmp_path): """ should be in test_utils.py """ @@ -746,6 +746,28 @@ def test_dataframe_to_xarray_to_netcdf(measurements, tmp_path): ds_clean.to_netcdf(file_out, engine="h5netcdf") +def test_dataframe_to_xarray_to_netcdf_netcdf4(measurements, tmp_path): + """ + should be in test_utils.py + """ + ds_clean = ddlpy.dataframe_to_xarray( + df=measurements, + ) + file_out = tmp_path / "test.nc" + ds_clean.to_netcdf(file_out, engine="netcdf4") + + +def test_dataframe_to_xarray_to_netcdf_netcdf4_classic(measurements, tmp_path): + """ + should be in test_utils.py + """ + ds_clean = ddlpy.dataframe_to_xarray( + df=measurements, + ) + file_out = tmp_path / "test.nc" + ds_clean.to_netcdf(file_out, engine="netcdf4", format="NETCDF4_CLASSIC") + + def test_dataframe_to_xarray_drop_omschrijving(measurements): """ in case of non-unique Code/Omschrijving pairs, the Omschrijving variable should be From 99cdb3683998ad09900c7af58c94c08aa115fe4d Mon Sep 17 00:00:00 2001 From: veenstrajelmer Date: Tue, 21 Apr 2026 16:28:19 +0200 Subject: [PATCH 07/14] test adding scipy dependency --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index b36c3f1..f0539d1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -75,6 +75,7 @@ netcdf = [ "h5netcdf", "h5py", "netcdf4", + "scipy", ] [project.scripts] From 135d822a8bc68123aa2d40fc0529a738e3b915fc Mon Sep 17 00:00:00 2001 From: veenstrajelmer Date: Wed, 22 Apr 2026 10:04:42 +0200 Subject: [PATCH 08/14] restructured tests with conftest.py --- pyproject.toml | 1 - tests/conftest.py | 48 ++++++++++ tests/test_ddlpy.py | 228 -------------------------------------------- tests/test_utils.py | 183 ++++++++++++++++++++++++++++++++--- 4 files changed, 220 insertions(+), 240 deletions(-) create mode 100644 tests/conftest.py diff --git a/pyproject.toml b/pyproject.toml index f0539d1..b36c3f1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -75,7 +75,6 @@ netcdf = [ "h5netcdf", "h5py", "netcdf4", - "scipy", ] [project.scripts] diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..f24de56 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,48 @@ +# -*- coding: utf-8 -*- +""" +Created on Wed Apr 22 09:56:57 2026 + +@author: veenstra +""" +import pytest +import ddlpy +import datetime as dt + + +@pytest.fixture +def endpoints(): + """ + Get the endpoints from the api + """ + endpoints = ddlpy.ddlpy.ENDPOINTS + return endpoints + + +@pytest.fixture +def locations(): + """return all locations""" + locations = ddlpy.locations() + return locations + + +@pytest.fixture +def location(locations): + """return sample location""" + bool_grootheid = locations["Grootheid.Code"] == "WATHTE" + bool_groepering = locations["Groepering.Code"] == "" + bool_procestype = locations["ProcesType"] == "meting" + location = locations[bool_grootheid & bool_groepering & bool_procestype].loc[ + "denhelder.marsdiep" + ] + return location + + +@pytest.fixture +def measurements(location): + """measurements for a location""" + start_date = dt.datetime(1953, 1, 1) + end_date = dt.datetime(1953, 4, 1) + measurements = ddlpy.measurements( + location, start_date=start_date, end_date=end_date + ) + return measurements diff --git a/tests/test_ddlpy.py b/tests/test_ddlpy.py index 11dd26d..cd0fe86 100755 --- a/tests/test_ddlpy.py +++ b/tests/test_ddlpy.py @@ -20,44 +20,6 @@ } -@pytest.fixture(scope="session") -def endpoints(): - """ - Get the endpoints from the api - """ - endpoints = ddlpy.ddlpy.ENDPOINTS - return endpoints - - -@pytest.fixture(scope="session") -def locations(): - """return all locations""" - locations = ddlpy.locations() - return locations - - -@pytest.fixture(scope="session") -def location(locations): - """return sample location""" - bool_grootheid = locations["Grootheid.Code"] == "WATHTE" - bool_groepering = locations["Groepering.Code"] == "" - bool_procestype = locations["ProcesType"] == "meting" - location = locations[bool_grootheid & bool_groepering & bool_procestype].loc[ - "denhelder.marsdiep" - ] - return location - - -@pytest.fixture(scope="session") -def measurements(location): - """measurements for a location""" - start_date = dt.datetime(1953, 1, 1) - end_date = dt.datetime(1953, 4, 1) - measurements = ddlpy.measurements( - location, start_date=start_date, end_date=end_date - ) - return measurements - def test_send_post_request_errors_wrongapi(): url = "https://ddapi20-waterwebservices.rijkswaterstaat.nl/ONLINEWAARNEMINGENSERVICES/OphalenCatalogus" @@ -613,193 +575,3 @@ def test_check_convert_wrongorder(): with pytest.raises(ValueError): _, _ = ddlpy.ddlpy._check_convert_dates(end_date, start_date) - -def test_simplify_dataframe(measurements): - """ - should be in test_utils.py - """ - assert len(measurements.columns) == 48 - meas_simple = ddlpy.simplify_dataframe(measurements) - assert hasattr(meas_simple, "attrs") - # TODO: the below should be 47 and 1, but there are still RIKZ_WAT instances in - # OpdrachtgevendeInstantie column, which is different from RIKZMON_WAT - # this also probably partly causes the 96 duplicated timestamps - # https://github.com/Rijkswaterstaat/WaterWebservices/issues/16 - assert len(meas_simple.attrs) == 46 - assert len(meas_simple.columns) == 2 - expected_columns = [ - "WaarnemingMetadata.OpdrachtgevendeInstantie", - "Meetwaarde.Waarde_Numeriek", - ] - assert set(meas_simple.columns) == set(expected_columns) - - -def test_simplify_dataframe_always_preserve(measurements): - """ - should be in test_utils.py - """ - assert len(measurements.columns) == 48 - always_preserve = [ - "WaarnemingMetadata.Statuswaarde", - "WaarnemingMetadata.OpdrachtgevendeInstantie", - "WaarnemingMetadata.Kwaliteitswaardecode", - "Groepering.Code", - "BemonsteringsApparaat.Code", - "Meetwaarde.Waarde_Numeriek", - ] - meas_simple = ddlpy.simplify_dataframe( - measurements, always_preserve=always_preserve - ) - assert hasattr(meas_simple, "attrs") - assert len(meas_simple.attrs) == 42 - assert len(meas_simple.columns) == 6 - expected_columns = [ - "WaarnemingMetadata.Statuswaarde", - "WaarnemingMetadata.OpdrachtgevendeInstantie", - "WaarnemingMetadata.Kwaliteitswaardecode", - "Groepering.Code", - "BemonsteringsApparaat.Code", - "Meetwaarde.Waarde_Numeriek", - ] - assert set(meas_simple.columns) == set(expected_columns) - - -def test_simplify_dataframe_always_preserve_invalid_key(measurements): - """ - should be in test_utils.py - """ - assert len(measurements.columns) == 48 - always_preserve = ["invalid_key"] - with pytest.raises(ValueError) as e: - _ = ddlpy.simplify_dataframe(measurements, always_preserve=always_preserve) - assert "column 'invalid_key' not present in dataframe" in str(e.value) - - -def test_simplify_dataframe_alfanumeriek_with_nan_dropped(locations): - bool_grootheid = locations["Grootheid.Code"] == "WATHTE" - bool_groepering = locations["Groepering.Code"] == "" - bool_procestype = locations["ProcesType"] == "meting" - location = locations[bool_grootheid & bool_groepering & bool_procestype].loc["a12"] - - start_date = dt.datetime(2009, 1, 1) - end_date = dt.datetime(2009, 4, 1) - measurements = ddlpy.measurements( - location, start_date=start_date, end_date=end_date - ) - meas_simple = ddlpy.simplify_dataframe(df=measurements) - expected_columns = [ - "WaarnemingMetadata.Kwaliteitswaardecode", - "Meetwaarde.Waarde_Numeriek", - ] - assert set(meas_simple.columns) == set(expected_columns) - - -def test_dataframe_to_xarray(measurements): - """ - should be in test_utils.py - """ - always_preserve = [ - "WaarnemingMetadata.Statuswaarde", - "WaarnemingMetadata.Kwaliteitswaardecode", - "MeetApparaat.Code", - "WaardeBepalingsMethode.Code", - "Meetwaarde.Waarde_Numeriek", - ] - ds_clean = ddlpy.dataframe_to_xarray( - df=measurements, - always_preserve=always_preserve, - ) - - non_constant_columns = [ - "WaarnemingMetadata.OpdrachtgevendeInstantie", - "Meetwaarde.Waarde_Numeriek", - ] - - preserved = always_preserve + non_constant_columns - - for varname in measurements.columns: - # check if all varnames in always_preserve and non-constant columns are indeed preserved as variables - if varname in preserved: - assert varname in ds_clean.data_vars - assert varname not in ds_clean.attrs.keys() - else: - assert varname not in ds_clean.data_vars - assert varname in ds_clean.attrs.keys() - varname_oms = varname.replace(".Code", ".Omschrijving") - assert varname_oms in ds_clean.attrs.keys() - - # check if times and timezone are correct - refdate_utc = measurements.tz_convert(None).index[0] - ds_firsttime = ds_clean.time.to_pandas().iloc[0] - assert refdate_utc == ds_firsttime - assert ds_firsttime.tz is None - - -def test_dataframe_to_xarray_to_netcdf_h5netcdf(measurements, tmp_path): - """ - should be in test_utils.py - """ - ds_clean = ddlpy.dataframe_to_xarray( - df=measurements, - ) - file_out = tmp_path / "test.nc" - ds_clean.to_netcdf(file_out, engine="h5netcdf") - - -def test_dataframe_to_xarray_to_netcdf_netcdf4(measurements, tmp_path): - """ - should be in test_utils.py - """ - ds_clean = ddlpy.dataframe_to_xarray( - df=measurements, - ) - file_out = tmp_path / "test.nc" - ds_clean.to_netcdf(file_out, engine="netcdf4") - - -def test_dataframe_to_xarray_to_netcdf_netcdf4_classic(measurements, tmp_path): - """ - should be in test_utils.py - """ - ds_clean = ddlpy.dataframe_to_xarray( - df=measurements, - ) - file_out = tmp_path / "test.nc" - ds_clean.to_netcdf(file_out, engine="netcdf4", format="NETCDF4_CLASSIC") - - -def test_dataframe_to_xarray_drop_omschrijving(measurements): - """ - in case of non-unique Code/Omschrijving pairs, the Omschrijving variable should be - dropped also. The information it contains is added as attrs to the Code value. - """ - # make MeetApparaat non-unique - measurements.loc["1953-01-01 02:40:00+01:00", "MeetApparaat.Code"] = "newcode" - measurements.loc["1953-01-01 02:40:00+01:00", "MeetApparaat.Omschrijving"] = ( - "newoms" - ) - - always_preserve = [ - "WaarnemingMetadata.Statuswaarde", - "WaarnemingMetadata.Kwaliteitswaardecode", - "WaardeBepalingsMethode.Code", - "Meetwaarde.Waarde_Numeriek", - ] - - ds = ddlpy.dataframe_to_xarray(measurements, always_preserve=always_preserve) - for varn in ds.data_vars: - assert not varn.endswith(".Omschrijving") - - expected_attrs = {"newcode": "newoms", "10272": "other:Vlotterniveaumeter"} - assert ds["MeetApparaat.Code"].attrs == expected_attrs - - -def test_code_description_attrs_from_dataframe_prevent_empty(measurements): - """ - should be in test_utils.py - https://github.com/Deltares/ddlpy/issues/156 - """ - assert "" in measurements["Groepering.Code"].unique() - attr_dict = ddlpy.utils.code_description_attrs_from_dataframe(measurements) - for attr_key_value_pairs in attr_dict.values(): - assert "" not in attr_key_value_pairs.keys() diff --git a/tests/test_utils.py b/tests/test_utils.py index 67e1a59..f0fe5e1 100755 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -4,29 +4,190 @@ """Tests for `utils` package.""" from ddlpy.utils import date_series -import datetime +import datetime as dt +import ddlpy +import pytest def test_date_series(): """Sample pytest test function with the pytest fixture as an argument.""" # from bs4 import BeautifulSoup # assert 'GitHub' in BeautifulSoup(response.content).title.string - start = datetime.datetime(2018, 1, 1) - end = datetime.datetime(2018, 3, 1) + start = dt.datetime(2018, 1, 1) + end = dt.datetime(2018, 3, 1) result = date_series(start, end) expected = [ - (datetime.datetime(2018, 1, 1, 0, 0), datetime.datetime(2018, 2, 1, 0, 0)), - (datetime.datetime(2018, 2, 1, 0, 0), datetime.datetime(2018, 3, 1, 0, 0)), + (dt.datetime(2018, 1, 1, 0, 0), dt.datetime(2018, 2, 1, 0, 0)), + (dt.datetime(2018, 2, 1, 0, 0), dt.datetime(2018, 3, 1, 0, 0)), ] assert result == expected - start = datetime.datetime(2017, 11, 15) - end = datetime.datetime(2018, 3, 5) + start = dt.datetime(2017, 11, 15) + end = dt.datetime(2018, 3, 5) result = date_series(start, end) expected = [ - (datetime.datetime(2017, 11, 15, 0, 0), datetime.datetime(2017, 12, 15, 0, 0)), - (datetime.datetime(2017, 12, 15, 0, 0), datetime.datetime(2018, 1, 15, 0, 0)), - (datetime.datetime(2018, 1, 15, 0, 0), datetime.datetime(2018, 2, 15, 0, 0)), - (datetime.datetime(2018, 2, 15, 0, 0), datetime.datetime(2018, 3, 5, 0, 0)), + (dt.datetime(2017, 11, 15, 0, 0), dt.datetime(2017, 12, 15, 0, 0)), + (dt.datetime(2017, 12, 15, 0, 0), dt.datetime(2018, 1, 15, 0, 0)), + (dt.datetime(2018, 1, 15, 0, 0), dt.datetime(2018, 2, 15, 0, 0)), + (dt.datetime(2018, 2, 15, 0, 0), dt.datetime(2018, 3, 5, 0, 0)), ] assert result == expected + + +def test_simplify_dataframe(measurements): + assert len(measurements.columns) == 48 + meas_simple = ddlpy.simplify_dataframe(measurements) + assert hasattr(meas_simple, "attrs") + # TODO: the below should be 47 and 1, but there are still RIKZ_WAT instances in + # OpdrachtgevendeInstantie column, which is different from RIKZMON_WAT + # this also probably partly causes the 96 duplicated timestamps + # https://github.com/Rijkswaterstaat/WaterWebservices/issues/16 + assert len(meas_simple.attrs) == 46 + assert len(meas_simple.columns) == 2 + expected_columns = [ + "WaarnemingMetadata.OpdrachtgevendeInstantie", + "Meetwaarde.Waarde_Numeriek", + ] + assert set(meas_simple.columns) == set(expected_columns) + + +def test_simplify_dataframe_always_preserve(measurements): + assert len(measurements.columns) == 48 + always_preserve = [ + "WaarnemingMetadata.Statuswaarde", + "WaarnemingMetadata.OpdrachtgevendeInstantie", + "WaarnemingMetadata.Kwaliteitswaardecode", + "Groepering.Code", + "BemonsteringsApparaat.Code", + "Meetwaarde.Waarde_Numeriek", + ] + meas_simple = ddlpy.simplify_dataframe( + measurements, always_preserve=always_preserve + ) + assert hasattr(meas_simple, "attrs") + assert len(meas_simple.attrs) == 42 + assert len(meas_simple.columns) == 6 + expected_columns = [ + "WaarnemingMetadata.Statuswaarde", + "WaarnemingMetadata.OpdrachtgevendeInstantie", + "WaarnemingMetadata.Kwaliteitswaardecode", + "Groepering.Code", + "BemonsteringsApparaat.Code", + "Meetwaarde.Waarde_Numeriek", + ] + assert set(meas_simple.columns) == set(expected_columns) + + +def test_simplify_dataframe_always_preserve_invalid_key(measurements): + assert len(measurements.columns) == 48 + always_preserve = ["invalid_key"] + with pytest.raises(ValueError) as e: + _ = ddlpy.simplify_dataframe(measurements, always_preserve=always_preserve) + assert "column 'invalid_key' not present in dataframe" in str(e.value) + + +def test_simplify_dataframe_alfanumeriek_with_nan_dropped(locations): + bool_grootheid = locations["Grootheid.Code"] == "WATHTE" + bool_groepering = locations["Groepering.Code"] == "" + bool_procestype = locations["ProcesType"] == "meting" + location = locations[bool_grootheid & bool_groepering & bool_procestype].loc["a12"] + + start_date = dt.datetime(2009, 1, 1) + end_date = dt.datetime(2009, 4, 1) + measurements = ddlpy.measurements( + location, start_date=start_date, end_date=end_date + ) + meas_simple = ddlpy.simplify_dataframe(df=measurements) + expected_columns = [ + "WaarnemingMetadata.Kwaliteitswaardecode", + "Meetwaarde.Waarde_Numeriek", + ] + assert set(meas_simple.columns) == set(expected_columns) + + +def test_dataframe_to_xarray(measurements): + always_preserve = [ + "WaarnemingMetadata.Statuswaarde", + "WaarnemingMetadata.Kwaliteitswaardecode", + "MeetApparaat.Code", + "WaardeBepalingsMethode.Code", + "Meetwaarde.Waarde_Numeriek", + ] + ds_clean = ddlpy.dataframe_to_xarray( + df=measurements, + always_preserve=always_preserve, + ) + + non_constant_columns = [ + "WaarnemingMetadata.OpdrachtgevendeInstantie", + "Meetwaarde.Waarde_Numeriek", + ] + + preserved = always_preserve + non_constant_columns + + for varname in measurements.columns: + # check if all varnames in always_preserve and non-constant columns are indeed preserved as variables + if varname in preserved: + assert varname in ds_clean.data_vars + assert varname not in ds_clean.attrs.keys() + else: + assert varname not in ds_clean.data_vars + assert varname in ds_clean.attrs.keys() + varname_oms = varname.replace(".Code", ".Omschrijving") + assert varname_oms in ds_clean.attrs.keys() + + # check if times and timezone are correct + refdate_utc = measurements.tz_convert(None).index[0] + ds_firsttime = ds_clean.time.to_pandas().iloc[0] + assert refdate_utc == ds_firsttime + assert ds_firsttime.tz is None + + +def test_dataframe_to_xarray_drop_omschrijving(measurements): + """ + in case of non-unique Code/Omschrijving pairs, the Omschrijving variable should be + dropped also. The information it contains is added as attrs to the Code value. + """ + # make MeetApparaat non-unique + measurements.loc["1953-01-01 02:40:00+01:00", "MeetApparaat.Code"] = "newcode" + measurements.loc["1953-01-01 02:40:00+01:00", "MeetApparaat.Omschrijving"] = ( + "newoms" + ) + + always_preserve = [ + "WaarnemingMetadata.Statuswaarde", + "WaarnemingMetadata.Kwaliteitswaardecode", + "WaardeBepalingsMethode.Code", + "Meetwaarde.Waarde_Numeriek", + ] + + ds = ddlpy.dataframe_to_xarray(measurements, always_preserve=always_preserve) + for varn in ds.data_vars: + assert not varn.endswith(".Omschrijving") + + expected_attrs = {"newcode": "newoms", "10272": "other:Vlotterniveaumeter"} + assert ds["MeetApparaat.Code"].attrs == expected_attrs + + +def test_dataframe_to_xarray_to_netcdf(measurements, tmp_path): + ds_clean = ddlpy.dataframe_to_xarray( + df=measurements, + ) + file_out = tmp_path / "test_default.nc" + ds_clean.to_netcdf(file_out, engine=None) + file_out = tmp_path / "test_h5netcdf.nc" + ds_clean.to_netcdf(file_out, engine="h5netcdf") + file_out = tmp_path / "test_netcdf4.nc" + ds_clean.to_netcdf(file_out, engine="netcdf4") + file_out = tmp_path / "test_netcdf4_classic.nc" + ds_clean.to_netcdf(file_out, engine="netcdf4", format="NETCDF4_CLASSIC") + + +def test_code_description_attrs_from_dataframe_prevent_empty(measurements): + """ + https://github.com/Deltares/ddlpy/issues/156 + """ + assert "" in measurements["Groepering.Code"].unique() + attr_dict = ddlpy.utils.code_description_attrs_from_dataframe(measurements) + for attr_key_value_pairs in attr_dict.values(): + assert "" not in attr_key_value_pairs.keys() From 6297fe293fdcea87175702b5cd954a15ec88400c Mon Sep 17 00:00:00 2001 From: veenstrajelmer Date: Wed, 22 Apr 2026 10:07:29 +0200 Subject: [PATCH 09/14] black --- pyproject.toml | 2 +- tests/conftest.py | 1 + tests/test_ddlpy.py | 2 -- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index b36c3f1..9db54c3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -49,7 +49,7 @@ Issues = "https://github.com/deltares/ddlpy/issues" [project.optional-dependencies] dev = [ "bump2version>=0.5.11", - "flake8", + "black", "pytest>=3.8.2", "pytest-cov", "twine", diff --git a/tests/conftest.py b/tests/conftest.py index f24de56..bc8f8aa 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -4,6 +4,7 @@ @author: veenstra """ + import pytest import ddlpy import datetime as dt diff --git a/tests/test_ddlpy.py b/tests/test_ddlpy.py index cd0fe86..56ae2a8 100755 --- a/tests/test_ddlpy.py +++ b/tests/test_ddlpy.py @@ -20,7 +20,6 @@ } - def test_send_post_request_errors_wrongapi(): url = "https://ddapi20-waterwebservices.rijkswaterstaat.nl/ONLINEWAARNEMINGENSERVICES/OphalenCatalogus" with pytest.raises(IOError) as e: @@ -574,4 +573,3 @@ def test_check_convert_wrongorder(): # assert output with pytest.raises(ValueError): _, _ = ddlpy.ddlpy._check_convert_dates(end_date, start_date) - From 99942ace3903f6b889ea5ad6f859ac90d404b2a2 Mon Sep 17 00:00:00 2001 From: veenstrajelmer Date: Wed, 22 Apr 2026 11:18:38 +0200 Subject: [PATCH 10/14] session scoped fixtures --- tests/conftest.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index bc8f8aa..6ad95ac 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -10,7 +10,7 @@ import datetime as dt -@pytest.fixture +@pytest.fixture(scope="session") def endpoints(): """ Get the endpoints from the api @@ -19,14 +19,14 @@ def endpoints(): return endpoints -@pytest.fixture +@pytest.fixture(scope="session") def locations(): """return all locations""" locations = ddlpy.locations() return locations -@pytest.fixture +@pytest.fixture(scope="session") def location(locations): """return sample location""" bool_grootheid = locations["Grootheid.Code"] == "WATHTE" @@ -38,7 +38,7 @@ def location(locations): return location -@pytest.fixture +@pytest.fixture(scope="session") def measurements(location): """measurements for a location""" start_date = dt.datetime(1953, 1, 1) From cff0bdf33bd58a569bcc5c67d7df61c95af39cb0 Mon Sep 17 00:00:00 2001 From: veenstrajelmer Date: Wed, 22 Apr 2026 11:26:33 +0200 Subject: [PATCH 11/14] update comment --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 9db54c3..94b457c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -88,7 +88,7 @@ testpaths = ["tests"] addopts = "--durations=0" filterwarnings = [ "error", - # temporarily ignore numpy incompatibility: https://github.com/Deltares/dfm_tools/issues/1243 + # temporarily ignore numpy incompatibility: https://github.com/Deltares/ddlpy/issues/192 # this warning broke github tests for python 3.11, 3.12 and 3.13 "ignore:numpy.ndarray size changed:RuntimeWarning", ] From 9b1e735ebbca63c42ccfc40d4cf49fc57f4d7857 Mon Sep 17 00:00:00 2001 From: veenstrajelmer Date: Wed, 22 Apr 2026 11:26:53 +0200 Subject: [PATCH 12/14] minimize dependencies --- pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 94b457c..c9d11d2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -73,7 +73,6 @@ examples = [ netcdf = [ "xarray", "h5netcdf", - "h5py", "netcdf4", ] From 15425999acc95845f62136b9f05834e7fdb417b9 Mon Sep 17 00:00:00 2001 From: veenstrajelmer Date: Wed, 22 Apr 2026 11:38:24 +0200 Subject: [PATCH 13/14] dependencies --- pyproject.toml | 2 +- tests/test_utils.py | 18 +++++++++--------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index c9d11d2..b7ee1be 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -72,7 +72,7 @@ examples = [ ] netcdf = [ "xarray", - "h5netcdf", + "h5netcdf[h5py]", "netcdf4", ] diff --git a/tests/test_utils.py b/tests/test_utils.py index f0fe5e1..cabbb2b 100755 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -169,18 +169,18 @@ def test_dataframe_to_xarray_drop_omschrijving(measurements): assert ds["MeetApparaat.Code"].attrs == expected_attrs -def test_dataframe_to_xarray_to_netcdf(measurements, tmp_path): +@pytest.mark.parametrize("engine", [None, "h5netcdf", "netcdf4", "netcdf4_classic"]) +def test_dataframe_to_xarray_to_netcdf(measurements, tmp_path, engine): ds_clean = ddlpy.dataframe_to_xarray( df=measurements, ) - file_out = tmp_path / "test_default.nc" - ds_clean.to_netcdf(file_out, engine=None) - file_out = tmp_path / "test_h5netcdf.nc" - ds_clean.to_netcdf(file_out, engine="h5netcdf") - file_out = tmp_path / "test_netcdf4.nc" - ds_clean.to_netcdf(file_out, engine="netcdf4") - file_out = tmp_path / "test_netcdf4_classic.nc" - ds_clean.to_netcdf(file_out, engine="netcdf4", format="NETCDF4_CLASSIC") + + file_out = tmp_path / f"test_{engine}.nc" + + if engine == "netcdf4_classic": + ds_clean.to_netcdf(file_out, engine="netcdf4", format="NETCDF4_CLASSIC") + else: + ds_clean.to_netcdf(file_out, engine=engine) def test_code_description_attrs_from_dataframe_prevent_empty(measurements): From 6a1680472719fbe685b74a9887212213c91a64a6 Mon Sep 17 00:00:00 2001 From: veenstrajelmer Date: Wed, 22 Apr 2026 11:39:54 +0200 Subject: [PATCH 14/14] black --- tests/test_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_utils.py b/tests/test_utils.py index cabbb2b..73c3af6 100755 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -174,9 +174,9 @@ def test_dataframe_to_xarray_to_netcdf(measurements, tmp_path, engine): ds_clean = ddlpy.dataframe_to_xarray( df=measurements, ) - + file_out = tmp_path / f"test_{engine}.nc" - + if engine == "netcdf4_classic": ds_clean.to_netcdf(file_out, engine="netcdf4", format="NETCDF4_CLASSIC") else: