From ed9b6f1c8a3ab204d9bbc0373adebdad3b13e1ba Mon Sep 17 00:00:00 2001
From: veenstrajelmer <veenstrajelmer@gmail.com>
Date: Tue, 21 Apr 2026 13:57:22 +0200
Subject: [PATCH 01/14] added to_netcdf() test to show the missed test coverage

---
 tests/test_ddlpy.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/tests/test_ddlpy.py b/tests/test_ddlpy.py
index 552bc37..908acda 100755
--- a/tests/test_ddlpy.py
+++ b/tests/test_ddlpy.py
@@ -735,6 +735,17 @@ def test_dataframe_to_xarray(measurements):
     assert ds_firsttime.tz is None
 
 
+def test_dataframe_to_xarray_to_netcdf(measurements, tmp_path):
+    """
+    should be in test_utils.py
+    """
+    ds_clean = ddlpy.dataframe_to_xarray(
+        df=measurements,
+    )
+    file_out = tmp_path / "test.nc"
+    ds_clean.to_netcdf(file_out)
+
+
 def test_dataframe_to_xarray_drop_omschrijving(measurements):
     """
     in case of non-unique Code/Omschrijving pairs, the Omschrijving variable should be

From 6f8d5755f422eeb2757eda30145f29645d44dfe5 Mon Sep 17 00:00:00 2001
From: veenstrajelmer <veenstrajelmer@gmail.com>
Date: Tue, 21 Apr 2026 14:08:10 +0200
Subject: [PATCH 02/14] add netcdf4 dev dependency

---
 pyproject.toml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pyproject.toml b/pyproject.toml
index 25e9958..0abe953 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -73,6 +73,7 @@ examples = [
 netcdf = [
 	"xarray",
 	"h5netcdf",
+	"netcdf4",
 ]
 
 [project.scripts]

From 3059dc4036bb182f497cf736d64ab7e285e6a34c Mon Sep 17 00:00:00 2001
From: veenstrajelmer <veenstrajelmer@gmail.com>
Date: Tue, 21 Apr 2026 14:15:00 +0200
Subject: [PATCH 03/14] check engine h5netcdf

---
 pyproject.toml      | 1 -
 tests/test_ddlpy.py | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 0abe953..25e9958 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -73,7 +73,6 @@ examples = [
 netcdf = [
 	"xarray",
 	"h5netcdf",
-	"netcdf4",
 ]
 
 [project.scripts]
diff --git a/tests/test_ddlpy.py b/tests/test_ddlpy.py
index 908acda..b4cc725 100755
--- a/tests/test_ddlpy.py
+++ b/tests/test_ddlpy.py
@@ -743,7 +743,7 @@ def test_dataframe_to_xarray_to_netcdf(measurements, tmp_path):
         df=measurements,
     )
     file_out = tmp_path / "test.nc"
-    ds_clean.to_netcdf(file_out)
+    ds_clean.to_netcdf(file_out, engine="h5netcdf")
 
 
 def test_dataframe_to_xarray_drop_omschrijving(measurements):

From 1bf88048ad6f3d17e25c87de7ad31e8b3dd2158f Mon Sep 17 00:00:00 2001
From: veenstrajelmer <veenstrajelmer@gmail.com>
Date: Tue, 21 Apr 2026 14:18:39 +0200
Subject: [PATCH 04/14] add h5py

---
 pyproject.toml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pyproject.toml b/pyproject.toml
index 25e9958..5a4df17 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -73,6 +73,7 @@ examples = [
 netcdf = [
 	"xarray",
 	"h5netcdf",
+	"h5py",
 ]
 
 [project.scripts]

From 3c53caa8e562017fa97e0fba25ee9b421c1503a5 Mon Sep 17 00:00:00 2001
From: veenstrajelmer <veenstrajelmer@gmail.com>
Date: Tue, 21 Apr 2026 14:27:10 +0200
Subject: [PATCH 05/14] add py3.10 to testing workflow

---
 .github/workflows/pytest.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml
index 5b313d0..a7d9fa0 100644
--- a/.github/workflows/pytest.yml
+++ b/.github/workflows/pytest.yml
@@ -15,7 +15,7 @@ jobs:
       fail-fast: false
       matrix:
         # we assume it also works for inbetween versions
-        python-version: ["3.9", "3.12", "3.14"]
+        python-version: ["3.9", "3.10", "3.12", "3.14"]
         os: [ubuntu-latest, windows-latest, macos-latest]
     runs-on: ${{ matrix.os }}
 

From d80736ec120013f962dece9dc9b7de487729cb63 Mon Sep 17 00:00:00 2001
From: veenstrajelmer <veenstrajelmer@gmail.com>
Date: Tue, 21 Apr 2026 14:50:45 +0200
Subject: [PATCH 06/14] added netcdf4 tests

---
 pyproject.toml      |  4 ++++
 tests/test_ddlpy.py | 24 +++++++++++++++++++++++-
 2 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 5a4df17..b36c3f1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -74,6 +74,7 @@ netcdf = [
 	"xarray",
 	"h5netcdf",
 	"h5py",
+	"netcdf4",
 ]
 
 [project.scripts]
@@ -87,6 +88,9 @@ testpaths = ["tests"]
 addopts = "--durations=0"
 filterwarnings = [
 	"error",
+	# temporarily ignore numpy incompatibility: https://github.com/Deltares/dfm_tools/issues/1243
+	# this warning broke github tests for python 3.11, 3.12 and 3.13
+	"ignore:numpy.ndarray size changed:RuntimeWarning",
 	]
 
 [tool.flake8]
diff --git a/tests/test_ddlpy.py b/tests/test_ddlpy.py
index b4cc725..11dd26d 100755
--- a/tests/test_ddlpy.py
+++ b/tests/test_ddlpy.py
@@ -735,7 +735,7 @@ def test_dataframe_to_xarray(measurements):
     assert ds_firsttime.tz is None
 
 
-def test_dataframe_to_xarray_to_netcdf(measurements, tmp_path):
+def test_dataframe_to_xarray_to_netcdf_h5netcdf(measurements, tmp_path):
     """
     should be in test_utils.py
     """
@@ -746,6 +746,28 @@ def test_dataframe_to_xarray_to_netcdf(measurements, tmp_path):
     ds_clean.to_netcdf(file_out, engine="h5netcdf")
 
 
+def test_dataframe_to_xarray_to_netcdf_netcdf4(measurements, tmp_path):
+    """
+    should be in test_utils.py
+    """
+    ds_clean = ddlpy.dataframe_to_xarray(
+        df=measurements,
+    )
+    file_out = tmp_path / "test.nc"
+    ds_clean.to_netcdf(file_out, engine="netcdf4")
+
+
+def test_dataframe_to_xarray_to_netcdf_netcdf4_classic(measurements, tmp_path):
+    """
+    should be in test_utils.py
+    """
+    ds_clean = ddlpy.dataframe_to_xarray(
+        df=measurements,
+    )
+    file_out = tmp_path / "test.nc"
+    ds_clean.to_netcdf(file_out, engine="netcdf4", format="NETCDF4_CLASSIC")
+
+
 def test_dataframe_to_xarray_drop_omschrijving(measurements):
     """
     in case of non-unique Code/Omschrijving pairs, the Omschrijving variable should be

From 99cdb3683998ad09900c7af58c94c08aa115fe4d Mon Sep 17 00:00:00 2001
From: veenstrajelmer <veenstrajelmer@gmail.com>
Date: Tue, 21 Apr 2026 16:28:19 +0200
Subject: [PATCH 07/14] test adding scipy dependency

---
 pyproject.toml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pyproject.toml b/pyproject.toml
index b36c3f1..f0539d1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -75,6 +75,7 @@ netcdf = [
 	"h5netcdf",
 	"h5py",
 	"netcdf4",
+	"scipy",
 ]
 
 [project.scripts]

From 135d822a8bc68123aa2d40fc0529a738e3b915fc Mon Sep 17 00:00:00 2001
From: veenstrajelmer <veenstrajelmer@gmail.com>
Date: Wed, 22 Apr 2026 10:04:42 +0200
Subject: [PATCH 08/14] restructured tests with conftest.py

---
 pyproject.toml      |   1 -
 tests/conftest.py   |  48 ++++++++++
 tests/test_ddlpy.py | 228 --------------------------------------------
 tests/test_utils.py | 183 ++++++++++++++++++++++++++++++++---
 4 files changed, 220 insertions(+), 240 deletions(-)
 create mode 100644 tests/conftest.py

diff --git a/pyproject.toml b/pyproject.toml
index f0539d1..b36c3f1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -75,7 +75,6 @@ netcdf = [
 	"h5netcdf",
 	"h5py",
 	"netcdf4",
-	"scipy",
 ]
 
 [project.scripts]
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..f24de56
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,48 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Wed Apr 22 09:56:57 2026
+
+@author: veenstra
+"""
+import pytest
+import ddlpy
+import datetime as dt
+
+
+@pytest.fixture
+def endpoints():
+    """
+    Get the endpoints from the api
+    """
+    endpoints = ddlpy.ddlpy.ENDPOINTS
+    return endpoints
+
+
+@pytest.fixture
+def locations():
+    """return all locations"""
+    locations = ddlpy.locations()
+    return locations
+
+
+@pytest.fixture
+def location(locations):
+    """return sample location"""
+    bool_grootheid = locations["Grootheid.Code"] == "WATHTE"
+    bool_groepering = locations["Groepering.Code"] == ""
+    bool_procestype = locations["ProcesType"] == "meting"
+    location = locations[bool_grootheid & bool_groepering & bool_procestype].loc[
+        "denhelder.marsdiep"
+    ]
+    return location
+
+
+@pytest.fixture
+def measurements(location):
+    """measurements for a location"""
+    start_date = dt.datetime(1953, 1, 1)
+    end_date = dt.datetime(1953, 4, 1)
+    measurements = ddlpy.measurements(
+        location, start_date=start_date, end_date=end_date
+    )
+    return measurements
diff --git a/tests/test_ddlpy.py b/tests/test_ddlpy.py
index 11dd26d..cd0fe86 100755
--- a/tests/test_ddlpy.py
+++ b/tests/test_ddlpy.py
@@ -20,44 +20,6 @@
 }
 
 
-@pytest.fixture(scope="session")
-def endpoints():
-    """
-    Get the endpoints from the api
-    """
-    endpoints = ddlpy.ddlpy.ENDPOINTS
-    return endpoints
-
-
-@pytest.fixture(scope="session")
-def locations():
-    """return all locations"""
-    locations = ddlpy.locations()
-    return locations
-
-
-@pytest.fixture(scope="session")
-def location(locations):
-    """return sample location"""
-    bool_grootheid = locations["Grootheid.Code"] == "WATHTE"
-    bool_groepering = locations["Groepering.Code"] == ""
-    bool_procestype = locations["ProcesType"] == "meting"
-    location = locations[bool_grootheid & bool_groepering & bool_procestype].loc[
-        "denhelder.marsdiep"
-    ]
-    return location
-
-
-@pytest.fixture(scope="session")
-def measurements(location):
-    """measurements for a location"""
-    start_date = dt.datetime(1953, 1, 1)
-    end_date = dt.datetime(1953, 4, 1)
-    measurements = ddlpy.measurements(
-        location, start_date=start_date, end_date=end_date
-    )
-    return measurements
-
 
 def test_send_post_request_errors_wrongapi():
     url = "https://ddapi20-waterwebservices.rijkswaterstaat.nl/ONLINEWAARNEMINGENSERVICES/OphalenCatalogus"
@@ -613,193 +575,3 @@ def test_check_convert_wrongorder():
     with pytest.raises(ValueError):
         _, _ = ddlpy.ddlpy._check_convert_dates(end_date, start_date)
 
-
-def test_simplify_dataframe(measurements):
-    """
-    should be in test_utils.py
-    """
-    assert len(measurements.columns) == 48
-    meas_simple = ddlpy.simplify_dataframe(measurements)
-    assert hasattr(meas_simple, "attrs")
-    # TODO: the below should be 47 and 1, but there are still RIKZ_WAT instances in
-    # OpdrachtgevendeInstantie column, which is different from RIKZMON_WAT
-    # this also probably partly causes the 96 duplicated timestamps
-    # https://github.com/Rijkswaterstaat/WaterWebservices/issues/16
-    assert len(meas_simple.attrs) == 46
-    assert len(meas_simple.columns) == 2
-    expected_columns = [
-        "WaarnemingMetadata.OpdrachtgevendeInstantie",
-        "Meetwaarde.Waarde_Numeriek",
-    ]
-    assert set(meas_simple.columns) == set(expected_columns)
-
-
-def test_simplify_dataframe_always_preserve(measurements):
-    """
-    should be in test_utils.py
-    """
-    assert len(measurements.columns) == 48
-    always_preserve = [
-        "WaarnemingMetadata.Statuswaarde",
-        "WaarnemingMetadata.OpdrachtgevendeInstantie",
-        "WaarnemingMetadata.Kwaliteitswaardecode",
-        "Groepering.Code",
-        "BemonsteringsApparaat.Code",
-        "Meetwaarde.Waarde_Numeriek",
-    ]
-    meas_simple = ddlpy.simplify_dataframe(
-        measurements, always_preserve=always_preserve
-    )
-    assert hasattr(meas_simple, "attrs")
-    assert len(meas_simple.attrs) == 42
-    assert len(meas_simple.columns) == 6
-    expected_columns = [
-        "WaarnemingMetadata.Statuswaarde",
-        "WaarnemingMetadata.OpdrachtgevendeInstantie",
-        "WaarnemingMetadata.Kwaliteitswaardecode",
-        "Groepering.Code",
-        "BemonsteringsApparaat.Code",
-        "Meetwaarde.Waarde_Numeriek",
-    ]
-    assert set(meas_simple.columns) == set(expected_columns)
-
-
-def test_simplify_dataframe_always_preserve_invalid_key(measurements):
-    """
-    should be in test_utils.py
-    """
-    assert len(measurements.columns) == 48
-    always_preserve = ["invalid_key"]
-    with pytest.raises(ValueError) as e:
-        _ = ddlpy.simplify_dataframe(measurements, always_preserve=always_preserve)
-    assert "column 'invalid_key' not present in dataframe" in str(e.value)
-
-
-def test_simplify_dataframe_alfanumeriek_with_nan_dropped(locations):
-    bool_grootheid = locations["Grootheid.Code"] == "WATHTE"
-    bool_groepering = locations["Groepering.Code"] == ""
-    bool_procestype = locations["ProcesType"] == "meting"
-    location = locations[bool_grootheid & bool_groepering & bool_procestype].loc["a12"]
-
-    start_date = dt.datetime(2009, 1, 1)
-    end_date = dt.datetime(2009, 4, 1)
-    measurements = ddlpy.measurements(
-        location, start_date=start_date, end_date=end_date
-    )
-    meas_simple = ddlpy.simplify_dataframe(df=measurements)
-    expected_columns = [
-        "WaarnemingMetadata.Kwaliteitswaardecode",
-        "Meetwaarde.Waarde_Numeriek",
-    ]
-    assert set(meas_simple.columns) == set(expected_columns)
-
-
-def test_dataframe_to_xarray(measurements):
-    """
-    should be in test_utils.py
-    """
-    always_preserve = [
-        "WaarnemingMetadata.Statuswaarde",
-        "WaarnemingMetadata.Kwaliteitswaardecode",
-        "MeetApparaat.Code",
-        "WaardeBepalingsMethode.Code",
-        "Meetwaarde.Waarde_Numeriek",
-    ]
-    ds_clean = ddlpy.dataframe_to_xarray(
-        df=measurements,
-        always_preserve=always_preserve,
-    )
-
-    non_constant_columns = [
-        "WaarnemingMetadata.OpdrachtgevendeInstantie",
-        "Meetwaarde.Waarde_Numeriek",
-    ]
-
-    preserved = always_preserve + non_constant_columns
-
-    for varname in measurements.columns:
-        # check if all varnames in always_preserve and non-constant columns are indeed preserved as variables
-        if varname in preserved:
-            assert varname in ds_clean.data_vars
-            assert varname not in ds_clean.attrs.keys()
-        else:
-            assert varname not in ds_clean.data_vars
-            assert varname in ds_clean.attrs.keys()
-            varname_oms = varname.replace(".Code", ".Omschrijving")
-            assert varname_oms in ds_clean.attrs.keys()
-
-    # check if times and timezone are correct
-    refdate_utc = measurements.tz_convert(None).index[0]
-    ds_firsttime = ds_clean.time.to_pandas().iloc[0]
-    assert refdate_utc == ds_firsttime
-    assert ds_firsttime.tz is None
-
-
-def test_dataframe_to_xarray_to_netcdf_h5netcdf(measurements, tmp_path):
-    """
-    should be in test_utils.py
-    """
-    ds_clean = ddlpy.dataframe_to_xarray(
-        df=measurements,
-    )
-    file_out = tmp_path / "test.nc"
-    ds_clean.to_netcdf(file_out, engine="h5netcdf")
-
-
-def test_dataframe_to_xarray_to_netcdf_netcdf4(measurements, tmp_path):
-    """
-    should be in test_utils.py
-    """
-    ds_clean = ddlpy.dataframe_to_xarray(
-        df=measurements,
-    )
-    file_out = tmp_path / "test.nc"
-    ds_clean.to_netcdf(file_out, engine="netcdf4")
-
-
-def test_dataframe_to_xarray_to_netcdf_netcdf4_classic(measurements, tmp_path):
-    """
-    should be in test_utils.py
-    """
-    ds_clean = ddlpy.dataframe_to_xarray(
-        df=measurements,
-    )
-    file_out = tmp_path / "test.nc"
-    ds_clean.to_netcdf(file_out, engine="netcdf4", format="NETCDF4_CLASSIC")
-
-
-def test_dataframe_to_xarray_drop_omschrijving(measurements):
-    """
-    in case of non-unique Code/Omschrijving pairs, the Omschrijving variable should be
-    dropped also. The information it contains is added as attrs to the Code value.
-    """
-    # make MeetApparaat non-unique
-    measurements.loc["1953-01-01 02:40:00+01:00", "MeetApparaat.Code"] = "newcode"
-    measurements.loc["1953-01-01 02:40:00+01:00", "MeetApparaat.Omschrijving"] = (
-        "newoms"
-    )
-
-    always_preserve = [
-        "WaarnemingMetadata.Statuswaarde",
-        "WaarnemingMetadata.Kwaliteitswaardecode",
-        "WaardeBepalingsMethode.Code",
-        "Meetwaarde.Waarde_Numeriek",
-    ]
-
-    ds = ddlpy.dataframe_to_xarray(measurements, always_preserve=always_preserve)
-    for varn in ds.data_vars:
-        assert not varn.endswith(".Omschrijving")
-
-    expected_attrs = {"newcode": "newoms", "10272": "other:Vlotterniveaumeter"}
-    assert ds["MeetApparaat.Code"].attrs == expected_attrs
-
-
-def test_code_description_attrs_from_dataframe_prevent_empty(measurements):
-    """
-    should be in test_utils.py
-    https://github.com/Deltares/ddlpy/issues/156
-    """
-    assert "" in measurements["Groepering.Code"].unique()
-    attr_dict = ddlpy.utils.code_description_attrs_from_dataframe(measurements)
-    for attr_key_value_pairs in attr_dict.values():
-        assert "" not in attr_key_value_pairs.keys()
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 67e1a59..f0fe5e1 100755
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -4,29 +4,190 @@
 """Tests for `utils` package."""
 
 from ddlpy.utils import date_series
-import datetime
+import datetime as dt
+import ddlpy
+import pytest
 
 
 def test_date_series():
     """Sample pytest test function with the pytest fixture as an argument."""
     # from bs4 import BeautifulSoup
     # assert 'GitHub' in BeautifulSoup(response.content).title.string
-    start = datetime.datetime(2018, 1, 1)
-    end = datetime.datetime(2018, 3, 1)
+    start = dt.datetime(2018, 1, 1)
+    end = dt.datetime(2018, 3, 1)
     result = date_series(start, end)
     expected = [
-        (datetime.datetime(2018, 1, 1, 0, 0), datetime.datetime(2018, 2, 1, 0, 0)),
-        (datetime.datetime(2018, 2, 1, 0, 0), datetime.datetime(2018, 3, 1, 0, 0)),
+        (dt.datetime(2018, 1, 1, 0, 0), dt.datetime(2018, 2, 1, 0, 0)),
+        (dt.datetime(2018, 2, 1, 0, 0), dt.datetime(2018, 3, 1, 0, 0)),
     ]
     assert result == expected
 
-    start = datetime.datetime(2017, 11, 15)
-    end = datetime.datetime(2018, 3, 5)
+    start = dt.datetime(2017, 11, 15)
+    end = dt.datetime(2018, 3, 5)
     result = date_series(start, end)
     expected = [
-        (datetime.datetime(2017, 11, 15, 0, 0), datetime.datetime(2017, 12, 15, 0, 0)),
-        (datetime.datetime(2017, 12, 15, 0, 0), datetime.datetime(2018, 1, 15, 0, 0)),
-        (datetime.datetime(2018, 1, 15, 0, 0), datetime.datetime(2018, 2, 15, 0, 0)),
-        (datetime.datetime(2018, 2, 15, 0, 0), datetime.datetime(2018, 3, 5, 0, 0)),
+        (dt.datetime(2017, 11, 15, 0, 0), dt.datetime(2017, 12, 15, 0, 0)),
+        (dt.datetime(2017, 12, 15, 0, 0), dt.datetime(2018, 1, 15, 0, 0)),
+        (dt.datetime(2018, 1, 15, 0, 0), dt.datetime(2018, 2, 15, 0, 0)),
+        (dt.datetime(2018, 2, 15, 0, 0), dt.datetime(2018, 3, 5, 0, 0)),
     ]
     assert result == expected
+
+
+def test_simplify_dataframe(measurements):
+    assert len(measurements.columns) == 48
+    meas_simple = ddlpy.simplify_dataframe(measurements)
+    assert hasattr(meas_simple, "attrs")
+    # TODO: the below should be 47 and 1, but there are still RIKZ_WAT instances in
+    # OpdrachtgevendeInstantie column, which is different from RIKZMON_WAT
+    # this also probably partly causes the 96 duplicated timestamps
+    # https://github.com/Rijkswaterstaat/WaterWebservices/issues/16
+    assert len(meas_simple.attrs) == 46
+    assert len(meas_simple.columns) == 2
+    expected_columns = [
+        "WaarnemingMetadata.OpdrachtgevendeInstantie",
+        "Meetwaarde.Waarde_Numeriek",
+    ]
+    assert set(meas_simple.columns) == set(expected_columns)
+
+
+def test_simplify_dataframe_always_preserve(measurements):
+    assert len(measurements.columns) == 48
+    always_preserve = [
+        "WaarnemingMetadata.Statuswaarde",
+        "WaarnemingMetadata.OpdrachtgevendeInstantie",
+        "WaarnemingMetadata.Kwaliteitswaardecode",
+        "Groepering.Code",
+        "BemonsteringsApparaat.Code",
+        "Meetwaarde.Waarde_Numeriek",
+    ]
+    meas_simple = ddlpy.simplify_dataframe(
+        measurements, always_preserve=always_preserve
+    )
+    assert hasattr(meas_simple, "attrs")
+    assert len(meas_simple.attrs) == 42
+    assert len(meas_simple.columns) == 6
+    expected_columns = [
+        "WaarnemingMetadata.Statuswaarde",
+        "WaarnemingMetadata.OpdrachtgevendeInstantie",
+        "WaarnemingMetadata.Kwaliteitswaardecode",
+        "Groepering.Code",
+        "BemonsteringsApparaat.Code",
+        "Meetwaarde.Waarde_Numeriek",
+    ]
+    assert set(meas_simple.columns) == set(expected_columns)
+
+
+def test_simplify_dataframe_always_preserve_invalid_key(measurements):
+    assert len(measurements.columns) == 48
+    always_preserve = ["invalid_key"]
+    with pytest.raises(ValueError) as e:
+        _ = ddlpy.simplify_dataframe(measurements, always_preserve=always_preserve)
+    assert "column 'invalid_key' not present in dataframe" in str(e.value)
+
+
+def test_simplify_dataframe_alfanumeriek_with_nan_dropped(locations):
+    bool_grootheid = locations["Grootheid.Code"] == "WATHTE"
+    bool_groepering = locations["Groepering.Code"] == ""
+    bool_procestype = locations["ProcesType"] == "meting"
+    location = locations[bool_grootheid & bool_groepering & bool_procestype].loc["a12"]
+
+    start_date = dt.datetime(2009, 1, 1)
+    end_date = dt.datetime(2009, 4, 1)
+    measurements = ddlpy.measurements(
+        location, start_date=start_date, end_date=end_date
+    )
+    meas_simple = ddlpy.simplify_dataframe(df=measurements)
+    expected_columns = [
+        "WaarnemingMetadata.Kwaliteitswaardecode",
+        "Meetwaarde.Waarde_Numeriek",
+    ]
+    assert set(meas_simple.columns) == set(expected_columns)
+
+
+def test_dataframe_to_xarray(measurements):
+    always_preserve = [
+        "WaarnemingMetadata.Statuswaarde",
+        "WaarnemingMetadata.Kwaliteitswaardecode",
+        "MeetApparaat.Code",
+        "WaardeBepalingsMethode.Code",
+        "Meetwaarde.Waarde_Numeriek",
+    ]
+    ds_clean = ddlpy.dataframe_to_xarray(
+        df=measurements,
+        always_preserve=always_preserve,
+    )
+
+    non_constant_columns = [
+        "WaarnemingMetadata.OpdrachtgevendeInstantie",
+        "Meetwaarde.Waarde_Numeriek",
+    ]
+
+    preserved = always_preserve + non_constant_columns
+
+    for varname in measurements.columns:
+        # check if all varnames in always_preserve and non-constant columns are indeed preserved as variables
+        if varname in preserved:
+            assert varname in ds_clean.data_vars
+            assert varname not in ds_clean.attrs.keys()
+        else:
+            assert varname not in ds_clean.data_vars
+            assert varname in ds_clean.attrs.keys()
+            varname_oms = varname.replace(".Code", ".Omschrijving")
+            assert varname_oms in ds_clean.attrs.keys()
+
+    # check if times and timezone are correct
+    refdate_utc = measurements.tz_convert(None).index[0]
+    ds_firsttime = ds_clean.time.to_pandas().iloc[0]
+    assert refdate_utc == ds_firsttime
+    assert ds_firsttime.tz is None
+
+
+def test_dataframe_to_xarray_drop_omschrijving(measurements):
+    """
+    in case of non-unique Code/Omschrijving pairs, the Omschrijving variable should be
+    dropped also. The information it contains is added as attrs to the Code value.
+    """
+    # make MeetApparaat non-unique
+    measurements.loc["1953-01-01 02:40:00+01:00", "MeetApparaat.Code"] = "newcode"
+    measurements.loc["1953-01-01 02:40:00+01:00", "MeetApparaat.Omschrijving"] = (
+        "newoms"
+    )
+
+    always_preserve = [
+        "WaarnemingMetadata.Statuswaarde",
+        "WaarnemingMetadata.Kwaliteitswaardecode",
+        "WaardeBepalingsMethode.Code",
+        "Meetwaarde.Waarde_Numeriek",
+    ]
+
+    ds = ddlpy.dataframe_to_xarray(measurements, always_preserve=always_preserve)
+    for varn in ds.data_vars:
+        assert not varn.endswith(".Omschrijving")
+
+    expected_attrs = {"newcode": "newoms", "10272": "other:Vlotterniveaumeter"}
+    assert ds["MeetApparaat.Code"].attrs == expected_attrs
+
+
+def test_dataframe_to_xarray_to_netcdf(measurements, tmp_path):
+    ds_clean = ddlpy.dataframe_to_xarray(
+        df=measurements,
+    )
+    file_out = tmp_path / "test_default.nc"
+    ds_clean.to_netcdf(file_out, engine=None)
+    file_out = tmp_path / "test_h5netcdf.nc"
+    ds_clean.to_netcdf(file_out, engine="h5netcdf")
+    file_out = tmp_path / "test_netcdf4.nc"
+    ds_clean.to_netcdf(file_out, engine="netcdf4")
+    file_out = tmp_path / "test_netcdf4_classic.nc"
+    ds_clean.to_netcdf(file_out, engine="netcdf4", format="NETCDF4_CLASSIC")
+
+
+def test_code_description_attrs_from_dataframe_prevent_empty(measurements):
+    """
+    https://github.com/Deltares/ddlpy/issues/156
+    """
+    assert "" in measurements["Groepering.Code"].unique()
+    attr_dict = ddlpy.utils.code_description_attrs_from_dataframe(measurements)
+    for attr_key_value_pairs in attr_dict.values():
+        assert "" not in attr_key_value_pairs.keys()

From 6297fe293fdcea87175702b5cd954a15ec88400c Mon Sep 17 00:00:00 2001
From: veenstrajelmer <veenstrajelmer@gmail.com>
Date: Wed, 22 Apr 2026 10:07:29 +0200
Subject: [PATCH 09/14] black

---
 pyproject.toml      | 2 +-
 tests/conftest.py   | 1 +
 tests/test_ddlpy.py | 2 --
 3 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index b36c3f1..9db54c3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -49,7 +49,7 @@ Issues = "https://github.com/deltares/ddlpy/issues"
 [project.optional-dependencies]
 dev = [
 	"bump2version>=0.5.11",
-	"flake8",
+	"black",
 	"pytest>=3.8.2",
 	"pytest-cov",
 	"twine",
diff --git a/tests/conftest.py b/tests/conftest.py
index f24de56..bc8f8aa 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -4,6 +4,7 @@
 
 @author: veenstra
 """
+
 import pytest
 import ddlpy
 import datetime as dt
diff --git a/tests/test_ddlpy.py b/tests/test_ddlpy.py
index cd0fe86..56ae2a8 100755
--- a/tests/test_ddlpy.py
+++ b/tests/test_ddlpy.py
@@ -20,7 +20,6 @@
 }
 
 
-
 def test_send_post_request_errors_wrongapi():
     url = "https://ddapi20-waterwebservices.rijkswaterstaat.nl/ONLINEWAARNEMINGENSERVICES/OphalenCatalogus"
     with pytest.raises(IOError) as e:
@@ -574,4 +573,3 @@ def test_check_convert_wrongorder():
     # assert output
     with pytest.raises(ValueError):
         _, _ = ddlpy.ddlpy._check_convert_dates(end_date, start_date)
-

From 99942ace3903f6b889ea5ad6f859ac90d404b2a2 Mon Sep 17 00:00:00 2001
From: veenstrajelmer <veenstrajelmer@gmail.com>
Date: Wed, 22 Apr 2026 11:18:38 +0200
Subject: [PATCH 10/14] session scoped fixtures

---
 tests/conftest.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index bc8f8aa..6ad95ac 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -10,7 +10,7 @@
 import datetime as dt
 
 
-@pytest.fixture
+@pytest.fixture(scope="session")
 def endpoints():
     """
     Get the endpoints from the api
@@ -19,14 +19,14 @@ def endpoints():
     return endpoints
 
 
-@pytest.fixture
+@pytest.fixture(scope="session")
 def locations():
     """return all locations"""
     locations = ddlpy.locations()
     return locations
 
 
-@pytest.fixture
+@pytest.fixture(scope="session")
 def location(locations):
     """return sample location"""
     bool_grootheid = locations["Grootheid.Code"] == "WATHTE"
@@ -38,7 +38,7 @@ def location(locations):
     return location
 
 
-@pytest.fixture
+@pytest.fixture(scope="session")
 def measurements(location):
     """measurements for a location"""
     start_date = dt.datetime(1953, 1, 1)

From cff0bdf33bd58a569bcc5c67d7df61c95af39cb0 Mon Sep 17 00:00:00 2001
From: veenstrajelmer <veenstrajelmer@gmail.com>
Date: Wed, 22 Apr 2026 11:26:33 +0200
Subject: [PATCH 11/14] update comment

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 9db54c3..94b457c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -88,7 +88,7 @@ testpaths = ["tests"]
 addopts = "--durations=0"
 filterwarnings = [
 	"error",
-	# temporarily ignore numpy incompatibility: https://github.com/Deltares/dfm_tools/issues/1243
+	# temporarily ignore numpy incompatibility: https://github.com/Deltares/ddlpy/issues/192
 	# this warning broke github tests for python 3.11, 3.12 and 3.13
 	"ignore:numpy.ndarray size changed:RuntimeWarning",
 	]

From 9b1e735ebbca63c42ccfc40d4cf49fc57f4d7857 Mon Sep 17 00:00:00 2001
From: veenstrajelmer <veenstrajelmer@gmail.com>
Date: Wed, 22 Apr 2026 11:26:53 +0200
Subject: [PATCH 12/14] minimize dependencies

---
 pyproject.toml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 94b457c..c9d11d2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -73,7 +73,6 @@ examples = [
 netcdf = [
 	"xarray",
 	"h5netcdf",
-	"h5py",
 	"netcdf4",
 ]
 

From 15425999acc95845f62136b9f05834e7fdb417b9 Mon Sep 17 00:00:00 2001
From: veenstrajelmer <veenstrajelmer@gmail.com>
Date: Wed, 22 Apr 2026 11:38:24 +0200
Subject: [PATCH 13/14] dependencies

---
 pyproject.toml      |  2 +-
 tests/test_utils.py | 18 +++++++++---------
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index c9d11d2..b7ee1be 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -72,7 +72,7 @@ examples = [
 ]
 netcdf = [
 	"xarray",
-	"h5netcdf",
+	"h5netcdf[h5py]",
 	"netcdf4",
 ]
 
diff --git a/tests/test_utils.py b/tests/test_utils.py
index f0fe5e1..cabbb2b 100755
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -169,18 +169,18 @@ def test_dataframe_to_xarray_drop_omschrijving(measurements):
     assert ds["MeetApparaat.Code"].attrs == expected_attrs
 
 
-def test_dataframe_to_xarray_to_netcdf(measurements, tmp_path):
+@pytest.mark.parametrize("engine", [None, "h5netcdf", "netcdf4", "netcdf4_classic"])
+def test_dataframe_to_xarray_to_netcdf(measurements, tmp_path, engine):
     ds_clean = ddlpy.dataframe_to_xarray(
         df=measurements,
     )
-    file_out = tmp_path / "test_default.nc"
-    ds_clean.to_netcdf(file_out, engine=None)
-    file_out = tmp_path / "test_h5netcdf.nc"
-    ds_clean.to_netcdf(file_out, engine="h5netcdf")
-    file_out = tmp_path / "test_netcdf4.nc"
-    ds_clean.to_netcdf(file_out, engine="netcdf4")
-    file_out = tmp_path / "test_netcdf4_classic.nc"
-    ds_clean.to_netcdf(file_out, engine="netcdf4", format="NETCDF4_CLASSIC")
+    
+    file_out = tmp_path / f"test_{engine}.nc"
+    
+    if engine == "netcdf4_classic":
+        ds_clean.to_netcdf(file_out, engine="netcdf4", format="NETCDF4_CLASSIC")
+    else:
+        ds_clean.to_netcdf(file_out, engine=engine)
 
 
 def test_code_description_attrs_from_dataframe_prevent_empty(measurements):

From 6a1680472719fbe685b74a9887212213c91a64a6 Mon Sep 17 00:00:00 2001
From: veenstrajelmer <veenstrajelmer@gmail.com>
Date: Wed, 22 Apr 2026 11:39:54 +0200
Subject: [PATCH 14/14] black

---
 tests/test_utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_utils.py b/tests/test_utils.py
index cabbb2b..73c3af6 100755
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -174,9 +174,9 @@ def test_dataframe_to_xarray_to_netcdf(measurements, tmp_path, engine):
     ds_clean = ddlpy.dataframe_to_xarray(
         df=measurements,
     )
-    
+
     file_out = tmp_path / f"test_{engine}.nc"
-    
+
     if engine == "netcdf4_classic":
         ds_clean.to_netcdf(file_out, engine="netcdf4", format="NETCDF4_CLASSIC")
     else: