From 3cf510f6d20e7f22d8d77db13e377eb22e929996 Mon Sep 17 00:00:00 2001 From: Hauke Schulz <43613877+observingClouds@users.noreply.github.com> Date: Sun, 12 Oct 2025 13:23:54 +0200 Subject: [PATCH 01/26] add missing dependencies needed for notebook exec --- pyproject.toml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 6605425..3d64a6a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,6 +16,11 @@ dependencies = [ "s3fs>=2025.9.0", "jinja2>=3.1.6", "zarr>=3.1.3", + "matplotlib>=3.10.7", + "loguru>=0.7.3", + "scipy>=1.16.2", + "cartopy>=0.25.0", + "cdsapi>=0.7.7", ] requires-python = ">=3.12" readme = "README.md" From 6cc0c8f1e1632fd33b20235c1402ec75dcb2eb73 Mon Sep 17 00:00:00 2001 From: Hauke Schulz <43613877+observingClouds@users.noreply.github.com> Date: Sun, 12 Oct 2025 13:25:19 +0200 Subject: [PATCH 02/26] add missing functions --- danra-book/notebooks/distributions.ipynb | 7 +- danra-book/scripts/utils.py | 138 +++++++++++++++++++++++ 2 files changed, 144 insertions(+), 1 deletion(-) create mode 100644 danra-book/scripts/utils.py diff --git a/danra-book/notebooks/distributions.ipynb b/danra-book/notebooks/distributions.ipynb index c4900b5..60d179a 100644 --- a/danra-book/notebooks/distributions.ipynb +++ b/danra-book/notebooks/distributions.ipynb @@ -17,9 +17,14 @@ "metadata": {}, "outputs": [], "source": [ + "import sys\n", + "\n", "import xarray as xr\n", "import matplotlib.pyplot as plt\n", - "import numpy as np" + "import numpy as np\n", + "\n", + "sys.path.append(\"../scripts\")\n", + "from utils import sel_nearest_to_latlon_pt" ] }, { diff --git a/danra-book/scripts/utils.py b/danra-book/scripts/utils.py new file mode 100644 index 0000000..d67d685 --- /dev/null +++ b/danra-book/scripts/utils.py @@ -0,0 +1,138 @@ +from loguru import logger +import scipy.spatial +import pickle +import tempfile +from pathlib import Path + +LOOKUP_INFO = {} + +def sel_nearest_to_latlon_pt(ds, pt, wrap_lon=True): + """ + For a gridded Harmonie dataset (ds), find the nearest grid point to a given + lat/lon point (pt) and return a selection of the dataset with only + that grid point. + + NOTE: this function internally uses a KDTree for fast nearest neighbour + lookup, this tree is cached based on the suite_name of the dataset. The + distance is computed as Euclidean distance in the lat/lon space and so is + not the actual distance on the earth's surface. + + Parameters + ---------- + ds : xarray.Dataset + Harmonie dataset + pt : dict + Dictionary with keys "lat" and "lon" specifying the lat/lon point to + find the nearest grid point to. + wrap_lon : bool, optional + Whether to wrap the longitude values to the domain bounds (default: True) + """ + identifier = ds.attrs.get("suite_name") + if identifier is None: + raise ValueError("dataset must have a suite_name attribute") + + if not isinstance(pt, dict): + raise TypeError("pt_latlon must be a dict with keys 'lat' and 'lon'") + + lon, lat = pt["lon"], pt["lat"] + + @cache_to_pickle(identifier) + def build_tree(): + logger.info(f"Building nearest-point to lat/lon lookup tree for {identifier}") + values = list(zip(ds.lon.values.flatten(), ds.lat.values.flatten())) + tree_kdtree = scipy.spatial.cKDTree(values) + latlon_bounds = dict( + lat=(ds.lat.min().values, ds.lat.max().values), + lon=(ds.lon.min().values, ds.lon.max().values), + ) + return dict(tree=tree_kdtree, latlon_bounds=latlon_bounds) + + if identifier in LOOKUP_INFO: + lookup_info = LOOKUP_INFO[identifier] + else: + lookup_info = build_tree() + LOOKUP_INFO[identifier] = lookup_info + + lookup_tree = lookup_info["tree"] + latlon_bounds = lookup_info["latlon_bounds"] + + if wrap_lon: + lon_wrapped, applied_lon_offset = _wrap_lon(lon, latlon_bounds["lon"]) + else: + # check that the longitude is within the bounds + if lon < latlon_bounds["lon"][0] or lon > latlon_bounds["lon"][1]: + raise ValueError( + f"Longitude value ({lon}) is not within the domain bounds ({latlon_bounds['lon']})" + ) + + # check that the latitude is within the bounds + if lat < latlon_bounds["lat"][0] or lat > latlon_bounds["lat"][1]: + raise ValueError( + f"Latitude value ({lat}) is not within the domain bounds ({latlon_bounds['lat']})" + ) + + _, idx_nearest = lookup_tree.query((lon_wrapped, lat)) + nx, ny = ds.lon.shape + i, j = idx_nearest % ny, idx_nearest // ny + + ds_point = ds.isel(x=i, y=j) + + if wrap_lon: + ds_point["lon"] = ds_point["lon"] - applied_lon_offset + + return ds_point + +def cache_to_pickle(identifier): + """ + This function can be used as a decorator to cache the return value of a + function to a pickle file. The function must return a pickleable object. + """ + + def decorator(func): + fp_pickle = Path(tempfile.gettempdir()) / f"{identifier}.pkl" + + def wrapper(*args, **kwargs): + if fp_pickle.exists(): + with open(fp_pickle, "rb") as f: + return pickle.load(f) + else: + result = func(*args, **kwargs) + fp_pickle.parent.mkdir(parents=True, exist_ok=True) + with open(fp_pickle, "wb") as f: + pickle.dump(result, f) + return result + + return wrapper + + return decorator + + +def _wrap_lon(lon, lon_bounds): + """ + Attempt to wrap longitude values to the given bounds. + + Parameters + ---------- + lon : float + Longitude value to wrap + lon_bounds : tuple + Tuple of (min, max) longitude values + """ + applied_rotation = 0.0 + exception_text = ( + f"The provided longitude value ({lon}) cannot be placed within the " + f"bounds ({lon_bounds}), even after applying a 360deg wrapping." + ) + while lon < lon_bounds[0] or lon > lon_bounds[1]: + if lon < lon_bounds[0]: + if applied_rotation < 0: + raise ValueError(exception_text) + lon += 360 + applied_rotation += 360 + elif lon > lon_bounds[1]: + lon -= 360 + if applied_rotation > 0: + raise ValueError(exception_text) + applied_rotation -= 360 + + return lon, applied_rotation From f8a67a936fe183569ecc174d56252ea26d4b444c Mon Sep 17 00:00:00 2001 From: Hauke Schulz <43613877+observingClouds@users.noreply.github.com> Date: Sun, 12 Oct 2025 14:02:49 +0200 Subject: [PATCH 03/26] fix download to temp file --- danra-book/notebooks/paper-figures.ipynb | 420 +++++++++++++++++------ 1 file changed, 308 insertions(+), 112 deletions(-) diff --git a/danra-book/notebooks/paper-figures.ipynb b/danra-book/notebooks/paper-figures.ipynb index db15664..5d2e2d3 100644 --- a/danra-book/notebooks/paper-figures.ipynb +++ b/danra-book/notebooks/paper-figures.ipynb @@ -26,7 +26,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "c7be3011", "metadata": {}, "outputs": [], @@ -44,7 +44,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "178dc4c8", "metadata": {}, "outputs": [], @@ -64,7 +64,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "0fc065fd", "metadata": {}, "outputs": [ @@ -86,39 +86,86 @@ "\n", "\n", "\n", - "
<xarray.Dataset> Size: 10TB\n",
        "Dimensions:           (time: 96768, y: 589, x: 789)\n",
        "Coordinates:\n",
-       "    lat               (y, x) float64 4MB dask.array<chunksize=(295, 263), meta=np.ndarray>\n",
-       "    lon               (y, x) float64 4MB dask.array<chunksize=(295, 263), meta=np.ndarray>\n",
        "  * time              (time) datetime64[ns] 774kB 1990-09-01 ... 2023-10-13T2...\n",
-       "  * x                 (x) float64 6kB -1.999e+06 -1.997e+06 ... -2.925e+04\n",
        "  * y                 (y) float64 5kB -6.095e+05 -6.07e+05 ... 8.605e+05\n",
+       "  * x                 (x) float64 6kB -1.999e+06 -1.997e+06 ... -2.925e+04\n",
+       "    lat               (y, x) float64 4MB dask.array<chunksize=(295, 263), meta=np.ndarray>\n",
+       "    lon               (y, x) float64 4MB dask.array<chunksize=(295, 263), meta=np.ndarray>\n",
        "Data variables: (12/31)\n",
        "    cape_column       (time, y, x) float64 360GB dask.array<chunksize=(256, 295, 263), meta=np.ndarray>\n",
        "    cb_column         (time, y, x) float64 360GB dask.array<chunksize=(256, 295, 263), meta=np.ndarray>\n",
@@ -468,7 +576,12 @@
        "    contact:      Leif Denby <lcd@dmi.dk>, Danish Meteorological Institute\n",
        "    description:  All prognostic variables for 1990-09-01T00:00Z to 2024-01-0...\n",
        "    license:      CC-BY-4.0: https://creativecommons.org/licenses/by/4.0/\n",
-       "    suite_name:   danra