From 82aeb02b2496a51ed18a8e8cd135672c022dff67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joachim=20K=C3=B8nigslieb?= Date: Wed, 6 May 2026 08:20:59 +0000 Subject: [PATCH 1/3] add subsetting also in data loaders, not just in downlaoder --- sunflow/data_io.py | 11 +++++++++-- sunflow/downloaders.py | 38 +------------------------------------- sunflow/geospatial.py | 24 ++++++++++++++++++++++++ sunflow/main.py | 2 ++ 4 files changed, 36 insertions(+), 39 deletions(-) diff --git a/sunflow/data_io.py b/sunflow/data_io.py index c78a5cb..70973d3 100644 --- a/sunflow/data_io.py +++ b/sunflow/data_io.py @@ -11,6 +11,7 @@ from loguru import logger from .config import NowcastConfig, S3Config +from .geospatial import subset_to_bbox from .validation import DataNotAvailableError @@ -192,6 +193,7 @@ def load_data_from_files( satellite_data_directory: str, data_type: str, filename_format: str, + bbox: str, ) -> xr.Dataset: """Load time steps from files (operational mode). @@ -232,7 +234,8 @@ def load_data_from_files( if not collected: return xr.Dataset() - return xr.concat(collected, dim="time", data_vars=None) + ds = xr.concat(collected, dim="time", data_vars=None) + return subset_to_bbox(ds, bbox) def check_current_data_existence_s3( @@ -281,6 +284,7 @@ def load_data_from_s3( s3_config: S3Config, data_type: str, filename_format: str, + bbox: str, ) -> xr.Dataset: """Load time steps from S3. @@ -325,7 +329,8 @@ def load_data_from_s3( if not collected: return xr.Dataset() - return xr.concat(collected, dim="time", data_vars=None) + ds = xr.concat(collected, dim="time", data_vars=None) + return subset_to_bbox(ds, bbox) def fetch_clearsky_with_fallback( @@ -392,6 +397,7 @@ def fetch_clearsky_with_fallback( nowcast_config.satellite_data_directory, "clearsky data", config["filename_format"], + bbox=bbox, ) case "s3": fetched = load_data_from_s3( @@ -401,6 +407,7 @@ def fetch_clearsky_with_fallback( s3_config, "clearsky data", config["filename_format"], + bbox=bbox, ) if offset > 0: diff --git a/sunflow/downloaders.py b/sunflow/downloaders.py index 13a19f7..331bfcd 100644 --- a/sunflow/downloaders.py +++ b/sunflow/downloaders.py @@ -8,6 +8,7 @@ from loguru import logger from .data_io import generate_input_filename +from .geospatial import subset_to_bbox def download_netcdf_knmi(url: str, api_key: str) -> BytesIO: @@ -66,43 +67,6 @@ def generate_dwd_filename(timestamp: datetime) -> str: return f"SISin{timestamp.strftime('%Y%m%d%H%M')}FDv3.nc" -def subset_to_bbox(ds: xr.Dataset, bbox: str) -> xr.Dataset: - """Subset an xarray Dataset to a geographic bounding box. - - Detects the latitude and longitude coordinate names automatically by - matching against common aliases ('lat', 'latitude', 'y' for - latitude; 'lon', 'longitude', 'x' for longitude). If coordinates - cannot be identified, the original dataset is returned unchanged with - a warning. - - Args: - ds: Input dataset to subset. - bbox: Bounding box string in the format lon_min,lat_min,lon_max,lat_max. - - Returns: - Dataset sliced to the requested bounding box, or the original - dataset if lat/lon coordinates could not be found. - """ - lon_min, lat_min, lon_max, lat_max = map(float, bbox.split(",")) - - lat_coord = lon_coord = None - for coord in ds.coords: - if coord.lower() in ["lat", "latitude", "y"]: - lat_coord = coord - elif coord.lower() in ["lon", "longitude", "x"]: - lon_coord = coord - - if lat_coord is None or lon_coord is None: - logger.warning("Could not find lat/lon coordinates") - return ds - - return ds.sel( - { - lat_coord: slice(lat_min, lat_max), - lon_coord: slice(lon_min, lon_max), - } - ) - def download_current_data( request_time: datetime, diff --git a/sunflow/geospatial.py b/sunflow/geospatial.py index 4cbe385..96d75b8 100644 --- a/sunflow/geospatial.py +++ b/sunflow/geospatial.py @@ -4,10 +4,34 @@ import numpy as np import pvlib import xarray as xr +from loguru import logger from .config import BBOX_OPTIONS +def subset_to_bbox(ds: xr.Dataset, bbox: str) -> xr.Dataset: + """Subset an xarray Dataset to a geographic bounding box.""" + lon_min, lat_min, lon_max, lat_max = map(float, bbox.split(",")) + + lat_coord = lon_coord = None + for coord in ds.coords: + if coord.lower() in ["lat", "latitude", "y"]: + lat_coord = coord + elif coord.lower() in ["lon", "longitude", "x"]: + lon_coord = coord + + if lat_coord is None or lon_coord is None: + logger.warning("Could not find lat/lon coordinates for bbox subsetting") + return ds + + return ds.sel( + { + lat_coord: slice(lat_min, lat_max), + lon_coord: slice(lon_min, lon_max), + } + ) + + def get_bbox(bbox_choice: str, custom_bbox: str | None = None) -> str | None: """Return the bounding box string for a given bbox choice. diff --git a/sunflow/main.py b/sunflow/main.py index e7402b2..b4cfab4 100644 --- a/sunflow/main.py +++ b/sunflow/main.py @@ -242,6 +242,7 @@ def run_nowcast( nowcast_config.satellite_data_directory, "past data", config["filename_format"], + bbox=bbox, ) case "s3": data = load_data_from_s3( @@ -251,6 +252,7 @@ def run_nowcast( s3_config, "past data", config["filename_format"], + bbox=bbox, ) n_loaded = len(data.time) if "time" in data.coords else 0 From 7a5b1cde01d6085758f0708fa48fab5099fe0d49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joachim=20K=C3=B8nigslieb?= Date: Thu, 7 May 2026 12:45:20 +0000 Subject: [PATCH 2/3] add changelog, add fix for handling both desc/asc lat/lon coords --- CHANGELOG.md | 2 ++ sunflow/geospatial.py | 15 +++++++++++++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1174b3f..2661a64 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] +- Subsetting to bounding box is now also done in the `s3` and `files` code paths. +- Subsetting to bounding box now correctly handles both ascending and descending lat/lon coordinates. ## [v1.1.0] diff --git a/sunflow/geospatial.py b/sunflow/geospatial.py index 96d75b8..80712ff 100644 --- a/sunflow/geospatial.py +++ b/sunflow/geospatial.py @@ -24,10 +24,21 @@ def subset_to_bbox(ds: xr.Dataset, bbox: str) -> xr.Dataset: logger.warning("Could not find lat/lon coordinates for bbox subsetting") return ds + # Handle potentially inverted coordinates by detecting order + lat_values = ds[lat_coord].values + lon_values = ds[lon_coord].values + + lat_ascending = lat_values[0] < lat_values[-1] + lon_ascending = lon_values[0] < lon_values[-1] + + # Arrange slice bounds based on coordinate order + lat_slice = slice(lat_min, lat_max) if lat_ascending else slice(lat_max, lat_min) + lon_slice = slice(lon_min, lon_max) if lon_ascending else slice(lon_max, lon_min) + return ds.sel( { - lat_coord: slice(lat_min, lat_max), - lon_coord: slice(lon_min, lon_max), + lat_coord: lat_slice, + lon_coord: lon_slice, } ) From c5020d2640fd733fd5d8c9238d5f6e2ae3fb3360 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joachim=20K=C3=B8nigslieb?= Date: Thu, 7 May 2026 13:40:44 +0000 Subject: [PATCH 3/3] properly run black --- sunflow/downloaders.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sunflow/downloaders.py b/sunflow/downloaders.py index 331bfcd..0867160 100644 --- a/sunflow/downloaders.py +++ b/sunflow/downloaders.py @@ -67,7 +67,6 @@ def generate_dwd_filename(timestamp: datetime) -> str: return f"SISin{timestamp.strftime('%Y%m%d%H%M')}FDv3.nc" - def download_current_data( request_time: datetime, config: dict[str, Any],