From 7521faae74130504ec4eda8832939768d02d01cc Mon Sep 17 00:00:00 2001 From: Paul Gierz Date: Fri, 12 Dec 2025 09:54:11 +0100 Subject: [PATCH 01/23] wip --- src/pycmor/core/cmorizer.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/pycmor/core/cmorizer.py b/src/pycmor/core/cmorizer.py index b47cef54..19ee240d 100644 --- a/src/pycmor/core/cmorizer.py +++ b/src/pycmor/core/cmorizer.py @@ -490,6 +490,21 @@ def _rules_expand_drvs(self): for rule in self.rules: num_drvs = len(rule.data_request_variables) logger.debug(f"Rule '{rule.name}' has {num_drvs} data_request_variables") + + # Handle zero DRVs - this is always an error + if len(rule.data_request_variables) == 0: + if self.cmor_version == "CMIP7": + raise ValueError( + f"Rule '{rule.name}' with compound_name='{getattr(rule, 'compound_name', 'NOT SET')}' " + f"did not match any variables in the CMIP7 data request" + ) + else: + # CMIP6 + raise ValueError( + f"Rule '{rule.name}' with cmor_variable='{getattr(rule, 'cmor_variable', 'NOT SET')}' " + f"did not match any variables in the data request" + ) + if len(rule.data_request_variables) == 1: new_rules.append(rule) else: From 80223e2c1672c5d25d234879ddad1756a7e770ea Mon Sep 17 00:00:00 2001 From: Paul Gierz Date: Fri, 12 Dec 2025 10:01:57 +0100 Subject: [PATCH 02/23] wip for cli debugger --- src/pycmor/cli.py | 154 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 154 insertions(+) diff --git a/src/pycmor/cli.py b/src/pycmor/cli.py index dd99bc2c..c3c8d7bb 100644 --- a/src/pycmor/cli.py +++ b/src/pycmor/cli.py @@ -315,6 +315,160 @@ def populate_cache(files: List): fc.save() +################################################################################ +################################################################################ +################################################################################ + +################################################################################ +# CMIP7 Testing Commands +################################################################################ + + +@cli.command() +@click_loguru.init_logger() +@click.argument("compound_name", type=click.STRING) +@click.option( + "--version", + "-v", + default="v1.2.2.2", + help="CMIP7 data request version to test against", + show_default=True, +) +@click.option( + "--metadata-file", + "-m", + type=click.Path(exists=True), + help="Path to local metadata JSON file (optional)", +) +@click.option( + "--show-all-variants", + "-a", + is_flag=True, + help="Show all variants of the variable if found", +) +def cmip7_name_test(compound_name, version, metadata_file, show_all_variants): + """ + Test a CMIP7 compound name against the data request. + + Checks if the given compound name exists in the CMIP7 data request + and displays metadata information. + + Example compound name format: realm.variable.branding.frequency.region + Example: atmos.tas.tavg-h2m-hxy-u.mon.GLB + """ + from rich.console import Console + from rich.panel import Panel + from rich.table import Table + + from .data_request.cmip7_interface import CMIP7Interface + + console = Console() + + try: + # Initialize interface + console.print("[bold]Loading CMIP7 Data Request...[/bold]") + interface = CMIP7Interface() + interface.load_metadata(version=version, metadata_file=metadata_file) + console.print(f"[green]✓[/green] Loaded metadata for version: {version}\n") + + # Try to find the compound name + console.print(f"[bold]Testing compound name:[/bold] {compound_name}\n") + metadata = interface.get_variable_metadata(compound_name) + + if metadata: + # Found it! + console.print(Panel("[bold green]✓ Compound name FOUND in data request[/bold green]", border_style="green")) + + # Display metadata in a table + table = Table(title="Variable Metadata", show_header=True, header_style="bold magenta") + table.add_column("Property", style="cyan", no_wrap=True) + table.add_column("Value", style="white") + + # Key properties to display + display_props = [ + "variable_id", + "standard_name", + "long_name", + "units", + "frequency", + "modeling_realm", + "cmip6_compound_name", + "cell_methods", + "cell_measures", + ] + + for prop in display_props: + if prop in metadata: + value = str(metadata[prop]) + # Truncate very long values + if len(value) > 80: + value = value[:77] + "..." + table.add_row(prop, value) + + console.print(table) + + # Show all variants if requested + if show_all_variants: + parts = compound_name.split(".") + if len(parts) == 5: + realm, variable, branding, frequency, region = parts + console.print(f"\n[bold]Finding all variants of variable '{variable}' in realm '{realm}'...[/bold]") + variants = interface.find_variable_variants(variable, realm=realm) + + if len(variants) > 1: + console.print(f"Found {len(variants)} total variants:\n") + for var in variants: + console.print(f" • {var['cmip7_compound_name']}") + else: + console.print("No other variants found.") + + else: + # Not found + console.print(Panel("[bold red]✗ Compound name NOT FOUND in data request[/bold red]", border_style="red")) + + # Try to provide helpful information + parts = compound_name.split(".") + if len(parts) != 5: + console.print( + f"\n[yellow]Warning:[/yellow] Compound name should have 5 parts " + f"(realm.variable.branding.frequency.region), but got {len(parts)} parts." + ) + else: + realm, variable, branding, frequency, region = parts + console.print("\n[bold]Searching for similar variables...[/bold]") + + # Try to find variants of this variable + variants = interface.find_variable_variants(variable, realm=realm) + if variants: + console.print(f"\nFound {len(variants)} variant(s) of '{variable}' in realm '{realm}':") + for var in variants: + console.print(f" • {var['cmip7_compound_name']}") + console.print("\n[yellow]Hint:[/yellow] Check if one of these matches what you're looking for.") + else: + console.print(f"\n[yellow]No variants found for variable '{variable}' in realm '{realm}'.[/yellow]") + console.print("\n[yellow]Suggestions:[/yellow]") + console.print(" 1. Check spelling of variable name") + console.print(" 2. Verify the realm is correct") + console.print(" 3. Use 'pycmor table-explorer' to browse available variables") + + return 0 + + except ImportError as e: + console.print( + Panel( + "[bold red]Error: CMIP7 Data Request API not installed[/bold red]\n\n" + f"{str(e)}\n\n" + "Install with: pip install CMIP7-data-request-api", + border_style="red", + ) + ) + return 1 + except Exception as e: + console.print(Panel(f"[bold red]Error:[/bold red] {str(e)}", border_style="red")) + logger.exception("Failed to test compound name") + return 1 + + ################################################################################ ################################################################################ ################################################################################ From 0d98352c17ed9da40866e551123610c68314caa9 Mon Sep 17 00:00:00 2001 From: Paul Gierz Date: Fri, 12 Dec 2025 10:19:28 +0100 Subject: [PATCH 03/23] wip --- src/pycmor/core/cmorizer.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/pycmor/core/cmorizer.py b/src/pycmor/core/cmorizer.py index 19ee240d..ed46c27c 100644 --- a/src/pycmor/core/cmorizer.py +++ b/src/pycmor/core/cmorizer.py @@ -455,6 +455,8 @@ def find_matching_rule(self, data_request_variable: DataRequestVariable) -> Rule matches = [] attr_criteria = [("cmor_variable", "variable_id")] for rule in self.rules: + if getattr(rule, "debug_matching", False): + breakpoint() if all( getattr(rule, r_attr) == getattr(data_request_variable, drv_attr) for (r_attr, drv_attr) in attr_criteria From 8ff2dee8ed93f37f8ba595c2beea3795c32b1e63 Mon Sep 17 00:00:00 2001 From: Paul Gierz Date: Fri, 12 Dec 2025 10:33:04 +0100 Subject: [PATCH 04/23] ... --- src/pycmor/core/cmorizer.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/pycmor/core/cmorizer.py b/src/pycmor/core/cmorizer.py index ed46c27c..fe472baa 100644 --- a/src/pycmor/core/cmorizer.py +++ b/src/pycmor/core/cmorizer.py @@ -457,11 +457,22 @@ def find_matching_rule(self, data_request_variable: DataRequestVariable) -> Rule for rule in self.rules: if getattr(rule, "debug_matching", False): breakpoint() + + compound_name_match_cmip6 = getattr(data_request_variable, "cmip6_compound_name") == getattr( + rule, "compound_name" + ) + compound_name_match_cmip7 = getattr(data_request_variable, "cmip7_compound_name") == getattr( + rule, "compound_name" + ) + compound_name_match = compound_name_match_cmip6 or compound_name_match_cmip7 + if all( getattr(rule, r_attr) == getattr(data_request_variable, drv_attr) for (r_attr, drv_attr) in attr_criteria ): matches.append(rule) + elif compound_name_match: + matches.append(rule) if len(matches) == 0: msg = f"No rule found for {data_request_variable}" if self._pymor_cfg.get("raise_on_no_rule", False): From 89955b2f5e622dc413564a65c4738bd8b2fbc502 Mon Sep 17 00:00:00 2001 From: Paul Gierz Date: Fri, 12 Dec 2025 14:45:18 +0100 Subject: [PATCH 05/23] ... --- src/pycmor/core/cmorizer.py | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/src/pycmor/core/cmorizer.py b/src/pycmor/core/cmorizer.py index fe472baa..c41e494a 100644 --- a/src/pycmor/core/cmorizer.py +++ b/src/pycmor/core/cmorizer.py @@ -91,7 +91,7 @@ def __init__( logger.debug("---------------------") logger.debug(yaml.dump(self._general_cfg)) logger.debug("--------------------") - logger.debug("PyCMOR Configuration:") + logger.debug("PyCMOR Configuration") logger.debug("--------------------") # This isn't actually the config, it's the "App" object. Everett is weird about this... pymor_config = PycmorConfig() @@ -254,6 +254,17 @@ def _post_init_create_dask_cluster(self): else: logger.info("No Dask extras specified...") + def _locate_table_dir(self): + from .resource_locator import TableLocator + + user_table_dir = self._general_cfg.get("CMIP_Tables_Dir") + table_version = self._general_cfg.get("CMIP_Tables_version") + + TableLocatorClass = self._get_versioned_class(TableLocator) + locator = TableLocatorClass(version=table_version, user_path=user_table_dir) + table_dir = locator.locate() + return table_dir + def _post_init_create_data_request_tables(self): """ Loads all the tables from table directory using ResourceLocator priority chain. @@ -268,14 +279,7 @@ def _post_init_create_data_request_tables(self): A shortened version of the filename (i.e., ``CMIP6_Omon.json`` -> ``Omon``) is used as the mapping key. The same key format is used in CMIP6_table_id.json """ - from .resource_locator import TableLocator - - user_table_dir = self._general_cfg.get("CMIP_Tables_Dir") - table_version = self._general_cfg.get("CMIP_Tables_version") - - TableLocatorClass = self._get_versioned_class(TableLocator) - locator = TableLocatorClass(version=table_version, user_path=user_table_dir) - table_dir = locator.locate() + table_dir = self._locate_table_dir() if table_dir is None: raise FileNotFoundError( @@ -294,14 +298,7 @@ def _post_init_create_data_request(self): Uses TableLocator with 5-level priority chain to locate tables. """ - from .resource_locator import TableLocator - - user_table_dir = self._general_cfg.get("CMIP_Tables_Dir") - table_version = self._general_cfg.get("CMIP_Tables_version") - - TableLocatorClass = self._get_versioned_class(TableLocator) - locator = TableLocatorClass(version=table_version, user_path=user_table_dir) - table_dir = locator.locate() + table_dir = self._locate_table_dir() DataRequestClass = self._get_versioned_class(DataRequest) self.data_request = DataRequestClass.from_directory(table_dir) @@ -726,6 +723,7 @@ def from_dict(cls, data): logger.debug(f"Loaded {len(instance.pipelines)} pipelines from configuration") instance._post_init_populate_rules_with_tables() instance._post_init_create_data_request() + instance._post_init_create_cmip7_interface() instance._post_init_populate_rules_with_data_request_variables() instance._post_init_populate_rules_with_dimensionless_unit_mappings() instance._post_init_populate_rules_with_aux_files() @@ -764,7 +762,7 @@ def _rule_for_cmor_variable(self, cmor_variable): logger.debug(f"Found {len(matching_rules)} rules to apply for {cmor_variable}") return matching_rules - def check_rules_for_table(self, table_name): + def _check_rules_for_table(self, table_name): missing_variables = [] for cmor_variable in self._cmor_tables[table_name]["variable_entry"]: if self._rule_for_cmor_variable(cmor_variable) == []: @@ -779,7 +777,7 @@ def check_rules_for_table(self, table_name): logger.warning("This CMORizer may be incomplete or badly configured!") logger.warning(f"Missing rules for >> {len(missing_variables)} << variables.") - def check_rules_for_output_dir(self, output_dir): + def _check_rules_for_output_dir(self, output_dir): all_files_in_output_dir = [f for f in Path(output_dir).iterdir()] for rule in self.rules: # Remove files from list when matching a rule From 0513d32aa3f613fd7c47b75d094101b8595ce0ec Mon Sep 17 00:00:00 2001 From: Paul Gierz Date: Fri, 27 Mar 2026 14:06:25 +0100 Subject: [PATCH 06/23] feat: accessor API with lazy registration, StdLibAccessor, and .process() - Replace auto-import with enable_xarray_accessor() for lazy registration - Add _build_rule() helper for interactive Rule construction - Add StdLibAccessor with tab-completable std_lib steps via ds.pycmor.stdlib - Add .process() method for running full pipelines interactively - Add BaseModelRun ABC in pycmor.tutorial for test infrastructure - Update existing tests to use enable_xarray_accessor() - Add comprehensive test suite in test_accessor_api.py --- src/pycmor/__init__.py | 37 ++- src/pycmor/core/cmorizer.py | 1 + src/pycmor/tutorial/__init__.py | 5 + src/pycmor/tutorial/base_model_run.py | 371 ++++++++++++++++++++++++++ src/pycmor/xarray/__init__.py | 3 +- src/pycmor/xarray/accessor.py | 166 ++++++++++++ tests/unit/test_accessor_api.py | 128 +++++++++ tests/unit/test_accessors.py | 4 +- tests/unit/test_xarray_accessors.py | 4 +- 9 files changed, 711 insertions(+), 8 deletions(-) create mode 100644 src/pycmor/tutorial/__init__.py create mode 100644 src/pycmor/tutorial/base_model_run.py create mode 100644 tests/unit/test_accessor_api.py diff --git a/src/pycmor/__init__.py b/src/pycmor/__init__.py index 4b9fd965..00d17116 100644 --- a/src/pycmor/__init__.py +++ b/src/pycmor/__init__.py @@ -2,11 +2,38 @@ from . import _version -# Import unified accessor to trigger xarray registration -# This makes ds.pycmor.coords, ds.pycmor.dims, and time frequency methods available -from .xarray import accessor # noqa: F401 - __author__ = "Paul Gierz " -__all__ = [] +__all__ = ["enable_xarray_accessor"] __version__ = _version.get_versions()["version"] + +_accessor_registered = False + + +def enable_xarray_accessor(log_level="INFO"): + """Enable the pycmor xarray accessor (ds.pycmor). + + This function lazily registers the pycmor accessor on xarray Dataset + and DataArray objects. It is idempotent -- calling it multiple times + has no additional effect. + + Parameters + ---------- + log_level : str, optional + Logging level for loguru (default: "INFO"). + """ + global _accessor_registered + if _accessor_registered: + return + + from .xarray.accessor import PycmorAccessor, PycmorDataArrayAccessor # noqa: F401 + + # Configure loguru with rich formatting + try: + from loguru import logger + + logger.enable("pycmor") + except ImportError: + pass + + _accessor_registered = True diff --git a/src/pycmor/core/cmorizer.py b/src/pycmor/core/cmorizer.py index c41e494a..4b052ca5 100644 --- a/src/pycmor/core/cmorizer.py +++ b/src/pycmor/core/cmorizer.py @@ -50,6 +50,7 @@ class CMORizer: _SUPPORTED_CMOR_VERSIONS = ("CMIP6", "CMIP7") """tuple : Supported CMOR versions.""" + # [FIXME] I'd like to deprecate the pymor_cfg constructor option, but still keep it around def __init__( self, pymor_cfg=None, diff --git a/src/pycmor/tutorial/__init__.py b/src/pycmor/tutorial/__init__.py new file mode 100644 index 00000000..3279e6d4 --- /dev/null +++ b/src/pycmor/tutorial/__init__.py @@ -0,0 +1,5 @@ +"""Tutorial utilities for pycmor examples and testing.""" + +from .base_model_run import BaseModelRun + +__all__ = ["BaseModelRun"] diff --git a/src/pycmor/tutorial/base_model_run.py b/src/pycmor/tutorial/base_model_run.py new file mode 100644 index 00000000..ac3bf596 --- /dev/null +++ b/src/pycmor/tutorial/base_model_run.py @@ -0,0 +1,371 @@ +"""Base class for model-specific test run fixtures. + +This module provides a base class that standardizes the interface for +model-contributed test fixtures. Each model can inherit from this base +class and implement model-specific logic for: +- Downloading/extracting real data +- Generating stub data +- Accessing mesh files (if applicable) +- Opening datasets with xarray + +The base class handles: +- Registry and manifest path resolution via properties +- Cache directory management +- Routing between real and stub data based on environment/markers +- Lazy-loading of data directories, mesh directories, and datasets +- Common fixture patterns +""" + +import logging +import os +import re +from abc import ABC, abstractmethod +from pathlib import Path + +logger = logging.getLogger(__name__) + + +class BaseModelRun(ABC): + """Base class for model-specific test run fixtures. + + This class provides a standard interface for handling test data + across different climate model runs. Subclasses should implement + model-specific logic for fetching, generating, and opening data. + + A model run represents a specific execution of a climate model with + associated data files, mesh files, and configuration. + + Parameters + ---------- + model_name : str + Name of the model (e.g., 'fesom_uxarray', 'awicm_recom') + fixtures_dir : Path + Path to the model's fixtures directory + use_real : bool, optional + Whether to use real or stub data (default: False) + tmp_path_factory : pytest.TempPathFactory, optional + Pytest fixture for creating temporary directories (required for stub data) + + Attributes + ---------- + model_name : str + Name of the model + fixtures_dir : Path + Path to the fixtures directory + use_real : bool + Whether to use real or stub data + tmp_path_factory : pytest.TempPathFactory + Pytest fixture for creating temporary directories + """ + + def __init__( + self, + model_name: str, + fixtures_dir: Path, + use_real: bool = False, + tmp_path_factory=None, + ): + self.model_name = model_name + self.fixtures_dir = Path(fixtures_dir) + self.use_real = use_real + self.tmp_path_factory = tmp_path_factory + # Lazy-loaded cached values + self._datadir = None + self._meshdir = None + self._ds = None + + @classmethod + def from_module(cls, module_path: str, use_real: bool = False, tmp_path_factory=None): + """Create instance from a module's __file__ path. + + Parameters + ---------- + module_path : str + The __file__ attribute of the calling module + use_real : bool, optional + Whether to use real or stub data + tmp_path_factory : pytest.TempPathFactory, optional + Pytest fixture for creating temporary directories + + Returns + ------- + BaseModelRun + Instance configured for the calling module's model + """ + fixtures_dir = Path(module_path).parent + model_name = fixtures_dir.parent.name + return cls( + model_name=model_name, + fixtures_dir=fixtures_dir, + use_real=use_real, + tmp_path_factory=tmp_path_factory, + ) + + @property + def registry_path(self) -> Path: + """Path to the pooch registry YAML file. + + Generates filename from class name: Fesom2p6ModelRun -> fesom_2p6_registry.yaml + """ + class_name = self.__class__.__name__.replace("ModelRun", "") + # Insert underscore before uppercase letters and before digits starting a version + # but not before digits within a version (e.g., p6 in 2p6) + prefix = re.sub(r"(? Path: + """Path to the stub data manifest YAML file. + + Generates filename from class name: Fesom2p6ModelRun -> fesom_2p6_stub_manifest.yaml + """ + class_name = self.__class__.__name__.replace("ModelRun", "") + # Insert underscore before uppercase letters and before digits starting a version + # but not before digits within a version (e.g., p6 in 2p6) + prefix = re.sub(r"(? Path: + """Get the persistent cache directory for test data. + + Returns + ------- + Path + Cache directory path (usually ~/.cache/pycmor/test_data) + """ + # Default cache directory + cache_home = os.environ.get("XDG_CACHE_HOME", os.path.expanduser("~/.cache")) + cache_dir = Path(cache_home) / "pycmor" / "tutorial_data" + cache_dir.mkdir(parents=True, exist_ok=True) + return cache_dir + + @property + def config_path_cmip6(self) -> Path: + """Path to the CMIP6 configuration file for this model. + + Returns + ------- + Path + Path to config_cmip6.yaml in the model's fixtures directory + """ + return self.fixtures_dir / "config_cmip6.yaml" + + @property + def config_path_cmip7(self) -> Path: + """Path to the CMIP7 configuration file for this model. + + Returns + ------- + Path + Path to config_cmip7.yaml in the model's fixtures directory + """ + return self.fixtures_dir / "config_cmip7.yaml" + + @property + def configs(self) -> dict: + """Available configuration files for this model. + + Returns a dictionary of available config files. External packages + may not provide all config files, so tests should check if the + desired config key exists before using it. + + Returns + ------- + dict + Dictionary with keys like 'cmip6', 'cmip7' mapping to Path objects. + Only includes configs that actually exist. + + Examples + -------- + >>> if "cmip6" in model_run.configs: + ... config_path = model_run.configs["cmip6"] + ... # Use config_path + """ + available_configs = {} + if self.config_path_cmip6.exists(): + available_configs["cmip6"] = self.config_path_cmip6 + if self.config_path_cmip7.exists(): + available_configs["cmip7"] = self.config_path_cmip7 + return available_configs + + @staticmethod + def should_use_real_data(request=None) -> bool: + """Determine whether to use real or stub data. + + Checks: + 1. PYCMOR_USE_REAL_TEST_DATA environment variable + 2. pytest 'real_data' marker (if request provided) + + Parameters + ---------- + request : pytest.FixtureRequest, optional + Pytest request object for checking markers + + Returns + ------- + bool + True if real data should be used, False for stub data + """ + # Check environment variable + use_real = os.getenv("PYCMOR_USE_REAL_TEST_DATA", "").lower() in ("1", "true", "yes") + + # Check pytest marker + if request is not None and hasattr(request, "node"): + if request.node.get_closest_marker("real_data"): + use_real = True + + return use_real + + # Properties for lazy-loaded resources + + @property + def datadir(self) -> Path: + """Lazy-loaded data directory (real or stub). + + Returns + ------- + Path + Path to the data directory + """ + if self._datadir is None: + if self.use_real: + logger.info(f"Using real data for {self.model_name}") + self._datadir = self.fetch_real_datadir() + else: + logger.info(f"Using stub data for {self.model_name}") + if self.tmp_path_factory is None: + raise ValueError("tmp_path_factory required for stub data generation") + stub_dir = self.tmp_path_factory.mktemp(f"{self.model_name}_stub_data") + self._datadir = self.generate_stub_datadir(stub_dir) + return self._datadir + + @property + def meshdir(self) -> Path: + """Lazy-loaded mesh directory (real or stub). + + Returns + ------- + Path + Path to the mesh directory + + Raises + ------ + NotImplementedError + If the model does not implement mesh handling + """ + if self._meshdir is None: + if self.use_real: + logger.info(f"Using real mesh for {self.model_name}") + self._meshdir = self.fetch_real_meshdir() + else: + logger.info(f"Using stub mesh for {self.model_name}") + if self.tmp_path_factory is None: + raise ValueError("tmp_path_factory required for stub mesh generation") + stub_dir = self.tmp_path_factory.mktemp(f"{self.model_name}_stub_mesh") + self._meshdir = self.generate_stub_meshdir(stub_dir) + return self._meshdir + + @property + def ds(self): + """Lazy-loaded xarray dataset. + + Returns + ------- + xr.Dataset + Opened dataset from the data directory + """ + if self._ds is None: + self._ds = self.open_mfdataset() + return self._ds + + # Abstract methods for data handling + + @abstractmethod + def fetch_real_datadir(self) -> Path: + """Download and extract real data using pooch. + + Returns + ------- + Path + Path to the extracted data directory + """ + pass + + @abstractmethod + def generate_stub_datadir(self, stub_dir: Path) -> Path: + """Generate stub data from YAML manifest. + + Parameters + ---------- + stub_dir : Path + Temporary directory for stub data + + Returns + ------- + Path + Path to the stub data directory + """ + pass + + @abstractmethod + def open_mfdataset(self, **kwargs): + """Open xarray dataset from data directory. + + Subclasses should implement this method to define specific + file patterns and dataset opening logic. + + Parameters + ---------- + **kwargs + Additional keyword arguments for xr.open_mfdataset + + Returns + ------- + xr.Dataset + Opened dataset + """ + pass + + # Optional methods for mesh handling (override if model has mesh files) + + def fetch_real_meshdir(self) -> Path: + """Download or clone real mesh files. + + Override this method if the model requires separate mesh files. + Default implementation raises NotImplementedError. + + Returns + ------- + Path + Path to the mesh directory + + Raises + ------ + NotImplementedError + If the model does not implement mesh fetching + """ + raise NotImplementedError(f"Model {self.model_name} does not implement mesh fetching") + + def generate_stub_meshdir(self, stub_dir: Path) -> Path: + """Generate stub mesh files. + + Override this method if the model requires separate mesh files. + Default implementation raises NotImplementedError. + + Parameters + ---------- + stub_dir : Path + Temporary directory for stub mesh + + Returns + ------- + Path + Path to the stub mesh directory + + Raises + ------ + NotImplementedError + If the model does not implement stub mesh generation + """ + raise NotImplementedError(f"Model {self.model_name} does not implement stub mesh generation") diff --git a/src/pycmor/xarray/__init__.py b/src/pycmor/xarray/__init__.py index e8a3d63a..2a28bfab 100644 --- a/src/pycmor/xarray/__init__.py +++ b/src/pycmor/xarray/__init__.py @@ -1,10 +1,11 @@ """xarray integration for pycmor.""" -from .accessor import CoordinateAccessor, DimensionAccessor, PycmorAccessor, PycmorDataArrayAccessor +from .accessor import CoordinateAccessor, DimensionAccessor, PycmorAccessor, PycmorDataArrayAccessor, StdLibAccessor __all__ = [ "PycmorAccessor", "PycmorDataArrayAccessor", "CoordinateAccessor", "DimensionAccessor", + "StdLibAccessor", ] diff --git a/src/pycmor/xarray/accessor.py b/src/pycmor/xarray/accessor.py index a3e0d4e3..82e54fcb 100644 --- a/src/pycmor/xarray/accessor.py +++ b/src/pycmor/xarray/accessor.py @@ -205,6 +205,102 @@ def _lookup_data_request_variable( return None +def _build_rule(**kwargs): + """Build a Rule object from keyword arguments for interactive accessor use. + + Parameters + ---------- + **kwargs + Must include ``cmor_variable``. Optional: ``cmor_version``, ``table``, + ``compound_name``, ``variable_spec``, ``pipeline``, and any additional + attributes to attach to the Rule. + + Returns + ------- + Rule + A fully constructed Rule with ``_pycmor_cfg`` attached and + ``workflow_backend="native"`` on its pipeline. + """ + from ..core.config import PycmorConfigManager + from ..core.pipeline import DefaultPipeline, Pipeline + from ..core.rule import Rule + + cmor_variable = kwargs.pop("cmor_variable", None) + if cmor_variable is None: + raise ValueError("cmor_variable is required") + + cmor_version = kwargs.pop("cmor_version", "CMIP6") + table = kwargs.pop("table", None) + compound_name = kwargs.pop("compound_name", None) + variable_spec = kwargs.pop("variable_spec", None) + pipeline = kwargs.pop("pipeline", None) + + # Resolve data request variable + drv = _lookup_data_request_variable( + table=table, + variable=cmor_variable if table else None, + compound_name=compound_name, + variable_spec=variable_spec, + cmor_version=cmor_version, + ) + data_request_variables = [drv] if drv is not None else [] + + # Build pipeline with native backend + if pipeline is None: + pipeline = DefaultPipeline(workflow_backend="native") + elif isinstance(pipeline, Pipeline): + pipeline._workflow_backend = "native" + + rule = Rule( + cmor_variable=cmor_variable, + data_request_variables=data_request_variables, + pipelines=[pipeline], + **kwargs, + ) + + # Attach config manager so std_lib steps can call rule._pycmor_cfg("key") + rule._pycmor_cfg = PycmorConfigManager.from_pycmor_cfg() + + return rule + + +class StdLibAccessor: + """Accessor providing tab-completable access to pycmor.std_lib steps. + + Access via: ``ds.pycmor.stdlib.(**kwargs)`` + + Each step is called as ``step_fn(self._obj, rule)`` where the rule is + built from the provided kwargs via ``_build_rule()``. + """ + + def __init__(self, xarray_obj): + self._obj = xarray_obj + + def __dir__(self): + from .. import std_lib + + return [name for name in std_lib.__all__ if not name.startswith("_")] + + def __getattr__(self, name): + if name.startswith("_"): + raise AttributeError(name) + + from .. import std_lib + + if name not in std_lib.__all__: + raise AttributeError(f"pycmor.std_lib has no step named '{name}'") + + step_fn = getattr(std_lib, name) + + def bound_step(**kwargs): + rule = _build_rule(**kwargs) + return step_fn(self._obj, rule) + + bound_step.__name__ = name + bound_step.__doc__ = step_fn.__doc__ + return bound_step + + class CoordinateAccessor: """ Accessor for coordinate attribute operations. @@ -732,6 +828,7 @@ def __init__(self, xarray_obj): self._obj = xarray_obj self._coords_accessor = None self._dims_accessor = None + self._stdlib_accessor = None self._timefreq = None @property @@ -782,6 +879,75 @@ def dims(self) -> DimensionAccessor: self._dims_accessor = DimensionAccessor(self._obj) return self._dims_accessor + @property + def stdlib(self) -> StdLibAccessor: + """Access pycmor standard library steps interactively. + + Returns + ------- + StdLibAccessor + Accessor with tab-completable std_lib steps. + + Examples + -------- + .. code-block:: python + + result = ds.pycmor.stdlib.convert_units(cmor_variable="tas") + """ + if self._stdlib_accessor is None: + self._stdlib_accessor = StdLibAccessor(self._obj) + return self._stdlib_accessor + + def process( + self, + *, + cmor_variable: str, + cmor_version: str = "CMIP6", + table: Optional[str] = None, + compound_name: Optional[str] = None, + variable_spec: Optional[str] = None, + pipeline=None, + **kwargs, + ): + """Run a full processing pipeline on this dataset/array. + + Parameters + ---------- + cmor_variable : str + The CMOR variable name (required). + cmor_version : str, optional + CMIP version, default ``"CMIP6"``. + table : str, optional + CMIP6 table name. + compound_name : str, optional + CMIP7 compound name. + variable_spec : str, optional + Auto-detect format string. + pipeline : Pipeline, optional + Custom pipeline. Defaults to ``DefaultPipeline``. + **kwargs + Additional attributes passed to the Rule. + + Returns + ------- + Dataset or DataArray + Processed data. + """ + all_kwargs = { + "cmor_variable": cmor_variable, + "cmor_version": cmor_version, + "table": table, + "compound_name": compound_name, + "variable_spec": variable_spec, + "pipeline": pipeline, + } + all_kwargs.update(kwargs) + # Remove None values so _build_rule uses its own defaults + all_kwargs = {k: v for k, v in all_kwargs.items() if v is not None} + + rule = _build_rule(**all_kwargs) + return rule.pipelines[0].run(self._obj, rule) + # Time frequency methods - delegate to DatasetFrequencyAccessor def resample_safe(self, *args, **kwargs): """Resample dataset safely with temporal resolution validation. diff --git a/tests/unit/test_accessor_api.py b/tests/unit/test_accessor_api.py new file mode 100644 index 00000000..cf3f1b35 --- /dev/null +++ b/tests/unit/test_accessor_api.py @@ -0,0 +1,128 @@ +"""Tests for the accessor API: lazy registration, StdLibAccessor, process(), and _build_rule().""" + +import pytest +import xarray as xr + +import pycmor +from pycmor.xarray.accessor import _build_rule + + +class TestLazyRegistration: + """Test lazy accessor registration via enable_xarray_accessor().""" + + def test_flag_exists(self): + """The _accessor_registered flag should exist on the pycmor module.""" + assert hasattr(pycmor, "_accessor_registered") + + def test_idempotent(self): + """Calling enable_xarray_accessor() multiple times is safe.""" + pycmor.enable_xarray_accessor() + assert pycmor._accessor_registered is True + # Call again -- should not raise + pycmor.enable_xarray_accessor() + assert pycmor._accessor_registered is True + + def test_accessor_available_after_enable(self): + """After enable, ds.pycmor should be accessible.""" + pycmor.enable_xarray_accessor() + ds = xr.Dataset({"x": (["t"], [1.0])}) + assert hasattr(ds, "pycmor") + + +class TestStdLibAccessor: + """Test the StdLibAccessor exposed via ds.pycmor.stdlib.""" + + @pytest.fixture(autouse=True) + def _enable(self): + pycmor.enable_xarray_accessor() + + def test_dir_lists_steps(self): + """dir(ds.pycmor.stdlib) should list std_lib __all__ entries.""" + ds = xr.Dataset({"tas": (["time"], [1.0, 2.0])}) + stdlib_dir = dir(ds.pycmor.stdlib) + assert "convert_units" in stdlib_dir + assert "load_data" in stdlib_dir + assert "set_global_attributes" in stdlib_dir + + def test_unknown_step_raises(self): + """Accessing a non-existent step should raise AttributeError.""" + ds = xr.Dataset({"tas": (["time"], [1.0])}) + with pytest.raises(AttributeError, match="no step named"): + ds.pycmor.stdlib.definitely_not_a_real_step + + def test_missing_cmor_variable_raises(self): + """Calling a step without cmor_variable should raise ValueError.""" + ds = xr.Dataset({"tas": (["time"], [1.0])}) + step = ds.pycmor.stdlib.show_data + with pytest.raises(ValueError, match="cmor_variable is required"): + step() + + def test_dunder_raises_attribute_error(self): + """Accessing dunder attributes should not recurse into std_lib.""" + ds = xr.Dataset({"tas": (["time"], [1.0])}) + with pytest.raises(AttributeError): + ds.pycmor.stdlib.__nonexistent__ + + +class TestProcessMethod: + """Test the .process() method on PycmorAccessor.""" + + @pytest.fixture(autouse=True) + def _enable(self): + pycmor.enable_xarray_accessor() + + def test_method_exists(self): + """ds.pycmor.process should be a callable.""" + ds = xr.Dataset({"tas": (["time"], [1.0, 2.0])}) + assert hasattr(ds.pycmor, "process") + assert callable(ds.pycmor.process) + + def test_requires_cmor_variable(self): + """process() without cmor_variable should raise TypeError (missing kwarg).""" + ds = xr.Dataset({"tas": (["time"], [1.0])}) + with pytest.raises(TypeError): + ds.pycmor.process() + + def test_process_on_dataarray(self): + """process() should also be available on DataArrays.""" + da = xr.DataArray([1.0, 2.0], dims=["time"], name="tas") + assert hasattr(da.pycmor, "process") + assert callable(da.pycmor.process) + + +class TestBuildRule: + """Test the _build_rule() helper function.""" + + def test_basic_construction(self): + """_build_rule should return a Rule with the given cmor_variable.""" + rule = _build_rule(cmor_variable="tas") + assert rule.cmor_variable == "tas" + + def test_pycmor_cfg_attached(self): + """The rule should have _pycmor_cfg attached for std_lib steps.""" + rule = _build_rule(cmor_variable="tas") + assert hasattr(rule, "_pycmor_cfg") + # Should be callable (ConfigManager) + assert callable(rule._pycmor_cfg) + + def test_native_backend(self): + """The pipeline should use native workflow backend.""" + rule = _build_rule(cmor_variable="tas") + assert len(rule.pipelines) == 1 + assert rule.pipelines[0]._workflow_backend == "native" + + def test_extra_kwargs_become_attrs(self): + """Additional kwargs should be set as attributes on the Rule.""" + rule = _build_rule(cmor_variable="tas", model_variable="temp", experiment_id="historical") + assert rule.model_variable == "temp" + assert rule.experiment_id == "historical" + + def test_missing_cmor_variable_raises(self): + """_build_rule without cmor_variable should raise ValueError.""" + with pytest.raises(ValueError, match="cmor_variable is required"): + _build_rule() + + def test_data_request_variables_empty_without_table(self): + """Without table info, data_request_variables should be empty.""" + rule = _build_rule(cmor_variable="tas") + assert rule.data_request_variables == [] or rule.data_request_variables == [None] diff --git a/tests/unit/test_accessors.py b/tests/unit/test_accessors.py index 52754b9a..ad045543 100644 --- a/tests/unit/test_accessors.py +++ b/tests/unit/test_accessors.py @@ -10,9 +10,11 @@ import pytest import xarray as xr -# Import pycmor to register all accessors +# Import pycmor and enable accessor registration import pycmor # noqa: F401 +pycmor.enable_xarray_accessor() + @pytest.fixture def regular_monthly_time(): diff --git a/tests/unit/test_xarray_accessors.py b/tests/unit/test_xarray_accessors.py index 96c71301..14cd452c 100644 --- a/tests/unit/test_xarray_accessors.py +++ b/tests/unit/test_xarray_accessors.py @@ -6,9 +6,11 @@ import pytest import xarray as xr -# Import pycmor to register accessors +# Import pycmor and enable accessor registration import pycmor # noqa: F401 +pycmor.enable_xarray_accessor() + class TestAccessorRegistration: """Test that accessors are properly registered.""" From 0c78b9a1fb09d0aab36655f155481932c951e505 Mon Sep 17 00:00:00 2001 From: Paul Gierz Date: Fri, 27 Mar 2026 14:55:44 +0100 Subject: [PATCH 07/23] style: fix trailing whitespace and isort --- doc/infer_freq.rst | 2 +- doc/netcdf_chunking.rst | 20 ++++++++++---------- tests/integration/test_yaml_validation.py | 2 +- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/doc/infer_freq.rst b/doc/infer_freq.rst index 9791d89b..d0f26528 100644 --- a/doc/infer_freq.rst +++ b/doc/infer_freq.rst @@ -188,7 +188,7 @@ Accessor Methods ~~~~~~~~~~~~~~~~ Time frequency functionality is available through xarray accessors. For comprehensive -documentation of all accessor methods, including both specialized (``timefreq``) and +documentation of all accessor methods, including both specialized (``timefreq``) and unified (``pymor``) accessors, see: .. seealso:: diff --git a/doc/netcdf_chunking.rst b/doc/netcdf_chunking.rst index 1db76a32..984bea39 100644 --- a/doc/netcdf_chunking.rst +++ b/doc/netcdf_chunking.rst @@ -23,7 +23,7 @@ Configuration Options Global Configuration via Inherit Block --------------------------------------- -The recommended way to configure chunking is through the ``inherit`` block in your pycmor configuration file. +The recommended way to configure chunking is through the ``inherit`` block in your pycmor configuration file. Settings in the ``inherit`` block are automatically passed down to all rules, making them available as rule attributes: .. code-block:: yaml @@ -31,33 +31,33 @@ Settings in the ``inherit`` block are automatically passed down to all rules, ma general: cmor_version: "CMIP6" CMIP_Tables_Dir: ./cmip6-cmor-tables/Tables/ - + pycmor: warn_on_no_rule: False - + # Chunking configuration that applies to all rules inherit: # Enable/disable chunking netcdf_enable_chunking: yes - + # Chunking algorithm: simple, even_divisor, or iterative netcdf_chunk_algorithm: simple - + # Target chunk size (can be specified as bytes or string like '100MB') netcdf_chunk_size: 100MB - + # Tolerance for chunk size matching (0.0-1.0, used by even_divisor and iterative) netcdf_chunk_tolerance: 0.5 - + # Prefer chunking along time dimension netcdf_chunk_prefer_time: yes - + # Compression level (1-9, higher = better compression but slower) netcdf_compression_level: 4 - + # Enable zlib compression netcdf_enable_compression: yes - + rules: - model_variable: temp cmor_variable: tas diff --git a/tests/integration/test_yaml_validation.py b/tests/integration/test_yaml_validation.py index a76d93ca..4ee75d78 100644 --- a/tests/integration/test_yaml_validation.py +++ b/tests/integration/test_yaml_validation.py @@ -3,7 +3,7 @@ import pytest import yaml -from pycmor.core.validate import GENERAL_VALIDATOR, RULES_VALIDATOR, RuleSectionValidator, RULES_SCHEMA +from pycmor.core.validate import GENERAL_VALIDATOR, RULES_SCHEMA, RULES_VALIDATOR, RuleSectionValidator @pytest.fixture From 492ca1c99daaa9a7cebe5481275edbdcc5f21359 Mon Sep 17 00:00:00 2001 From: Paul Gierz Date: Fri, 27 Mar 2026 15:28:06 +0100 Subject: [PATCH 08/23] fix: update pi_uxarray download URL (old Nextcloud share expired) --- tests/fixtures/example_data/pi_uxarray.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/fixtures/example_data/pi_uxarray.py b/tests/fixtures/example_data/pi_uxarray.py index 3e69b6d2..3938ad50 100644 --- a/tests/fixtures/example_data/pi_uxarray.py +++ b/tests/fixtures/example_data/pi_uxarray.py @@ -11,7 +11,7 @@ from tests.fixtures.stub_generator import generate_stub_files -URL = "https://nextcloud.awi.de/s/swqyFgbL2jjgjRo/download/pi_uxarray.tar" +URL = "https://nextcloud.awi.de/s/o2YQy2i9BR97Rge/download/pi_uxarray.tar" """str : URL to download the example data from.""" MESH_GIT_REPO = "https://gitlab.awi.de/fesom/pi" From 3b2af742e03bdc74226d860d487241ef693b358a Mon Sep 17 00:00:00 2001 From: Paul Gierz Date: Fri, 27 Mar 2026 15:39:58 +0100 Subject: [PATCH 09/23] fix: add compound_name to CMIP7 test configs and setuptools to Docker - Add required compound_name field to all CMIP7 test config rules (validator requires it for cmor_version=CMIP7) - Add setuptools to Dockerfile.test (pyfesom2 imports pkg_resources) --- Dockerfile.test | 1 + tests/configs/test_config_cmip7.yaml | 3 +++ tests/configs/test_config_fesom_2p6_pimesh_cmip7.yaml | 1 + tests/configs/test_config_pi_uxarray_cmip7.yaml | 1 + 4 files changed, 6 insertions(+) diff --git a/Dockerfile.test b/Dockerfile.test index 5ebf2dc9..ce600bb0 100644 --- a/Dockerfile.test +++ b/Dockerfile.test @@ -40,6 +40,7 @@ RUN micromamba install -y -n base -c conda-forge \ h5netcdf \ h5py \ pip \ + setuptools \ && micromamba clean --all --yes # Activate the base environment for subsequent commands diff --git a/tests/configs/test_config_cmip7.yaml b/tests/configs/test_config_cmip7.yaml index c7eec4c9..53d25eb4 100644 --- a/tests/configs/test_config_cmip7.yaml +++ b/tests/configs/test_config_cmip7.yaml @@ -31,6 +31,7 @@ rules: enabled: true description: "This is a test rule" cmor_variable: "tas" + compound_name: "atmos.tas.mean.mon.gn" input_type: "xr.DataArray" input_source: "xr_tutorial" output_directory: . @@ -50,6 +51,7 @@ rules: - path: "/a/b/c" pattern: ".*" cmor_variable: "so" + compound_name: "ocean.so.mean.mon.gn" output_directory: . variant_label: r1i1p1f1 experiment_id: piControl @@ -58,6 +60,7 @@ rules: grid_label: gn - name: test_rule4 cmor_variable: "thetao" + compound_name: "ocean.thetao.mean.mon.gn" pipelines: ["sleeper_pipeline"] output_directory: . variant_label: r1i1p1f1 diff --git a/tests/configs/test_config_fesom_2p6_pimesh_cmip7.yaml b/tests/configs/test_config_fesom_2p6_pimesh_cmip7.yaml index 1af34e94..80a42601 100644 --- a/tests/configs/test_config_fesom_2p6_pimesh_cmip7.yaml +++ b/tests/configs/test_config_fesom_2p6_pimesh_cmip7.yaml @@ -24,6 +24,7 @@ rules: - path: "REPLACE_ME/outdata/fesom" pattern: "temp.fesom..*.nc" cmor_variable: "thetao" + compound_name: "ocean.thetao.mean.mon.gn" model_variable: "temp" sort_dimensions_missing_dims: "warn" model_dim: diff --git a/tests/configs/test_config_pi_uxarray_cmip7.yaml b/tests/configs/test_config_pi_uxarray_cmip7.yaml index c94297c4..31a3a256 100644 --- a/tests/configs/test_config_pi_uxarray_cmip7.yaml +++ b/tests/configs/test_config_pi_uxarray_cmip7.yaml @@ -21,6 +21,7 @@ rules: - path: "REPLACE_ME" pattern: "temp.fesom..*.nc" cmor_variable: "thetao" + compound_name: "ocean.thetao.mean.mon.gn" model_variable: "temp" model_component: ocean grid_label: gn From fa75da0ce788513f0bb09a2a52deaae66e2aaf8b Mon Sep 17 00:00:00 2001 From: Paul Gierz Date: Fri, 27 Mar 2026 15:58:41 +0100 Subject: [PATCH 10/23] fix: use CMIP6-style compound names (Table.variable) in CMIP7 test configs --- tests/configs/test_config_awicm_1p0_recom_cmip7.yaml | 2 +- tests/configs/test_config_cmip7.yaml | 6 +++--- tests/configs/test_config_fesom_2p6_pimesh_cmip7.yaml | 2 +- tests/configs/test_config_pi_uxarray_cmip7.yaml | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/configs/test_config_awicm_1p0_recom_cmip7.yaml b/tests/configs/test_config_awicm_1p0_recom_cmip7.yaml index a5bdf212..b6d8d501 100644 --- a/tests/configs/test_config_awicm_1p0_recom_cmip7.yaml +++ b/tests/configs/test_config_awicm_1p0_recom_cmip7.yaml @@ -24,7 +24,7 @@ rules: grid_label: gn variant_label: "r1i1p1f1" model_component: "ocean" - compound_name: "ocean.thetao.mean.mon.gn" + compound_name: "Omon.thetao" inputs: - path: "REPLACE_ME/outdata/fesom" pattern: "thetao.fesom..*.nc" diff --git a/tests/configs/test_config_cmip7.yaml b/tests/configs/test_config_cmip7.yaml index 53d25eb4..1c17e79f 100644 --- a/tests/configs/test_config_cmip7.yaml +++ b/tests/configs/test_config_cmip7.yaml @@ -31,7 +31,7 @@ rules: enabled: true description: "This is a test rule" cmor_variable: "tas" - compound_name: "atmos.tas.mean.mon.gn" + compound_name: "Amon.tas" input_type: "xr.DataArray" input_source: "xr_tutorial" output_directory: . @@ -51,7 +51,7 @@ rules: - path: "/a/b/c" pattern: ".*" cmor_variable: "so" - compound_name: "ocean.so.mean.mon.gn" + compound_name: "Omon.so" output_directory: . variant_label: r1i1p1f1 experiment_id: piControl @@ -60,7 +60,7 @@ rules: grid_label: gn - name: test_rule4 cmor_variable: "thetao" - compound_name: "ocean.thetao.mean.mon.gn" + compound_name: "Omon.thetao" pipelines: ["sleeper_pipeline"] output_directory: . variant_label: r1i1p1f1 diff --git a/tests/configs/test_config_fesom_2p6_pimesh_cmip7.yaml b/tests/configs/test_config_fesom_2p6_pimesh_cmip7.yaml index 80a42601..f503fc3b 100644 --- a/tests/configs/test_config_fesom_2p6_pimesh_cmip7.yaml +++ b/tests/configs/test_config_fesom_2p6_pimesh_cmip7.yaml @@ -24,7 +24,7 @@ rules: - path: "REPLACE_ME/outdata/fesom" pattern: "temp.fesom..*.nc" cmor_variable: "thetao" - compound_name: "ocean.thetao.mean.mon.gn" + compound_name: "Omon.thetao" model_variable: "temp" sort_dimensions_missing_dims: "warn" model_dim: diff --git a/tests/configs/test_config_pi_uxarray_cmip7.yaml b/tests/configs/test_config_pi_uxarray_cmip7.yaml index 31a3a256..ccdf248b 100644 --- a/tests/configs/test_config_pi_uxarray_cmip7.yaml +++ b/tests/configs/test_config_pi_uxarray_cmip7.yaml @@ -21,7 +21,7 @@ rules: - path: "REPLACE_ME" pattern: "temp.fesom..*.nc" cmor_variable: "thetao" - compound_name: "ocean.thetao.mean.mon.gn" + compound_name: "Omon.thetao" model_variable: "temp" model_component: ocean grid_label: gn From 0ea98cfe9d2b2425b77647d8702248ff1fbf8ade Mon Sep 17 00:00:00 2001 From: Paul Gierz Date: Fri, 27 Mar 2026 16:07:03 +0100 Subject: [PATCH 11/23] fix: guard pyfesom2 imports in tests to avoid collection errors --- tests/unit/test_aux_files.py | 6 +++++- tests/unit/test_fesom.py | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/tests/unit/test_aux_files.py b/tests/unit/test_aux_files.py index 0b55fc43..040c386b 100644 --- a/tests/unit/test_aux_files.py +++ b/tests/unit/test_aux_files.py @@ -1,10 +1,14 @@ import os import pytest -from pyfesom2.load_mesh_data import fesom_mesh from pycmor.core.aux_files import attach_files_to_rule +try: + from pyfesom2.load_mesh_data import fesom_mesh +except ImportError: + fesom_mesh = None + def test_aux_files_attach_without_aux(pi_uxarray_temp_rule): rule = pi_uxarray_temp_rule diff --git a/tests/unit/test_fesom.py b/tests/unit/test_fesom.py index a3f7ad25..db9b96da 100644 --- a/tests/unit/test_fesom.py +++ b/tests/unit/test_fesom.py @@ -4,7 +4,11 @@ import xarray as xr import pycmor -import pycmor.fesom_2p1.regridding + +try: + import pycmor.fesom_2p1.regridding +except ImportError: + pass @pytest.mark.skipif( From 66cf054a9fa78a5b62081d72ba9498f288ec2578 Mon Sep 17 00:00:00 2001 From: Paul Gierz Date: Fri, 27 Mar 2026 16:13:52 +0100 Subject: [PATCH 12/23] fix: guard pyfesom2 import in regridding.py for environments without pkg_resources --- src/pycmor/fesom_2p1/regridding.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/pycmor/fesom_2p1/regridding.py b/src/pycmor/fesom_2p1/regridding.py index 0b134c4b..17aab7df 100644 --- a/src/pycmor/fesom_2p1/regridding.py +++ b/src/pycmor/fesom_2p1/regridding.py @@ -6,7 +6,11 @@ import scipy import scipy.spatial.qhull as qhull import xarray as xr -from pyfesom2.load_mesh_data import load_mesh + +try: + from pyfesom2.load_mesh_data import load_mesh +except ImportError: + load_mesh = None from scipy.interpolate import CloughTocher2DInterpolator, LinearNDInterpolator from scipy.spatial import cKDTree From 11f5bbecb5a875fc076030bc3fd448685a9ad110 Mon Sep 17 00:00:00 2001 From: Paul Gierz Date: Fri, 27 Mar 2026 16:16:29 +0100 Subject: [PATCH 13/23] fix: skip pyfesom2-dependent tests when pyfesom2 is not importable --- tests/unit/test_aux_files.py | 1 + tests/unit/test_fesom.py | 9 ++++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/tests/unit/test_aux_files.py b/tests/unit/test_aux_files.py index 040c386b..7eee899c 100644 --- a/tests/unit/test_aux_files.py +++ b/tests/unit/test_aux_files.py @@ -32,6 +32,7 @@ def test_aux_files_attach_simple_file(pi_uxarray_temp_rule, tmp_path): assert rule.aux == {"aux1": "Hello, pytest!"} +@pytest.mark.skipif(fesom_mesh is None, reason="pyfesom2 not available (pkg_resources missing)") @pytest.mark.skipif( not os.getenv("PYCMOR_USE_REAL_TEST_DATA"), reason="FESOM mesh loading requires real mesh data (set PYCMOR_USE_REAL_TEST_DATA=1)", diff --git a/tests/unit/test_fesom.py b/tests/unit/test_fesom.py index db9b96da..577352da 100644 --- a/tests/unit/test_fesom.py +++ b/tests/unit/test_fesom.py @@ -7,10 +7,16 @@ try: import pycmor.fesom_2p1.regridding + + _has_pyfesom2 = True except ImportError: - pass + _has_pyfesom2 = False + + +_skip_no_pyfesom2 = pytest.mark.skipif(not _has_pyfesom2, reason="pyfesom2 not available (pkg_resources missing)") +@_skip_no_pyfesom2 @pytest.mark.skipif( not os.getenv("PYCMOR_USE_REAL_TEST_DATA"), reason="FESOM regridding requires real mesh data (set PYCMOR_USE_REAL_TEST_DATA=1)", @@ -28,6 +34,7 @@ def test_regridding(fesom_pi_mesh_config, fesom_2p6_pimesh_esm_tools_data, pi_ux assert da.shape == (3, 360, 180) +@_skip_no_pyfesom2 @pytest.mark.skipif( not os.getenv("PYCMOR_USE_REAL_TEST_DATA"), reason="FESOM mesh attachment requires real mesh data (set PYCMOR_USE_REAL_TEST_DATA=1)", From b405898b753587262732b46992ff4bb3d81badfe Mon Sep 17 00:00:00 2001 From: Paul Gierz Date: Fri, 27 Mar 2026 16:22:14 +0100 Subject: [PATCH 14/23] fix: CMIP7 compound_name matching when DRV has plain variable_id The vendored all_var_info.json does not populate cmip7_compound_name or cmip6_compound_name on DRVs. So variable_id falls back to the short name (e.g., "tas"). The matching logic compared the full compound name "Amon.tas" against the plain "tas" when only one side had a dot, which always failed. Fix: always extract the short name from compound_name for comparison, regardless of whether the DRV also has dots. Also add a fallback match against drv.name directly. Add CMIP7 DRV fixtures (dr_cmip7_tas, dr_cmip7_thetao) for testing. --- src/pycmor/core/cmorizer.py | 22 +++++++++++++++------- tests/fixtures/data_requests.py | 22 +++++++++++++++++++++- 2 files changed, 36 insertions(+), 8 deletions(-) diff --git a/src/pycmor/core/cmorizer.py b/src/pycmor/core/cmorizer.py index 64e2697e..720eaae9 100644 --- a/src/pycmor/core/cmorizer.py +++ b/src/pycmor/core/cmorizer.py @@ -455,17 +455,23 @@ def find_matching_rule(self, data_request_variable: DataRequestVariable) -> Rule if hasattr(rule, "compound_name") and rule.compound_name is not None: rule_value = rule.compound_name drv_value = getattr(data_request_variable, "variable_id") - # For compound name matching, compare directly or extract variable names - if "." in rule_value and "." in str(drv_value): - # Both are compound names, extract variable parts for comparison + drv_name = getattr(data_request_variable, "name", drv_value) + + # Extract the variable short name from the rule's compound name + if "." in rule_value: rule_parts = rule_value.split(".") - drv_parts = str(drv_value).split(".") rule_var = rule_parts[1] if len(rule_parts) >= 2 else rule_value - drv_var = drv_parts[1] if len(drv_parts) >= 2 else drv_value else: - # One or both are not compound names, compare as-is rule_var = rule_value + + # Extract the variable short name from the DRV + if "." in str(drv_value): + drv_parts = str(drv_value).split(".") + drv_var = drv_parts[1] if len(drv_parts) >= 2 else drv_value + else: + # DRV has a plain short name (e.g., "tas") -- compare directly drv_var = drv_value + # Also check full compound name match for CMIP6/CMIP7 compound_name_match_cmip6 = ( getattr(data_request_variable, "cmip6_compound_name", None) == rule.compound_name @@ -473,7 +479,9 @@ def find_matching_rule(self, data_request_variable: DataRequestVariable) -> Rule compound_name_match_cmip7 = ( getattr(data_request_variable, "cmip7_compound_name", None) == rule.compound_name ) - compound_name_match = compound_name_match_cmip6 or compound_name_match_cmip7 + # Also match rule compound_name directly against drv name + compound_name_match_name = drv_name == rule_var + compound_name_match = compound_name_match_cmip6 or compound_name_match_cmip7 or compound_name_match_name else: # Use cmor_variable with compound name extraction logic rule_value = getattr(rule, "cmor_variable") diff --git a/tests/fixtures/data_requests.py b/tests/fixtures/data_requests.py index 19a19fb6..389075ba 100644 --- a/tests/fixtures/data_requests.py +++ b/tests/fixtures/data_requests.py @@ -1,6 +1,6 @@ import pytest -from pycmor.data_request.variable import DataRequestVariable +from pycmor.data_request.variable import CMIP7DataRequestVariable, DataRequestVariable @pytest.fixture @@ -17,3 +17,23 @@ def dr_sos(): cell_methods="area: mean where sea", cell_measures="area: areacello", ) + + +@pytest.fixture +def dr_cmip7_tas(): + """CMIP7 DataRequestVariable for tas (near-surface air temperature). + + Loaded from the vendored all_var_info.json using the CMIP6-style + compound name key 'Amon.tas'. + """ + return CMIP7DataRequestVariable.from_all_var_info_json("Amon.tas") + + +@pytest.fixture +def dr_cmip7_thetao(): + """CMIP7 DataRequestVariable for thetao (sea water potential temperature). + + Loaded from the vendored all_var_info.json using the CMIP6-style + compound name key 'Omon.thetao'. + """ + return CMIP7DataRequestVariable.from_all_var_info_json("Omon.thetao") From ab87d7b328c71fb260dd809d7c6cde6ca704a6c3 Mon Sep 17 00:00:00 2001 From: Paul Gierz Date: Fri, 27 Mar 2026 16:35:39 +0100 Subject: [PATCH 15/23] fix: propagate pipeline/flow errors instead of silently logging them Pipeline._run_prefect() now uses return_state=True and checks for failures, re-raising the original exception. Previously, Prefect swallowed exceptions via on_failure callbacks that only logged. CMORizer._parallel_process_prefect() also checks both the flow-level state and individual rule future states for failures. This ensures integration tests correctly fail when pipeline steps raise exceptions. --- src/pycmor/core/cmorizer.py | 16 +++++++++++++++- src/pycmor/core/pipeline.py | 8 +++++++- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/src/pycmor/core/cmorizer.py b/src/pycmor/core/cmorizer.py index 720eaae9..ddf0d541 100644 --- a/src/pycmor/core/cmorizer.py +++ b/src/pycmor/core/cmorizer.py @@ -870,7 +870,21 @@ def dynamic_flow(): # We encapsulate the flow in a context manager to ensure that the # Dask cluster is available in the singleton, which could be used # during unpickling to reattach it to a Pipeline. - return dynamic_flow() + result = dynamic_flow(return_state=True) + if result.is_failed(): + exc = result.result(raise_on_failure=False) + if isinstance(exc, BaseException): + raise exc + raise RuntimeError(f"CMORizer parallel processing failed: {exc}") + # Check individual rule results for failures + for future in result.result(): + state = future.state + if state.is_failed(): + exc = state.result(raise_on_failure=False) + if isinstance(exc, BaseException): + raise exc + raise RuntimeError(f"Rule processing failed: {exc}") + return result.result() def _parallel_process_dask(self, external_client=None): if external_client: diff --git a/src/pycmor/core/pipeline.py b/src/pycmor/core/pipeline.py index 5a0eccaf..7ccead98 100644 --- a/src/pycmor/core/pipeline.py +++ b/src/pycmor/core/pipeline.py @@ -144,7 +144,13 @@ def _run_prefect(self, data, rule_spec): def dynamic_flow(data, rule_spec): return self._run_native(data, rule_spec) - return dynamic_flow(data, rule_spec) + result = dynamic_flow(data, rule_spec, return_state=True) + if result.is_failed(): + exc = result.result(raise_on_failure=False) + if isinstance(exc, BaseException): + raise exc + raise RuntimeError(f"Pipeline '{self.name}' failed for rule '{rule_name}': {exc}") + return result.result() @staticmethod @add_to_report_log From 3345d7e77369088cdac6a65f7673a9eb2d8dce39 Mon Sep 17 00:00:00 2001 From: Paul Gierz Date: Fri, 27 Mar 2026 16:37:33 +0100 Subject: [PATCH 16/23] fix: remove raw convert() from DefaultPipeline steps DefaultPipeline had both handle_unit_conversion (correct pipeline step taking data+rule) and units.convert (low-level function taking da+from_unit+to_unit). The latter was called with (data, rule) args, causing ParameterBindError: missing required argument 'to_unit'. handle_unit_conversion already calls convert() internally, so the duplicate step was both wrong and redundant. --- src/pycmor/core/pipeline.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/pycmor/core/pipeline.py b/src/pycmor/core/pipeline.py index 7ccead98..9409f915 100644 --- a/src/pycmor/core/pipeline.py +++ b/src/pycmor/core/pipeline.py @@ -267,8 +267,6 @@ class DefaultPipeline(FrozenPipeline): "pycmor.std_lib.add_vertical_bounds", "pycmor.std_lib.timeaverage.timeavg", "pycmor.std_lib.units.handle_unit_conversion", - # "pycmor.std_lib.time.average", - "pycmor.std_lib.units.convert", "pycmor.std_lib.attributes.set_global", "pycmor.std_lib.attributes.set_variable", "pycmor.std_lib.attributes.set_coordinates", From 3c8d3f5e90498f4b6b414ecff6f6befae412525e Mon Sep 17 00:00:00 2001 From: Paul Gierz Date: Fri, 27 Mar 2026 16:57:00 +0100 Subject: [PATCH 17/23] fix: ConfigurationError from dict default and missing activity_id - dimension_mapping.py: use getattr(rule, "dimension_mapping") instead of rule._pycmor_cfg("dimension_mapping", default={}) -- dimension_mapping is a rule attribute, not a config option, and everett rejects non-string defaults - CMIP7 test configs: add activity_id="CMIP" to rules that need it for global attribute generation - cmorizer.py: fix parallel error checking to handle both PrefectFuture and State objects from different Prefect versions --- src/pycmor/core/cmorizer.py | 24 ++++++++++++------- src/pycmor/std_lib/dimension_mapping.py | 6 ++--- .../test_config_fesom_2p6_pimesh_cmip7.yaml | 1 + .../configs/test_config_pi_uxarray_cmip7.yaml | 1 + 4 files changed, 21 insertions(+), 11 deletions(-) diff --git a/src/pycmor/core/cmorizer.py b/src/pycmor/core/cmorizer.py index ddf0d541..4abfb6c5 100644 --- a/src/pycmor/core/cmorizer.py +++ b/src/pycmor/core/cmorizer.py @@ -877,14 +877,22 @@ def dynamic_flow(): raise exc raise RuntimeError(f"CMORizer parallel processing failed: {exc}") # Check individual rule results for failures - for future in result.result(): - state = future.state - if state.is_failed(): - exc = state.result(raise_on_failure=False) - if isinstance(exc, BaseException): - raise exc - raise RuntimeError(f"Rule processing failed: {exc}") - return result.result() + rule_results = result.result() + for item in rule_results: + # Items may be PrefectFuture or State objects depending on Prefect version + if hasattr(item, "result") and hasattr(item, "is_failed"): + if item.is_failed(): + exc = item.result(raise_on_failure=False) + if isinstance(exc, BaseException): + raise exc + raise RuntimeError(f"Rule processing failed: {exc}") + elif hasattr(item, "state"): + if item.state.is_failed(): + exc = item.state.result(raise_on_failure=False) + if isinstance(exc, BaseException): + raise exc + raise RuntimeError(f"Rule processing failed: {exc}") + return rule_results def _parallel_process_dask(self, external_client=None): if external_client: diff --git a/src/pycmor/std_lib/dimension_mapping.py b/src/pycmor/std_lib/dimension_mapping.py index 561dc8af..7afcaa72 100644 --- a/src/pycmor/std_lib/dimension_mapping.py +++ b/src/pycmor/std_lib/dimension_mapping.py @@ -760,11 +760,11 @@ def map_dimensions(ds: Union[xr.Dataset, xr.DataArray], rule) -> Union[xr.Datase logger.debug("Dimension mapping is disabled") return ds if not was_dataarray else ds[da_name] - # Get user-specified mapping from rule - user_mapping = rule._pycmor_cfg("dimension_mapping", default={}) + # Get user-specified mapping from rule (rule attribute, not config) + user_mapping = getattr(rule, "dimension_mapping", None) or {} # Get allow_override setting - allow_override = rule._pycmor_cfg("dimension_mapping_allow_override", default=True) + allow_override = rule._pycmor_cfg("dimension_mapping_allow_override") # Create mapper mapper = DimensionMapper() diff --git a/tests/configs/test_config_fesom_2p6_pimesh_cmip7.yaml b/tests/configs/test_config_fesom_2p6_pimesh_cmip7.yaml index f503fc3b..378d43b7 100644 --- a/tests/configs/test_config_fesom_2p6_pimesh_cmip7.yaml +++ b/tests/configs/test_config_fesom_2p6_pimesh_cmip7.yaml @@ -14,6 +14,7 @@ general: rules: - name: "temp" experiment_id: "piControl" + activity_id: "CMIP" output_directory: "./output" source_id: "AWI-CM-1-1-HR" institution_id: "AWI" diff --git a/tests/configs/test_config_pi_uxarray_cmip7.yaml b/tests/configs/test_config_pi_uxarray_cmip7.yaml index ccdf248b..815d4efd 100644 --- a/tests/configs/test_config_pi_uxarray_cmip7.yaml +++ b/tests/configs/test_config_pi_uxarray_cmip7.yaml @@ -14,6 +14,7 @@ general: rules: - name: "temp" experiment_id: "piControl" + activity_id: "CMIP" output_directory: "./output" source_id: "AWI-CM-1-1-HR" variant_label: "r1i1p1f1" From 197802ce3c40d24cc3d7f396a133f7b0a4206d72 Mon Sep 17 00:00:00 2001 From: Paul Gierz Date: Fri, 27 Mar 2026 17:33:42 +0100 Subject: [PATCH 18/23] fix: dimension_mapping Mock iteration error and doctest in base_model_run - dimension_mapping.py: check isinstance(user_mapping, dict) to handle Mock objects in tests (getattr on Mock returns Mock, not None) - base_model_run.py: convert doctest example to code-block to prevent pytest from trying to execute it --- src/pycmor/std_lib/dimension_mapping.py | 4 +++- src/pycmor/tutorial/base_model_run.py | 7 ++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/pycmor/std_lib/dimension_mapping.py b/src/pycmor/std_lib/dimension_mapping.py index 7afcaa72..f2479791 100644 --- a/src/pycmor/std_lib/dimension_mapping.py +++ b/src/pycmor/std_lib/dimension_mapping.py @@ -761,7 +761,9 @@ def map_dimensions(ds: Union[xr.Dataset, xr.DataArray], rule) -> Union[xr.Datase return ds if not was_dataarray else ds[da_name] # Get user-specified mapping from rule (rule attribute, not config) - user_mapping = getattr(rule, "dimension_mapping", None) or {} + user_mapping = getattr(rule, "dimension_mapping", {}) + if not isinstance(user_mapping, dict): + user_mapping = {} # Get allow_override setting allow_override = rule._pycmor_cfg("dimension_mapping_allow_override") diff --git a/src/pycmor/tutorial/base_model_run.py b/src/pycmor/tutorial/base_model_run.py index ac3bf596..68d82c35 100644 --- a/src/pycmor/tutorial/base_model_run.py +++ b/src/pycmor/tutorial/base_model_run.py @@ -178,9 +178,10 @@ def configs(self) -> dict: Examples -------- - >>> if "cmip6" in model_run.configs: - ... config_path = model_run.configs["cmip6"] - ... # Use config_path + .. code-block:: python + + if "cmip6" in model_run.configs: + config_path = model_run.configs["cmip6"] """ available_configs = {} if self.config_path_cmip6.exists(): From ff766ee556ec98488e2163e56e6c8ad7d5f85c11 Mon Sep 17 00:00:00 2001 From: Paul Gierz Date: Fri, 27 Mar 2026 17:46:03 +0100 Subject: [PATCH 19/23] fix: fall back to _pycmor_cfg for dimension_mapping when rule attr is not a dict --- src/pycmor/std_lib/dimension_mapping.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/pycmor/std_lib/dimension_mapping.py b/src/pycmor/std_lib/dimension_mapping.py index f2479791..fcb62bdd 100644 --- a/src/pycmor/std_lib/dimension_mapping.py +++ b/src/pycmor/std_lib/dimension_mapping.py @@ -760,10 +760,15 @@ def map_dimensions(ds: Union[xr.Dataset, xr.DataArray], rule) -> Union[xr.Datase logger.debug("Dimension mapping is disabled") return ds if not was_dataarray else ds[da_name] - # Get user-specified mapping from rule (rule attribute, not config) - user_mapping = getattr(rule, "dimension_mapping", {}) + # Get user-specified mapping: try rule attribute first, then config + user_mapping = getattr(rule, "dimension_mapping", None) if not isinstance(user_mapping, dict): - user_mapping = {} + try: + user_mapping = rule._pycmor_cfg("dimension_mapping", default="") + if not isinstance(user_mapping, dict): + user_mapping = {} + except Exception: + user_mapping = {} # Get allow_override setting allow_override = rule._pycmor_cfg("dimension_mapping_allow_override") From 51a00e11757d303cb15aa86a90d789eec68e4eed Mon Sep 17 00:00:00 2001 From: Paul Gierz Date: Mon, 30 Mar 2026 08:45:42 +0200 Subject: [PATCH 20/23] fix: derive table_id from CMIP6-style compound names (Table.variable) --- src/pycmor/std_lib/global_attributes.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/pycmor/std_lib/global_attributes.py b/src/pycmor/std_lib/global_attributes.py index 2d0e0534..0e44f177 100644 --- a/src/pycmor/std_lib/global_attributes.py +++ b/src/pycmor/std_lib/global_attributes.py @@ -478,11 +478,16 @@ def get_table_id(self): compound_name = self.rule_dict.get("compound_name", None) logger.debug(f"Attempting to derive table_id from compound_name: {compound_name}") if compound_name: - # compound_name format: component.variable.cell_methods.frequency.grid - # Example: ocnBgchem.fgco2.tavg-u-hxy-sea.mon.GLB + # compound_name formats: + # CMIP6-style: Table.variable (e.g., Amon.tas, Omon.thetao) + # CMIP7-style: component.variable.cell_methods.frequency.grid parts = compound_name.split(".") logger.debug(f"compound_name split into {len(parts)} parts: {parts}") - if len(parts) >= 5: + if len(parts) == 2: + # CMIP6-style compound name: table_id is the first part + table_id = parts[0] + logger.debug(f"Derived table_id from CMIP6-style compound_name: {table_id}") + elif len(parts) >= 5: component = parts[0] # e.g., ocnBgchem frequency = parts[3] # e.g., mon From 6ffc348bc5edd0f28357ce7315cc33c6df2af720 Mon Sep 17 00:00:00 2001 From: Paul Gierz Date: Mon, 30 Mar 2026 08:57:39 +0200 Subject: [PATCH 21/23] fix: filter None values from global attributes before applying to dataset --- src/pycmor/std_lib/global_attributes.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/pycmor/std_lib/global_attributes.py b/src/pycmor/std_lib/global_attributes.py index 0e44f177..f7e4f37c 100644 --- a/src/pycmor/std_lib/global_attributes.py +++ b/src/pycmor/std_lib/global_attributes.py @@ -860,5 +860,9 @@ def set_global_attributes(ds, rule): """Set global attributes for the dataset""" if isinstance(ds, xr.DataArray): ds = ds.to_dataset() - ds.attrs.update(rule.ga.global_attributes()) + global_attrs = rule.ga.global_attributes() + # Filter out None values -- xarray accepts them in memory but + # netCDF serialization rejects non-string/non-numeric attributes + global_attrs = {k: v for k, v in global_attrs.items() if v is not None} + ds.attrs.update(global_attrs) return ds From 8f8f7a5d3a1599591c425ed508c1eff882f5e718 Mon Sep 17 00:00:00 2001 From: Paul Gierz Date: Mon, 30 Mar 2026 09:17:28 +0200 Subject: [PATCH 22/23] fix: pin sphinx<9 for sphinx_toolbox compatibility --- doc/requirements.txt | 1 + pyproject.toml | 1 + 2 files changed, 2 insertions(+) diff --git a/doc/requirements.txt b/doc/requirements.txt index 2dca8e3d..492bfc30 100644 --- a/doc/requirements.txt +++ b/doc/requirements.txt @@ -1,3 +1,4 @@ +sphinx>=7.4.7,<9 sphinx-book-theme sphinx-click sphinx-copybutton diff --git a/pyproject.toml b/pyproject.toml index 8b0ef327..6941a736 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -94,6 +94,7 @@ dev = [ ] doc = [ + "sphinx>=7.4.7,<9", "sphinx-book-theme>=1.1.4", "sphinx-click>=6.0.0", "sphinx-copybutton>=0.5.2", From 130e897a3a8605d5d74a4b8b20374e22af4b4e7a Mon Sep 17 00:00:00 2001 From: Paul Gierz Date: Mon, 30 Mar 2026 09:55:20 +0200 Subject: [PATCH 23/23] feat: rename non-standard time dimension on load (OpenIFS support) Cherry-picked from PR #194 by @mzapponi (adapted for src/pycmor/ paths): - gather_inputs.py: if rule has time_dimname and dataset uses that dimension instead of "time", rename it automatically on load - pipeline.py: defensive getattr for _cluster attribute Co-authored-by: Martina Zapponi --- src/pycmor/core/gather_inputs.py | 4 ++++ src/pycmor/core/pipeline.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/pycmor/core/gather_inputs.py b/src/pycmor/core/gather_inputs.py index 22330c24..6975e309 100644 --- a/src/pycmor/core/gather_inputs.py +++ b/src/pycmor/core/gather_inputs.py @@ -291,6 +291,10 @@ def load_mfdataset(data, rule_spec): for f in all_files: logger.info(f" * {f}") mf_ds = xr.open_mfdataset(all_files, parallel=parallel, use_cftime=True, engine=engine) + # Rename non-standard time dimension if specified in rule (e.g., OpenIFS uses different names) + time_dimname = rule_spec.get("time_dimname") + if time_dimname and time_dimname in mf_ds.dims and "time" not in mf_ds.dims: + mf_ds = mf_ds.rename({time_dimname: "time"}) return mf_ds diff --git a/src/pycmor/core/pipeline.py b/src/pycmor/core/pipeline.py index 9409f915..2d835115 100644 --- a/src/pycmor/core/pipeline.py +++ b/src/pycmor/core/pipeline.py @@ -128,7 +128,7 @@ def _run_prefect(self, data, rule_spec): logger.debug("Dynamically creating workflow with DaskTaskRunner...") cmor_name = rule_spec.get("cmor_name") rule_name = rule_spec.get("name", cmor_name) - if self._cluster is None: + if getattr(self, "_cluster", None) is None: logger.warning("No cluster assigned to this pipeline. Using local Dask cluster.") dask_scheduler_address = None else: