diff --git a/fre/__init__.py b/fre/__init__.py index cfe401402..da0fe1308 100644 --- a/fre/__init__.py +++ b/fre/__init__.py @@ -2,12 +2,12 @@ module init file for fre. sets the version attribute, and sets up a fre_logger """ +import logging import os + version = os.getenv("GIT_DESCRIBE_TAG", "2025.04") __version__ = version -import logging - fre_logger = logging.getLogger(__name__) FORMAT = "[%(levelname)5s:%(filename)24s:%(funcName)24s] %(message)s" diff --git a/fre/analysis/freanalysis.py b/fre/analysis/freanalysis.py index 4f100bd9d..1863eae7e 100644 --- a/fre/analysis/freanalysis.py +++ b/fre/analysis/freanalysis.py @@ -1,16 +1,19 @@ ''' fre analysis ''' # a third party package -import click import logging -fre_logger = logging.getLogger(__name__) -## a diff gfdl package -#from analysis_scripts import available_plugins +import click # this package -from .subtools import install_analysis_package, list_plugins, run_analysis, \ - uninstall_analysis_package +from .subtools import ( + install_analysis_package, + list_plugins, + run_analysis, + uninstall_analysis_package +) + +fre_logger = logging.getLogger(__name__) @click.group(help=click.style(" - analysis subcommands", fg=(250, 154, 90))) diff --git a/fre/app/generate_time_averages/combine.py b/fre/app/generate_time_averages/combine.py index 6e17b0f54..db8ffbc92 100644 --- a/fre/app/generate_time_averages/combine.py +++ b/fre/app/generate_time_averages/combine.py @@ -94,7 +94,7 @@ def combine( root_in_dir: str, """ if frequency not in ["yr", "mon"]: raise ValueError(f"Frequency '{frequency}' not recognized or supported") - + if frequency == "yr": frequency_iso = "P1Y" elif frequency == "mon": diff --git a/fre/app/generate_time_averages/generate_time_averages.py b/fre/app/generate_time_averages/generate_time_averages.py index c7e480c56..2d0f0a874 100755 --- a/fre/app/generate_time_averages/generate_time_averages.py +++ b/fre/app/generate_time_averages/generate_time_averages.py @@ -125,6 +125,3 @@ def generate(inf = None, fre_logger.warning('time averaging exited non-zero, exitstatus == %s', exitstatus) else: fre_logger.info('time averaging finished successfully') - - - diff --git a/fre/app/helpers.py b/fre/app/helpers.py index ec8229ee3..5548e3619 100644 --- a/fre/app/helpers.py +++ b/fre/app/helpers.py @@ -1,10 +1,12 @@ +# set up logging +import logging import os +from contextlib import contextmanager from pathlib import Path + import yaml -from contextlib import contextmanager -# set up logging -import logging + fre_logger = logging.getLogger(__name__) def get_variables(yml: dict, pp_comp: str) -> dict: @@ -30,7 +32,9 @@ def get_variables(yml: dict, pp_comp: str) -> dict: for component_info in yml["postprocess"]["components"]: # if component in yaml not an active pp component, skip if component_info.get("type") != pp_comp: - fre_logger.info(f'Component in pp yaml config (%s) does not match active pp component (%s). Skipping component remapping ...', component_info.get("type"), pp_comp) + fre_logger.info( + 'Component in pp yaml config (%s) does not match active pp component (%s). ' + 'Skipping component remapping ...', component_info.get("type"), pp_comp) continue # non-static diff --git a/fre/app/regrid_xy/regrid_xy.py b/fre/app/regrid_xy/regrid_xy.py index 83895b52f..ad6bdd19b 100644 --- a/fre/app/regrid_xy/regrid_xy.py +++ b/fre/app/regrid_xy/regrid_xy.py @@ -361,7 +361,10 @@ def regrid_xy(yamlfile: str, continue # create the output dir - output_subdir = Path(output_dir) / f"{datadict['output_nlat']}_{datadict['output_nlon']}.{datadict['interp_method']}" + output_subdir = ( + Path(output_dir) + / f"{datadict['output_nlat']}_{datadict['output_nlon']}.{datadict['interp_method']}" + ) output_subdir.mkdir(parents=True, exist_ok=True) #construct fregrid command diff --git a/fre/app/regrid_xy/tests/generate_files.py b/fre/app/regrid_xy/tests/generate_files.py index 181d33f50..f7a2b93e9 100644 --- a/fre/app/regrid_xy/tests/generate_files.py +++ b/fre/app/regrid_xy/tests/generate_files.py @@ -1,9 +1,11 @@ -import numpy as np -from pathlib import Path import shutil import tarfile -import yaml +from pathlib import Path + +import numpy as np import xarray as xr +import yaml + nxy = 20 nxyp = nxy + 1 @@ -20,24 +22,25 @@ def cleanup(): - if Path(yamlfile).exists(): - Path(yamlfile).unlink() + if Path(yamlfile).exists(): + Path(yamlfile).unlink() - if Path("grid_spec.nc").exists(): - Path("grid_spec.nc").unlink() + if Path("grid_spec.nc").exists(): + Path("grid_spec.nc").unlink() - if Path(grid_spec_tar).exists(): - Path(grid_spec_tar).unlink() + if Path(grid_spec_tar).exists(): + Path(grid_spec_tar).unlink() - if Path(input_mosaic).exists(): - Path(input_mosaic).unlink() + if Path(input_mosaic).exists(): + Path(input_mosaic).unlink() - if Path(input_dir).exists(): - shutil.rmtree(input_dir) + if Path(input_dir).exists(): + shutil.rmtree(input_dir) - for i in range(1, ntiles+1): - gridfile = Path(f"{input_grid}.tile{i}.nc") - if gridfile.exists(): gridfile.unlink() + for i in range(1, ntiles+1): + gridfile = Path(f"{input_grid}.tile{i}.nc") + if gridfile.exists(): + gridfile.unlink() def set_test(components_in: dict, @@ -50,124 +53,132 @@ def set_test(components_in: dict, input_grid_in: str = None, input_dir_in: str = None): - global components - global nxyp, nxy, ntiles, grid_spec_tar, input_grid - global date, input_mosaic - global input_dir, yamlfile - global tar_list - - components = components_in - if nxy_in is not None: - nxy = nxy_in - nxyp = nxy_in+1 - input_grid = f"C{nxy}" - if ntiles_in is not None: ntiles = ntiles_in - if date_in is not None: date = date_in - if yamlfile_in is not None: yamlfile = yamlfile_in - if grid_spec_tar_in is not None: grid_spec_tar = grid_spec_tar_in - if input_grid_in is not None: input_grid = input_grid_in - if input_mosaic_in is not None: input_mosaic = input_mosaic_in - if input_dir_in is not None: input_dir = input_dir_in - - tar_list = [] + global components + global nxyp, nxy, ntiles, grid_spec_tar, input_grid + global date, input_mosaic + global input_dir, yamlfile + global tar_list + + components = components_in + if nxy_in is not None: + nxy = nxy_in + nxyp = nxy_in+1 + input_grid = f"C{nxy}" + if ntiles_in is not None: + ntiles = ntiles_in + if date_in is not None: + date = date_in + if yamlfile_in is not None: + yamlfile = yamlfile_in + if grid_spec_tar_in is not None: + grid_spec_tar = grid_spec_tar_in + if input_grid_in is not None: + input_grid = input_grid_in + if input_mosaic_in is not None: + input_mosaic = input_mosaic_in + if input_dir_in is not None: + input_dir = input_dir_in + + tar_list = [] def make_yaml(): - ppyaml = {} - ppyaml["name"] = yamlfile + ppyaml = {} + ppyaml["name"] = yamlfile - directories = ppyaml["directories"] = {} - directories["history_dir"] = "./" - directories["pp_dir"] = "./" + directories = ppyaml["directories"] = {} + directories["history_dir"] = "./" + directories["pp_dir"] = "./" - postprocess = ppyaml["postprocess"] = {} - postprocess["settings"] = {"pp_grid_spec": grid_spec_tar} - postprocess["components"] = components + postprocess = ppyaml["postprocess"] = {} + postprocess["settings"] = {"pp_grid_spec": grid_spec_tar} + postprocess["components"] = components - with open(yamlfile, "w") as openedfile: - yaml.dump(ppyaml, openedfile, sort_keys=False) + with open(yamlfile, "w") as openedfile: + yaml.dump(ppyaml, openedfile, sort_keys=False) def make_grid_spec(): - xr.Dataset(data_vars={"atm_mosaic_file": f"{input_mosaic}".encode(), - "lnd_mosaic_file": f"{input_mosaic}".encode(), - "ocn_mosaic_file": "ocean_mosaic.nc".encode()} - ).to_netcdf("grid_spec.nc") + xr.Dataset(data_vars={"atm_mosaic_file": f"{input_mosaic}".encode(), + "lnd_mosaic_file": f"{input_mosaic}".encode(), + "ocn_mosaic_file": "ocean_mosaic.nc".encode()} + ).to_netcdf("grid_spec.nc") - tar_list.append("grid_spec.nc") + tar_list.append("grid_spec.nc") def make_mosaic(): - if ntiles > 1: - gridfiles = [f"{input_grid}.tile{i}.nc".encode() for i in range(1,ntiles+1)] - gridtiles = [f"tile{i}".encode() for i in range(1,ntiles+1)] - else: - gridfiles = f"{input_grid}.nc".encode() - gridtiles = f"tile1".encode() + if ntiles > 1: + gridfiles = [f"{input_grid}.tile{i}.nc".encode() for i in range(1,ntiles+1)] + gridtiles = [f"tile{i}".encode() for i in range(1,ntiles+1)] + else: + gridfiles = f"{input_grid}.nc".encode() + gridtiles = "tile1".encode() - data = dict(gridfiles = xr.DataArray(gridfiles, dims=["ntiles"]).astype("|S255"), - gridtiles = xr.DataArray(gridtiles, dims=["ntiles"]).astype("|S255") - ) + data = dict(gridfiles = xr.DataArray(gridfiles, dims=["ntiles"]).astype("|S255"), + gridtiles = xr.DataArray(gridtiles, dims=["ntiles"]).astype("|S255") + ) - xr.Dataset(data_vars=data).to_netcdf(f"{input_mosaic}") + xr.Dataset(data_vars=data).to_netcdf(f"{input_mosaic}") - tar_list.append(f"{input_mosaic}") + tar_list.append(f"{input_mosaic}") def make_grid(): - xy = np.arange(0, nxyp, 1, dtype=np.float64) - area = np.ones((nxy, nxy), dtype=np.float64) + xy = np.arange(0, nxyp, 1, dtype=np.float64) + area = np.ones((nxy, nxy), dtype=np.float64) - x, y = np.meshgrid(xy, xy) + x, y = np.meshgrid(xy, xy) - data = dict(x = xr.DataArray(x, dims=["nyp", "nxp"]), - y = xr.DataArray(y, dims=["nyp", "nxp"]), - area = xr.DataArray(area, dims=["ny", "nx"]) - ) + data = dict(x = xr.DataArray(x, dims=["nyp", "nxp"]), + y = xr.DataArray(y, dims=["nyp", "nxp"]), + area = xr.DataArray(area, dims=["ny", "nx"]) + ) - for i in range(1, ntiles+1): - data["tile"] = xr.DataArray(f"tile{i}".encode()).astype("|S255") - xr.Dataset(data).to_netcdf(f"{input_grid}.tile{i}.nc") + for i in range(1, ntiles+1): + data["tile"] = xr.DataArray(f"tile{i}".encode()).astype("|S255") + xr.Dataset(data).to_netcdf(f"{input_grid}.tile{i}.nc") - tar_list.append(f"{input_grid}.tile{i}.nc") + tar_list.append(f"{input_grid}.tile{i}.nc") def make_data(): - data = {} - data["mister"] = xr.DataArray(np.full((nxy,nxy), 1.0, dtype=np.float64), dims=["ny", "nx"]) - data["darcy"] = xr.DataArray(np.full((nxy,nxy), 2.0, dtype=np.float64), dims=["ny", "nx"]) - data["wins"] = xr.DataArray(np.full((nxy,nxy), 3.0, dtype=np.float64), dims=["ny", "nx"]) - data["wet_c"] = xr.DataArray(np.full((nxy,nxy), 5.0, dtype=np.float64), dims=["ny", "nx"]) + data = {} + data["mister"] = xr.DataArray(np.full((nxy,nxy), 1.0, dtype=np.float64), dims=["ny", "nx"]) + data["darcy"] = xr.DataArray(np.full((nxy,nxy), 2.0, dtype=np.float64), dims=["ny", "nx"]) + data["wins"] = xr.DataArray(np.full((nxy,nxy), 3.0, dtype=np.float64), dims=["ny", "nx"]) + data["wet_c"] = xr.DataArray(np.full((nxy,nxy), 5.0, dtype=np.float64), dims=["ny", "nx"]) - coords = {"nx": np.arange(1,nxyp, dtype=np.float64), - "ny": np.arange(1,nxyp, dtype=np.float64)} + coords = {"nx": np.arange(1,nxyp, dtype=np.float64), + "ny": np.arange(1,nxyp, dtype=np.float64)} - dataset = xr.Dataset(data_vars=data, coords=coords) + dataset = xr.Dataset(data_vars=data, coords=coords) - for component in components: - for source in component["sources"]: - history_file = source["history_file"] - for i in range(1, ntiles+1): - dataset.to_netcdf(f"{input_dir}/{date}.{history_file}.tile{i}.nc") - if "static" in component: - for static_source in component["static"]: - history_file = static_source["source"] - for i in range(1, ntiles+1): - dataset.to_netcdf(f"{input_dir}/{date}.{history_file}.tile{i}.nc") + for component in components: + for source in component["sources"]: + history_file = source["history_file"] + for i in range(1, ntiles+1): + dataset.to_netcdf(f"{input_dir}/{date}.{history_file}.tile{i}.nc") + if "static" in component: + for static_source in component["static"]: + history_file = static_source["source"] + for i in range(1, ntiles+1): + dataset.to_netcdf(f"{input_dir}/{date}.{history_file}.tile{i}.nc") def make_all(): - make_yaml() - make_grid_spec() - make_mosaic() - make_grid() - make_data() - - with tarfile.open(grid_spec_tar, "w") as tar: - for ifile in tar_list: tar.add(ifile) - - for ifile in tar_list: - Path(ifile).unlink() + make_yaml() + make_grid_spec() + make_mosaic() + make_grid() + make_data() + + with tarfile.open(grid_spec_tar, "w") as tar: + for ifile in tar_list: + tar.add(ifile) + + for ifile in tar_list: + Path(ifile).unlink() diff --git a/fre/app/regrid_xy/tests/test_regrid_xy.py b/fre/app/regrid_xy/tests/test_regrid_xy.py index 992dd3e78..46091a077 100644 --- a/fre/app/regrid_xy/tests/test_regrid_xy.py +++ b/fre/app/regrid_xy/tests/test_regrid_xy.py @@ -36,20 +36,20 @@ {"xyInterp": f"{nxy},{nxy}", "interpMethod": "conserve_order2", "inputRealm": "atmos", - "type": f"pride_and_prejudice", + "type": "pride_and_prejudice", "sources": input_files, "postprocess_on": True}, {"xyInterp": f"{nxy},{nxy}", "interpMethod": "conserve_order2", "inputRealm": "atmos", - "type": f"my_component", + "type": "my_component", "sources": input_files, "static": input_files_static, "postprocess_on": True}, {"xyInterp": f"{nxy},{nxy}", "interpMethod": "conserve_order2", "inputRealm": "atmos", - "type": f"this_comp_is_off", + "type": "this_comp_is_off", "sources": input_files_donotregrid, "postprocess_on": False} ] @@ -84,51 +84,51 @@ def cleanup_test(): @pytest.mark.skipif(not HAVE_FREGRID, reason='fregrid not in env. it was removed from package reqs. you must load it externally') def test_regrid_xy(): - """ - Tests the main function regrid_xy and ensures - data is regridded correctly - """ - - setup_test() - - #modify generate_files to change sources - for source_dict in input_files: - source = source_dict["history_file"] - regrid_xy.regrid_xy(yamlfile=str(yamlfile), - input_dir=str(input_dir), - output_dir=str(output_dir), - work_dir=str(work_dir), - remap_dir=str(remap_dir), - source=source, - input_date=date+"TTTT") - - #check answers - for source_dict in input_files: - # Files are now output to a subdirectory based on grid size and interpolation method - output_subdir = output_dir/f"{nxy}_{nxy}.conserve_order2" - outfile = output_subdir/f"{date}.{source_dict['history_file']}.nc" - - test = xr.load_dataset(outfile) - - assert "wet_c" not in test - assert "mister" in test - assert "darcy" in test - assert "wins" in test - - assert np.all(test["mister"].values==np.float64(1.0)) - assert np.all(test["darcy"].values==np.float64(2.0)) - assert np.all(test["wins"].values==np.float64(3.0)) - - #check answers, these shouldn't have been regridded - for source_dict in input_files_donotregrid: - ifile = source_dict["history_file"] - assert not (output_dir/f"{date}.{ifile}.nc").exists() + """ + Tests the main function regrid_xy and ensures + data is regridded correctly + """ - #check remap_file exists and is not empty - remap_file = remap_dir/f"C{nxy}_mosaicX{nxy}by{nxy}_conserve_order2.nc" - assert remap_file.exists() + setup_test() - cleanup_test() + #modify generate_files to change sources + for source_dict in input_files: + source = source_dict["history_file"] + regrid_xy.regrid_xy(yamlfile=str(yamlfile), + input_dir=str(input_dir), + output_dir=str(output_dir), + work_dir=str(work_dir), + remap_dir=str(remap_dir), + source=source, + input_date=date+"TTTT") + + #check answers + for source_dict in input_files: + # Files are now output to a subdirectory based on grid size and interpolation method + output_subdir = output_dir/f"{nxy}_{nxy}.conserve_order2" + outfile = output_subdir/f"{date}.{source_dict['history_file']}.nc" + + test = xr.load_dataset(outfile) + + assert "wet_c" not in test + assert "mister" in test + assert "darcy" in test + assert "wins" in test + + assert np.all(test["mister"].values==np.float64(1.0)) + assert np.all(test["darcy"].values==np.float64(2.0)) + assert np.all(test["wins"].values==np.float64(3.0)) + + #check answers, these shouldn't have been regridded + for source_dict in input_files_donotregrid: + ifile = source_dict["history_file"] + assert not (output_dir/f"{date}.{ifile}.nc").exists() + + #check remap_file exists and is not empty + remap_file = remap_dir/f"C{nxy}_mosaicX{nxy}by{nxy}_conserve_order2.nc" + assert remap_file.exists() + + cleanup_test() @pytest.mark.skipif(not HAVE_FREGRID, reason='fregrid not in env. it was removed from package reqs. you must load it externally') @@ -172,8 +172,8 @@ def test_regrid_xy_static(): assert remap_file.exists() cleanup_test() - - + + @pytest.mark.skipif(not HAVE_FREGRID, reason='fregrid not in env. it was removed from package reqs. you must load it externally') diff --git a/fre/app/remap_pp_components/remap_pp_components.py b/fre/app/remap_pp_components/remap_pp_components.py index 51b936df8..5bcdc924c 100755 --- a/fre/app/remap_pp_components/remap_pp_components.py +++ b/fre/app/remap_pp_components/remap_pp_components.py @@ -276,7 +276,10 @@ def get_varlist(comp_info: dict, product: str, req_source: str, src_vars: dict) """ if product == "static": if comp_info.get("static") is None: - raise ValueError(f"Product is set to static but no static sources/variables defined for {comp_info.get('type')}") + raise ValueError( + f"Product is set to static but no static sources/variables defined for " + f"{comp_info.get('type')}" + ) ## Dictionary of variables associated with pp component source name ## are retrieved through Jinjafilter get_variables.py @@ -483,7 +486,9 @@ def remap_pp_components(input_dir: str, output_dir: str, begin_date: str, curren chunk = get_chunk(comp_info) ## might have to be a list ... for c in chunk: if c != current_chunk: - fre_logger.warning("Chunk in directory structure is not equal to the current chunk set! Skipping component remapping for %s...", comp) + fre_logger.warning( + "Chunk in directory structure is not equal to the current " + "chunk set! Skipping component remapping for %s...", comp) continue if ens_mem is not None: os.chdir(f"{input_dir}/{g}/{ens_mem}/{s}/{f}/{c}") diff --git a/fre/app/remap_pp_components/tests/test_remap_pp_components.py b/fre/app/remap_pp_components/tests/test_remap_pp_components.py index d723e44e9..bd83a11ad 100644 --- a/fre/app/remap_pp_components/tests/test_remap_pp_components.py +++ b/fre/app/remap_pp_components/tests/test_remap_pp_components.py @@ -374,8 +374,10 @@ def test_nccmp_ncgen_remap_statics(): output_nc_file = f"{comp_name}.bk.nc" nccmp = [ "nccmp", "-d", - Path(f"{REMAP_IN}/{NATIVE_GRID}/atmos_static_scalar/{STATIC_FREQ}/{STATIC_CHUNK}/{STATIC_DATA_NC_FILES[0]}"), - Path(f"{REMAP_OUT}/static/{comp_name}/{STATIC_FREQ}/{STATIC_CHUNK}/{output_nc_file}")] + Path(f"{REMAP_IN}/{NATIVE_GRID}/atmos_static_scalar/" + f"{STATIC_FREQ}/{STATIC_CHUNK}/{STATIC_DATA_NC_FILES[0]}"), + Path(f"{REMAP_OUT}/static/{comp_name}/" + f"{STATIC_FREQ}/{STATIC_CHUNK}/{output_nc_file}")] sp = subprocess.run( nccmp, check = False) assert sp.returncode == 0 @@ -444,8 +446,12 @@ def test_remap_static_variable_filtering(): # Check for # 1. creation of output directory structure, # 2. link to nc file in output location - assert all([Path(f"{remap_static_out}/atmos_scalar_test_vars_CNAME/{STATIC_FREQ}/{STATIC_CHUNK}").exists(), - Path(f"{remap_static_out}/atmos_scalar_test_vars_CNAME/{STATIC_FREQ}/{STATIC_CHUNK}/atmos_scalar_test_vars_CNAME.bk.nc").exists()]) + assert all([ + Path(f"{remap_static_out}/atmos_scalar_test_vars_CNAME/" + f"{STATIC_FREQ}/{STATIC_CHUNK}").exists(), + Path(f"{remap_static_out}/atmos_scalar_test_vars_CNAME/" + f"{STATIC_FREQ}/{STATIC_CHUNK}/atmos_scalar_test_vars_CNAME.bk.nc").exists() + ]) @pytest.mark.xfail def test_remap_variable_filtering_fail(): diff --git a/fre/app/tests/test_helpers.py b/fre/app/tests/test_helpers.py index 6ece8a7e2..227d1be86 100644 --- a/fre/app/tests/test_helpers.py +++ b/fre/app/tests/test_helpers.py @@ -13,7 +13,7 @@ def test_get_variables(): """ Test dictionary output with {source name: [variables]} - from given pp component. + from given pp component. """ # Load the yaml config with open(YAML_EX,'r') as f: @@ -27,7 +27,11 @@ def test_get_variables(): {'atmos_scalar_static_test_vars_fail2': 'all', 'atmos_static_scalar_test_vars_fail': ['bk', 'no_var']}] - components = ["atmos_scalar_test_vars_CNAME", "atmos_scalar_test_vars_fail_CNAME", "atmos_scalar_static_test_vars_fail_CNAME"] + components = [ + "atmos_scalar_test_vars_CNAME", + "atmos_scalar_test_vars_fail_CNAME", + "atmos_scalar_static_test_vars_fail_CNAME" + ] out1 = helpers.get_variables(yml = yml, pp_comp = components[0]) out2 = helpers.get_variables(yml = yml, pp_comp = components[1]) @@ -39,7 +43,7 @@ def test_get_variables(): out1 == expected_dicts[0], out2 == expected_dicts[1], out3 == expected_dicts[2]]) - + def test_get_variables_err(): """ Test get_variables() returns an error when given inappropriate input @@ -52,8 +56,8 @@ def test_change_directory(): """ Test change_directory context manager. This allows for the changing of directories within - a function's execution. - After execution of the function, user should be in + a function's execution. + After execution of the function, user should be in the same directory the script started in. """ original_dir = Path.cwd() diff --git a/fre/catalog/frecatalog.py b/fre/catalog/frecatalog.py index 227ea35ca..64f8ecb24 100644 --- a/fre/catalog/frecatalog.py +++ b/fre/catalog/frecatalog.py @@ -35,7 +35,8 @@ def catalog_cli(): help = "Ensure output catalog is strictly compliant with schema") @click.pass_context def build(context, input_path = None, output_path = None, config = None, filter_realm = None, - filter_freq = None, filter_chunk = None, verbose = False, overwrite = False, append = False, slow = False, strict = False): + filter_freq = None, filter_chunk = None, verbose = False, overwrite = False, + append = False, slow = False, strict = False): # pylint: disable=unused-argument """ - Generate .csv and .json files for catalog """ context.forward(gen_intake_gfdl.create_catalog_cli) @@ -52,7 +53,9 @@ def build(context, input_path = None, output_path = None, config = None, filter_ @click.pass_context def validate(context, json_path, json_template_path, vocab, proper_generation, test_failure): # pylint: disable=unused-argument - """ - Validate catalogs against controlled vocabulary as provided by particular JSON schemas per vocabulary type (vocabulary validation) OR Validate a catalog against catalog schema template (proper generation checking) """ + """ - Validate catalogs against controlled vocabulary as provided by particular JSON schemas + per vocabulary type (vocabulary validation) OR Validate a catalog against catalog schema + template (proper generation checking) """ context.forward(compval.main) @catalog_cli.command() diff --git a/fre/cmor/cmor_config.py b/fre/cmor/cmor_config.py index a79a1bb88..e49fb0bdd 100644 --- a/fre/cmor/cmor_config.py +++ b/fre/cmor/cmor_config.py @@ -214,8 +214,8 @@ def cmor_config_subtool( lines.append(f" - component_name: '{component_name}'") lines.append(f" variable_list: '{variable_list}'") - lines.append(f" data_series_type: 'ts'") - lines.append(f" chunk: *PP_CMIP_CHUNK") + lines.append(" data_series_type: 'ts'") + lines.append(" chunk: *PP_CMIP_CHUNK") # ---- write output YAML ---- diff --git a/fre/cmor/frecmor.py b/fre/cmor/frecmor.py index e8858ac6e..e7d774d78 100755 --- a/fre/cmor/frecmor.py +++ b/fre/cmor/frecmor.py @@ -180,7 +180,8 @@ def run(indir, varlist, table_config, exp_config, outdir, run_one, opt_var_name, @cmor_cli.command() @click.option("-d", "--dir_targ", type=str, required=True, help="Target directory") @click.option("-o", "--output_variable_list", type=str, required=True, help="Output variable list file") -@click.option("-t", "--mip_table", type=str, required=False, default=None, help="Target MIP table for making variable list") +@click.option("-t", "--mip_table", type=str, required=False, default=None, + help="Target MIP table for making variable list") def varlist(dir_targ, output_variable_list, mip_table): """ Create a simple variable list from netCDF files in the target directory. diff --git a/fre/cmor/tests/test_cmor_find_subtool.py b/fre/cmor/tests/test_cmor_find_subtool.py index adae8f0ae..20b1466ed 100644 --- a/fre/cmor/tests/test_cmor_find_subtool.py +++ b/fre/cmor/tests/test_cmor_find_subtool.py @@ -69,7 +69,7 @@ def test_find_subtool_no_json_files_in_dir_err(temp_dir): def test_find_subtool_no_varlist_no_optvarname_err(temp_dir): ''' test no opt_var_name AND no varlist error ''' - with pytest.raises(ValueError, match=f'RROR: no opt_var_name given but also no content in variable list!!! exit!'): + with pytest.raises(ValueError, match='RROR: no opt_var_name given but also no content in variable list!!! exit!'): cmor_find_subtool(json_var_list=None, json_table_config_dir='fre/tests/test_files/cmip6-cmor-tables/Tables', opt_var_name=None) diff --git a/fre/cmor/tests/test_cmor_helpers_update_grid_label.py b/fre/cmor/tests/test_cmor_helpers_update_grid_label.py index 2d6abdc47..9a336feec 100644 --- a/fre/cmor/tests/test_cmor_helpers_update_grid_label.py +++ b/fre/cmor/tests/test_cmor_helpers_update_grid_label.py @@ -70,7 +70,10 @@ def test_missing_nom_res_field(temp_json_file): new_nom_res = "updated_nom_res" # Act & Assert - with pytest.raises(KeyError, match='"Error updating \'nominal_resolution\'. Ensure the field exists and is modifiable."'): + with pytest.raises( + KeyError, + match='"Error updating \'nominal_resolution\'. Ensure the field exists and is modifiable."' + ): update_grid_and_label(temp_json_file, new_grid_label, new_grid, new_nom_res) def test_missing_grid_label_field(temp_json_file): diff --git a/fre/list_/tests/__init__.py b/fre/list_/tests/__init__.py index 8b1378917..e69de29bb 100644 --- a/fre/list_/tests/__init__.py +++ b/fre/list_/tests/__init__.py @@ -1 +0,0 @@ - diff --git a/fre/list_/tests/test_list_experiments_script.py b/fre/list_/tests/test_list_experiments_script.py index 21266536d..5f0f29493 100644 --- a/fre/list_/tests/test_list_experiments_script.py +++ b/fre/list_/tests/test_list_experiments_script.py @@ -1,12 +1,15 @@ """ Test fre list exps """ -import pytest from pathlib import Path + +import pytest import yaml + from fre.list_ import list_experiments_script from fre.yamltools import helpers + # SET-UP TEST_DIR = Path("fre/make/tests") NM_EXAMPLE = Path("null_example") @@ -31,13 +34,18 @@ def test_exp_list(caplog): ' - null_model_2' ] for i in check_out: assert i in caplog.text - + # make sure the level is INFO for record in caplog.records: assert record.levelname == "INFO" # Test validation -@pytest.mark.skip(reason='cannot validate with current schema at the moment. Current schemas include final "combined" schema to validate compile and pp information. Both of these "clean" the final yaml information for only what is needed. This final combined yaml info does not include the "experiments" section, which is the section being read and parsed for information') +@pytest.mark.skip( + reason='cannot validate with current schema at the moment. Current schemas include final ' + '"combined" schema to validate compile and pp information. Both of these "clean" the final ' + 'yaml information for only what is needed. This final combined yaml info does not include the ' + '"experiments" section, which is the section being read and parsed for information' +) def test_yamlvalidate(): ''' Test yaml is being validated ''' yamlfilepath = Path(f"{TEST_DIR}/{NM_EXAMPLE}/{YAMLFILE}") diff --git a/fre/list_/tests/test_list_platforms_script.py b/fre/list_/tests/test_list_platforms_script.py index 0a9a6924d..8dd7eba56 100644 --- a/fre/list_/tests/test_list_platforms_script.py +++ b/fre/list_/tests/test_list_platforms_script.py @@ -1,12 +1,15 @@ """ Test fre list platforms """ -import pytest from pathlib import Path + +import pytest import yaml + from fre.list_ import list_platforms_script from fre.yamltools import combine_yamls_script as cy + # SET-UP TEST_DIR = Path("fre/make/tests") NM_EXAMPLE = Path("null_example") @@ -33,7 +36,7 @@ def test_platformyaml_exists(): ''' Test platforms yaml exists ''' assert Path(f"{TEST_DIR}/{NM_EXAMPLE}/platforms.yaml").exists() -# Test whole tool +# Test whole tool def test_platforms_list_correct(caplog): ''' Test fre list platforms subtool ''' list_platforms_script.list_platforms_subtool(f"{TEST_DIR}/{NM_EXAMPLE}/{YAMLFILE}") diff --git a/fre/list_/tests/test_list_pp_components_script.py b/fre/list_/tests/test_list_pp_components_script.py index 0b4d926e1..7372ea645 100644 --- a/fre/list_/tests/test_list_pp_components_script.py +++ b/fre/list_/tests/test_list_pp_components_script.py @@ -1,13 +1,16 @@ """ Test fre list pp-comps """ -import pytest from pathlib import Path + +import pytest import yaml + from fre.list_ import list_pp_components_script from fre.yamltools import combine_yamls_script as cy from fre.yamltools import helpers + # SET-UP TEST_DIR = Path("fre/pp/tests") AM5_EXAMPLE = Path("AM5_example") @@ -45,7 +48,7 @@ def test_exp_list(caplog): for i in check_out: assert i in caplog.text - + # make sure the level is INFO for record in caplog.records: assert record.levelname == "INFO" diff --git a/fre/make/create_compile_script.py b/fre/make/create_compile_script.py index a874b98bb..5111f304f 100644 --- a/fre/make/create_compile_script.py +++ b/fre/make/create_compile_script.py @@ -2,16 +2,22 @@ Creates a compile script to compile the model and generate a model executable. ''' -import os import logging - -from pathlib import Path +import os from multiprocessing.dummy import Pool +from pathlib import Path +from typing import Optional import fre.yamltools.combine_yamls_script as cy -from typing import Optional from fre.make.make_helpers import get_mktemplate_path -from .gfdlfremake import varsfre, yamlfre, targetfre, buildBaremetal + +from .gfdlfremake import ( + buildBaremetal, + targetfre, + varsfre, + yamlfre +) + fre_logger = logging.getLogger(__name__) @@ -28,7 +34,8 @@ def compile_create(yamlfile:str, platform:str, target:str, njobs: int = 4, :type platform: str :param target: Predefined FRE targets; options include [prod/debug/repro]-openmp :type target: str - :param njobs: Used for parallelism with make; number of files to build simultaneously; on a per-build basis (default 4) + :param njobs: Used for parallelism with make; number of files to build + simultaneously; on a per-build basis (default 4) :type njobs: int :param nparallel: Number of concurrent model builds (default 1) :type nparallel: int diff --git a/fre/make/create_docker_script.py b/fre/make/create_docker_script.py index d4fd31402..d26ff4411 100644 --- a/fre/make/create_docker_script.py +++ b/fre/make/create_docker_script.py @@ -4,19 +4,27 @@ If the build script is executed, a singularity image file (.sif) is generated. ''' -import os import logging - +import os import subprocess +from typing import Optional import fre.yamltools.combine_yamls_script as cy -from typing import Optional from fre.make.make_helpers import get_mktemplate_path -from .gfdlfremake import varsfre, targetfre, yamlfre, buildDocker + +from .gfdlfremake import ( + buildDocker, + targetfre, + varsfre, + yamlfre +) + fre_logger = logging.getLogger(__name__) -def dockerfile_create(yamlfile:str, platform:str, target:str, execute: Optional[bool] = False, no_format_transfer: Optional[bool] = False): +def dockerfile_create(yamlfile:str, platform:str, target:str, + execute: Optional[bool] = False, + no_format_transfer: Optional[bool] = False): """ Creates the dockerfile and container build script for a container build diff --git a/fre/make/create_makefile_script.py b/fre/make/create_makefile_script.py index b8bdcbe11..aba71ea8c 100644 --- a/fre/make/create_makefile_script.py +++ b/fre/make/create_makefile_script.py @@ -7,7 +7,7 @@ from pathlib import Path import fre.yamltools.combine_yamls_script as cy -from fre.make.make_helpers import get_mktemplate_path +from fre.make.make_helpers import get_mktemplate_path from .gfdlfremake import makefilefre, varsfre, targetfre, yamlfre fre_logger = logging.getLogger(__name__) @@ -15,7 +15,7 @@ def makefile_create(yamlfile: str, platform: str, target:str): """ Creates the makefile for model compilation - + :param yamlfile: Model compile YAML file :type yamlfile: str :param platform: FRE platform; defined in the platforms yaml @@ -23,7 +23,7 @@ def makefile_create(yamlfile: str, platform: str, target:str): :type platform: str :param target: Predefined FRE targets; options include [prod/debug/repro]-openmp :type target: str - :raises ValueError: Error if platform does not exist in platforms yaml configuration + :raises ValueError: Error if platform does not exist in platforms yaml configuration .. note:: If additional library dependencies are defined in the compile.yaml file: diff --git a/fre/make/tests/test_create_compile.py b/fre/make/tests/test_create_compile.py index 292196231..01e07be06 100644 --- a/fre/make/tests/test_create_compile.py +++ b/fre/make/tests/test_create_compile.py @@ -3,10 +3,13 @@ """ import os import shutil -import pytest from pathlib import Path + +import pytest + from fre.make import create_compile_script + ## SET-UP TEST_DIR = Path("fre/make/tests") NM_EXAMPLE = Path("null_example") diff --git a/fre/pp/__init__.py b/fre/pp/__init__.py index 3f2e15bb3..b0daf2cd5 100644 --- a/fre/pp/__init__.py +++ b/fre/pp/__init__.py @@ -9,7 +9,9 @@ def make_workflow_name(experiment : Optional[str] = None, user to the shell being used by python, we split/reform the string to remove semi-colons or spaces that may be used to execute an arbitrary command with elevated privileges. - :param experiment: One of the postprocessing experiment names from the yaml displayed by fre list exps -y $yamlfile (e.g. c96L65_am5f4b4r0_amip), default None + :param experiment: One of the postprocessing experiment names from the + yaml displayed by fre list exps -y $yamlfile + (e.g. c96L65_am5f4b4r0_amip), default None :type experiment: str :param platform: The location + compiler that was used to run the model (e.g. gfdl.ncrc5-deploy), default None :type platform: str diff --git a/fre/pp/checkout_script.py b/fre/pp/checkout_script.py index bb811d2be..1c7c90849 100644 --- a/fre/pp/checkout_script.py +++ b/fre/pp/checkout_script.py @@ -5,23 +5,24 @@ 3. branch not given, folder exists, 4. branch given and folder exists ''' +import logging import os import subprocess -import logging -fre_logger = logging.getLogger(__name__) - from . import make_workflow_name - from ..fre import version as fre_ver +fre_logger = logging.getLogger(__name__) + FRE_WORKFLOWS_URL = 'https://github.com/NOAA-GFDL/fre-workflows.git' def checkout_template(experiment = None, platform = None, target = None, branch = None): """ Create a directory and checkout the workflow template files from the repo - :param experiment: One of the postprocessing experiment names from the yaml displayed by fre list exps -y $yamlfile (e.g. c96L65_am5f4b4r0_amip), default None + :param experiment: One of the postprocessing experiment names from the + yaml displayed by fre list exps -y $yamlfile + (e.g. c96L65_am5f4b4r0_amip), default None :type experiment: str :param platform: The location + compiler that was used to run the model (e.g. gfdl.ncrc5-deploy), default None :type platform: str diff --git a/fre/pp/configure_script_yaml.py b/fre/pp/configure_script_yaml.py index cc4aa279f..b0aeda6cf 100644 --- a/fre/pp/configure_script_yaml.py +++ b/fre/pp/configure_script_yaml.py @@ -55,7 +55,13 @@ def validate_yaml(yamlfile: dict) -> None: raise ValueError("Unclear error from validation. Please try to find the error and try again.") from exc #################### -def rose_init(experiment: str, platform: str, target: str) -> tuple[metomi.rose.config.ConfigNode, metomi.rose.config.ConfigNode, metomi.rose.config.ConfigNode]: +def rose_init( + experiment: str, platform: str, target: str +) -> tuple[ + metomi.rose.config.ConfigNode, + metomi.rose.config.ConfigNode, + metomi.rose.config.ConfigNode +]: """ Initializes the rose suite and app configurations. @@ -155,7 +161,9 @@ def set_rose_suite(yamlfile: dict, rose_suite: metomi.rose.config.ConfigNode) -> rose_suite.set(keys=['template variables', key.upper()], value=quote_rose_values(value)) #################### -def set_rose_apps(yamlfile: dict, rose_regrid: metomi.rose.config.ConfigNode, rose_remap: metomi.rose.config.ConfigNode) -> None: +def set_rose_apps(yamlfile: dict, + rose_regrid: metomi.rose.config.ConfigNode, + rose_remap: metomi.rose.config.ConfigNode) -> None: """ Sets items in the regrid and remap rose app configurations. diff --git a/fre/pp/frepp.py b/fre/pp/frepp.py index 025f88a11..55c75a7db 100644 --- a/fre/pp/frepp.py +++ b/fre/pp/frepp.py @@ -1,8 +1,8 @@ ''' fre pp ''' -import click import logging -fre_logger = logging.getLogger(__name__) + +import click #fre tools from . import checkout_script @@ -18,6 +18,8 @@ from . import wrapper_script from . import split_netcdf_script +fre_logger = logging.getLogger(__name__) + # fre pp @click.group(help=click.style(" - pp subcommands", fg=(57,139,210))) def pp_cli(): @@ -165,19 +167,36 @@ def histval(history,date_string,warn): #fre pp split-netcdf-wrapper @pp_cli.command() @click.option('-i', '--inputdir', required=True, - help='Path to a directory in which to search for netcdf files to split. Files matching the pattern in $history-source will be split.') + help='Path to a directory in which to search for netcdf ' + 'files to split. Files matching the pattern in ' + '$history-source will be split.') @click.option('-o', '--outputdir', required=True, - help='Path to a directory to which to write split netcdf files.') + help='Path to a directory to which to write split ' + 'netcdf files.') @click.option('-c', '--component', required=False, default=None, - help='component specified in yamlfile under postprocess:components. Needs to be the same component that contains the sources:history-file. Conflicts with --split-all-vars.') + help='component specified in yamlfile under ' + 'postprocess:components. Needs to be the same ' + 'component that contains the ' + 'sources:history-file. ' + 'Conflicts with --split-all-vars.') @click.option('-s', '--history-source', required=True, default=None, - help='history-file specification under postprocess:components:type=component:sources in the fre postprocess config yamlfile. Used to match files in inputdir.') + help='history-file specification under ' + 'postprocess:components:type=component:sources ' + 'in the fre postprocess config yamlfile. ' + 'Used to match files in inputdir.') @click.option('-y', '--yamlfile', required=False, default=None, - help='fre postprocessing .yml file from which to get the variable filtering list under postprocess:components:type=component:variables. Conflicts with --split-all-vars.') + help='fre postprocessing .yml file from which to get ' + 'the variable filtering list under ' + 'postprocess:components:type=component:variables. ' + 'Conflicts with --split-all-vars.') @click.option('--use-subdirs', '-u', is_flag=True, default=False, - help="Whether to search subdirs underneath $inputdir for netcdf files. Defaults to false. This option is used in flow.cylc when regridding.") + help="Whether to search subdirs underneath $inputdir " + "for netcdf files. Defaults to false. This option " + "is used in flow.cylc when regridding.") @click.option('--split-all-vars', '-a', is_flag=True, default=False, - help="Whether to ignore other config options and split all vars in the file. Defaults to false. Conflicts with -c, -s and -y options.") + help="Whether to ignore other config options and split " + "all vars in the file. Defaults to false. " + "Conflicts with -c, -s and -y options.") def split_netcdf_wrapper(inputdir, outputdir, component, history_source, use_subdirs, yamlfile, split_all_vars): ''' Splits all netcdf files matching the pattern specified by $history_source in $inputdir into files with a single data variable written to $outputdir. If $yamlfile contains @@ -194,12 +213,18 @@ def split_netcdf_wrapper(inputdir, outputdir, component, history_source, use_sub fre_logger.error('''Error in split_netcdf_wrapper arg parsing: --split-all-vars was set and one or more of mutually exclusive options --component and --yamlfile was also set! Either unset --split-all-vars or parse the varlist from the yaml - do not try do do both!''') - split_netcdf_script.split_netcdf(inputdir, outputdir, component, history_source, use_subdirs, yamlfile, split_all_vars) + split_netcdf_script.split_netcdf( + inputdir, outputdir, component, history_source, + use_subdirs, yamlfile, split_all_vars + ) #fre pp split-netcdf @pp_cli.command() -@click.option('-f', '--file', type = str, required=True, help='path to a netcdf file') -@click.option('-o', '--outputdir', type = str, required=True, help='path to a directory to which to write single-data-variable output files') +@click.option('-f', '--file', type = str, required=True, + help='path to a netcdf file') +@click.option('-o', '--outputdir', type = str, required=True, + help='path to a directory to which to write ' + 'single-data-variable output files') @click.option('-v', '--variables', type = str, required=True, help='''Specifies which variables in $file are split and written to $outputdir. Either a string "all" or a comma-separated string of variable names ("tasmax,tasmin,pr")''') @@ -220,7 +245,8 @@ def split_netcdf(file, outputdir, variables): @pp_cli.command() @click.option('--path','-p', required=True, help="Path to postprocessed time-series file") def ppval(path): - """ Determines an estimated number of timesteps from a postprocessed time-series file's name and run nccheck on it """ + """ Determines an estimated number of timesteps from a postprocessed + time-series file's name and run nccheck on it """ ppval_script.validate(path) #fre pp all diff --git a/fre/pp/install_script.py b/fre/pp/install_script.py index a57f3196b..e6561697f 100644 --- a/fre/pp/install_script.py +++ b/fre/pp/install_script.py @@ -1,24 +1,27 @@ ''' fre pp install ''' -from pathlib import Path +import logging import os import subprocess -import logging -fre_logger =logging.getLogger(__name__) +from pathlib import Path from . import make_workflow_name +fre_logger = logging.getLogger(__name__) + def install_subtool(experiment, platform, target): """ - Install the Cylc workflow definition located in - + Install the Cylc workflow definition located in + ~/cylc-src/$(experiment)__$(platform)__$(target) - + to - + ~/cylc-run/$(experiment)__$(platform)__$(target) - - :param experiment: One of the postprocessing experiment names from the yaml displayed by fre list exps -y $yamlfile (e.g. c96L65_am5f4b4r0_amip), default None + + :param experiment: One of the postprocessing experiment names from the + yaml displayed by fre list exps -y $yamlfile + (e.g. c96L65_am5f4b4r0_amip), default None :type experiment: str :param platform: The location + compiler that was used to run the model (e.g. gfdl.ncrc5-deploy), default None :type platform: str @@ -27,7 +30,7 @@ def install_subtool(experiment, platform, target): """ #name = experiment + '__' + platform + '__' + target - workflow_name = make_workflow_name(experiment, platform, target) + workflow_name = make_workflow_name(experiment, platform, target) # if the cylc-run directory already exists, # then check whether the cylc expanded definition (cylc config) # is identical. If the same, good. If not, bad. diff --git a/fre/pp/nccheck_script.py b/fre/pp/nccheck_script.py index 15d293f93..c00703126 100644 --- a/fre/pp/nccheck_script.py +++ b/fre/pp/nccheck_script.py @@ -37,5 +37,13 @@ def check(file_path: str, num_steps: int): return 0 else: - fre_logger.error(f" Unexpected number of timesteps found in {file_path}. Found: {num_actual_steps} timesteps Expected: {num_steps} timesteps") - raise ValueError(f" Unexpected number of timesteps found in {file_path}. Found: {num_actual_steps} timesteps Expected: {num_steps} timesteps") + fre_logger.error( + f" Unexpected number of timesteps found in {file_path}. " + f"Found: {num_actual_steps} timesteps " + f"Expected: {num_steps} timesteps" + ) + raise ValueError( + f" Unexpected number of timesteps found in {file_path}. " + f"Found: {num_actual_steps} timesteps " + f"Expected: {num_steps} timesteps" + ) diff --git a/fre/pp/ppval_script.py b/fre/pp/ppval_script.py index 6973e3d8d..7481d446d 100644 --- a/fre/pp/ppval_script.py +++ b/fre/pp/ppval_script.py @@ -1,6 +1,7 @@ -''' -This script will determine an estimated number of timesteps from a postprocessed time-series file's name and run nccheck on it. -Ran during time-series file creation during rename-split-to-pp and make-timeseries tasks in fre postprocessing workflow. +''' +This script will determine an estimated number of timesteps from a postprocessed +time-series file's name and run nccheck on it. +Ran during time-series file creation during rename-split-to-pp and make-timeseries tasks in fre postprocessing workflow. ''' import logging @@ -15,16 +16,18 @@ fre_logger = logging.getLogger(__name__) -def getenot(date_start: str, - date_end: str, - chunk_type: str, +def getenot(date_start: str, + date_end: str, + chunk_type: str, cal: str): """ - Returns the estimated number of timesteps using elapsed time (calculated using date_start/date_end) and data frequency (provided in chunk_type argument). + Returns the estimated number of timesteps using elapsed time + (calculated using date_start/date_end) and data frequency + (provided in chunk_type argument). Date string formats must be YYYY,YYYYMM,YYYYMMDD,YYYYMMDDHH,or YYYYMMDDHH:mm - + Ex: Will return value of 36 (timesteps) for 3 years of data with monthly frequency output (3 years * 12 months) - + :param date_start: Starting time of data chunk :type date_start: str :param date_end: Ending time of data chunk @@ -120,19 +123,26 @@ def getenot(date_start: str, else: raise ValueError(f"Unknown chunk_type '{chunk_type}'") - fre_logger.debug(f"date start: {date_start}; date end: {date_end}; chunk_type: {chunk_type}; calendar: {cal}; timesteps: {enot}") - + fre_logger.debug( + f"date start: {date_start}; date end: {date_end}; " + f"chunk_type: {chunk_type}; calendar: {cal}; timesteps: {enot}" + ) + return enot def validate(filepath: str): """ - Compares the number of timesteps in a postprocessed time-series netCDF (.nc) file to the number of expected timesteps as calculated using elapsed time and data frequency. + Compares the number of timesteps in a postprocessed time-series + netCDF (.nc) file to the number of expected timesteps as calculated + using elapsed time and data frequency. Runs nccheck on every timeseries file in pp dir. - + :param filepath: Path to time-series file to be checked :type filepath: str - :raises ValueError: Calendar name doesn't follow cftime conventions, frequency can't be determined from filepath, or number of timesteps differ from expectation + :raises ValueError: Calendar name doesn't follow cftime conventions, + frequency can't be determined from filepath, or number of + timesteps differ from expectation :return: Returns 0 unless an exception is raised or number of timesteps differ from expectation :rtype: int """ @@ -141,8 +151,12 @@ def validate(filepath: str): import re # Get the date range from the filename # This regular expression accepts at minimum '.YYYY-YYYY.' date strings. - # If month, day, hour, and minute strings are present it will identify them by looking for groups of two digits after the year string - match = re.compile(r"\.((?:\d{4})(?:\d{2}(?:\d{2}(?:\d{2}(?::\d{2})?)?)?)?)-((?:\d{4})(?:\d{2}(?:\d{2}(?:\d{2}(?::\d{2})?)?)?)?)\.") + # If month, day, hour, and minute strings are present it will identify them + # by looking for groups of two digits after the year string + match = re.compile( + r"\.((?:\d{4})(?:\d{2}(?:\d{2}(?:\d{2}(?::\d{2})?)?)?)?)-((?:\d{4})" + r"(?:\d{2}(?:\d{2}(?:\d{2}(?::\d{2})?)?)?)?)\." + ) filename = os.path.basename(filepath) date_range = match.search(filename) @@ -150,7 +164,9 @@ def validate(filepath: str): # date_range[0] is the full match (e.g., ".202201-202501." # date_range[1] is the start date (e.g., "202201") # date_range[2] is the end date (e.g., "202501") - # This regular expression captures date start/end individually by first capturing the year as a 4 digit number then capturing each following group of two digits + # This regular expression captures date start/end individually by first + # capturing the year as a 4 digit number then capturing each following + # group of two digits # Minute string is identified by ':' followed with two digits d_regex = re.compile(r"(\d{4})(\d{2})?(\d{2})?(\d{2})?(?::(\d{2}))?") date_end = d_regex.search(date_range[2]) @@ -194,7 +210,8 @@ def validate(filepath: str): # Sub-daily to hourly elif date_length == 10: # We would rather not check filepaths but it's necessary for sub-daily files - # Path elements contains the directories from the filepath.. we use this to determine frequency/chunk_size in sub-daily files + # Path elements contains the directories from the filepath.. + # we use this to determine frequency/chunk_size in sub-daily files path_elements = os.path.abspath(filepath).split('/') expected_frequencies = ['6hr', 'PT6H', '3hr', 'PT3H', '1hr', 'PT1H', '30min', 'PT30M', 'PT0.5H'] @@ -216,7 +233,12 @@ def validate(filepath: str): # If none of the expected frequencies are found in filepath, raise ValueError if all(freq not in path_elements for freq in expected_frequencies): - raise ValueError(f" Cannot determine frequency from {filepath}. Sub-daily files must at minimum be placed in a directory corresponding to data frequency: '6hr, 'PT6H', '3hr, 'PT3H', '1hr, 'PT1H', '30min, 'PT30M, 'PT0.5H'") + raise ValueError( + f" Cannot determine frequency from {filepath}. Sub-daily" + " files must at minimum be placed in a directory" + " corresponding to data frequency: '6hr, 'PT6H', '3hr," + " 'PT3H', '1hr, 'PT1H', '30min, 'PT30M, 'PT0.5H'" + ) elif date_length == 12: enot = getenot(date_start, date_end, '30minute', cal) diff --git a/fre/pp/run_script.py b/fre/pp/run_script.py index 13b4d18af..e8cc62da3 100644 --- a/fre/pp/run_script.py +++ b/fre/pp/run_script.py @@ -1,25 +1,33 @@ ''' fre pp run ''' +import logging import subprocess import time -import logging -fre_logger = logging.getLogger(__name__) from . import make_workflow_name +fre_logger = logging.getLogger(__name__) + def pp_run_subtool(experiment = None, platform = None, target = None, pause = False, no_wait = False): """ Starts, pauses or restarts the Cylc workflow described by $(experiment)__$(platform)__$(target) - :param experiment: One of the postprocessing experiment names from the yaml displayed by fre list exps -y $yamlfile (e.g. c96L65_am5f4b4r0_amip), default None + :param experiment: One of the postprocessing experiment names from the yaml + displayed by fre list exps -y $yamlfile + (e.g. c96L65_am5f4b4r0_amip), default None :type experiment: str - :param platform: The location + compiler that was used to run the model (e.g. gfdl.ncrc5-deploy), default None + :param platform: The location + compiler that was used to run the model + (e.g. gfdl.ncrc5-deploy), default None :type platform: str - :param target: Options used for the model compiler (e.g. prod-openmp), default None + :param target: Options used for the model compiler + (e.g. prod-openmp), default None :type target: str - :param pause: Whether to pause the current Cylc workflow. Defaults to false, which starts or restarts the workflow. + :param pause: Whether to pause the current Cylc workflow. Defaults to + false, which starts or restarts the workflow. :type pause: boolean - :param no_wait: Whether to avoid waiting at least 30 seconds for confirmation that the workflow is stopped. Defaults to False, which waits for confirmation. + :param no_wait: Whether to avoid waiting at least 30 seconds for + confirmation that the workflow is stopped. Defaults to False, + which waits for confirmation. :type no_wait: boolean """ if None in [experiment, platform, target]: diff --git a/fre/pp/split_netcdf_script.py b/fre/pp/split_netcdf_script.py index e19cd5763..ed08688d6 100644 --- a/fre/pp/split_netcdf_script.py +++ b/fre/pp/split_netcdf_script.py @@ -7,16 +7,17 @@ # Input format: date.component(.tileX).nc # Output format: date.component.var(.tileX).nc +import logging import os -from os import path -import subprocess import re +import subprocess import sys -import xarray as xr +from itertools import chain +from os import path from pathlib import Path + +import xarray as xr import yaml -from itertools import chain -import logging from fre.app.helpers import get_variables @@ -35,299 +36,319 @@ def split_netcdf(inputDir, outputDir, component, history_source, use_subdirs, yamlfile, split_all_vars=False): - ''' - Given a directory of netcdf files, splits those netcdf files into separate - files for each data variable and copies the data variable files of interest - to the output directory - - Intended to work with data structured for fre-workflows and fre-workflows file naming conventions - - Sample infile name convention: "19790101.atmos_tracer.tile6.nc" - - :param inputDir: directory containing netcdf files - :type inputDir: string - :param outputDir: directory to which to write netcdf files - :type outputDir: string - :param component: the 'component' element we are currently working with in the yaml - :type component: string - :param history_source: a history_file under a 'source' under the 'component' that we are working with. Is used to identify the files in inputDir. - :type history_source: string - :param use_subdirs: whether to recursively search through inputDir under the subdirectories. Used when regridding. - :type use_subdirs: boolean - :param yamlfile: - a .yml config file for fre postprocessing - :type yamlfile: string - :param split_all_vars: Whether to skip parsing the yamlfile and split all available vars in the file. Defaults to False. - :type split_all_vars: boolean - ''' - - #Verify input/output dirs exist and are dirs - if not (os.path.isdir(inputDir)): - fre_logger.error(f"error: input dir {inputDir} does not exist or is not a directory") - raise OSError(f"error: input dir {inputDir} does not exist or is not a directory") - if not (os.path.isdir(outputDir)): - if os.path.isfile(outputDir): - fre_logger.error(f"error: output dir {outputDir} is a file. Please specify a directory.") - else: - if not os.access(outputDir, os.W_OK): - fre_logger.error(f"error: cannot write to output dir {outputDir}") - - curr_dir = os.getcwd() - workdir = os.path.abspath(inputDir) - - fre_logger.debug(f"input dir: {inputDir}") - fre_logger.debug(f"output dir: {outputDir}") - - #note to self: if CYLC_TASK_PARAM_component isn't doing what we think it's - #doing, we can also use history_source to get the component but it's - #going to be a bit of a pain - if split_all_vars: - varlist = "all" - else: - ydict = yaml.safe_load(Path(yamlfile).read_text()) - vardict = get_variables(ydict, component) - if vardict is None or history_source not in vardict.keys(): - fre_logger.error(f"error: either component {component} not defined or source {history_source} not defined under component {component} in yamlfile {yamlfile}.") - raise ValueError(f"error: either component {component} not defined or source {history_source} not defined under component {component} in yamlfile {yamlfile}.") + ''' + Given a directory of netcdf files, splits those netcdf files into separate + files for each data variable and copies the data variable files of interest + to the output directory + + Intended to work with data structured for fre-workflows and fre-workflows file naming conventions + - Sample infile name convention: "19790101.atmos_tracer.tile6.nc" + + :param inputDir: directory containing netcdf files + :type inputDir: string + :param outputDir: directory to which to write netcdf files + :type outputDir: string + :param component: the 'component' element we are currently working with in the yaml + :type component: string + :param history_source: a history_file under a 'source' under the + 'component' that we are working with. Is used to identify the + files in inputDir. + :type history_source: string + :param use_subdirs: whether to recursively search through inputDir + under the subdirectories. Used when regridding. + :type use_subdirs: boolean + :param yamlfile: - a .yml config file for fre postprocessing + :type yamlfile: string + :param split_all_vars: Whether to skip parsing the yamlfile and split + all available vars in the file. Defaults to False. + :type split_all_vars: boolean + ''' + + #Verify input/output dirs exist and are dirs + if not os.path.isdir(inputDir): + fre_logger.error(f"error: input dir {inputDir} does not exist or is not a directory") + raise OSError(f"error: input dir {inputDir} does not exist or is not a directory") + if not os.path.isdir(outputDir): + if os.path.isfile(outputDir): + fre_logger.error(f"error: output dir {outputDir} is a file. Please specify a directory.") + else: + if not os.access(outputDir, os.W_OK): + fre_logger.error(f"error: cannot write to output dir {outputDir}") + + curr_dir = os.getcwd() + workdir = os.path.abspath(inputDir) + + fre_logger.debug(f"input dir: {inputDir}") + fre_logger.debug(f"output dir: {outputDir}") + + #note to self: if CYLC_TASK_PARAM_component isn't doing what we think it's + #doing, we can also use history_source to get the component but it's + #going to be a bit of a pain + if split_all_vars: + varlist = "all" else: - varlist = vardict[history_source] - - #extend globbing used to find both tiled and non-tiled files - #all files that contain the current source:history_file name, - #0-1 instances of "tile" and end in .nc - #under most circumstances, this should match 1 file - #older regex - not currently working - #file_regex = f'*.{history_source}?(.tile?).nc' - #file_regex = f'*.{history_source}*.*.nc' - #glob.glob is NOT sufficient for this. It needs to match: - # '00020101.atmos_level_cmip.tile4.nc' - # '00020101.ocean_cobalt_omip_2d.nc' - file_regex = f'.*{history_source}(\\.tile.*)?.nc' - - #If in sub-dir mode, process the sub-directories instead of the main one - # and write to $outputdir/$subdir - if use_subdirs: - subdirs = [el for el in os.listdir(workdir) if os.path.isdir(os.path.join(workdir,el))] - num_subdirs = len(subdirs) - fre_logger.info(f"checking {num_subdirs} under {workdir}") - files_split = 0 - sd_string = ",".join(subdirs) - for sd in subdirs: - sdw = os.path.join(workdir,sd) - files=[os.path.join(sdw,el) for el in os.listdir(sdw) if re.match(file_regex, el) is not None] - if len(files) == 0: - fre_logger.info(f"No input files found; skipping subdir {sd}") - else: - output_subdir = os.path.join(os.path.abspath(outputDir), sd) - if not os.path.isdir(output_subdir): - os.mkdir(output_subdir) + ydict = yaml.safe_load(Path(yamlfile).read_text()) + vardict = get_variables(ydict, component) + if vardict is None or history_source not in vardict.keys(): + fre_logger.error( + f"error: either component {component} not defined or " + f"source {history_source} not defined under component " + f"{component} in yamlfile {yamlfile}." + ) + raise ValueError( + f"error: either component {component} not defined or " + f"source {history_source} not defined under component " + f"{component} in yamlfile {yamlfile}." + ) + else: + varlist = vardict[history_source] + + #extend globbing used to find both tiled and non-tiled files + #all files that contain the current source:history_file name, + #0-1 instances of "tile" and end in .nc + #under most circumstances, this should match 1 file + #older regex - not currently working + #file_regex = f'*.{history_source}?(.tile?).nc' + #file_regex = f'*.{history_source}*.*.nc' + #glob.glob is NOT sufficient for this. It needs to match: + # '00020101.atmos_level_cmip.tile4.nc' + # '00020101.ocean_cobalt_omip_2d.nc' + file_regex = f'.*{history_source}(\\.tile.*)?.nc' + + #If in sub-dir mode, process the sub-directories instead of the main one + # and write to $outputdir/$subdir + if use_subdirs: + subdirs = [el for el in os.listdir(workdir) if os.path.isdir(os.path.join(workdir,el))] + num_subdirs = len(subdirs) + fre_logger.info(f"checking {num_subdirs} under {workdir}") + files_split = 0 + sd_string = ",".join(subdirs) + for sd in subdirs: + sdw = os.path.join(workdir,sd) + files=[os.path.join(sdw,el) for el in os.listdir(sdw) if re.match(file_regex, el) is not None] + if len(files) == 0: + fre_logger.info(f"No input files found; skipping subdir {sd}") + else: + output_subdir = os.path.join(os.path.abspath(outputDir), sd) + if not os.path.isdir(output_subdir): + os.mkdir(output_subdir) + for infile in files: + split_file_xarray(infile, output_subdir, varlist) + files_split += 1 + fre_logger.info(f"{files_split} files split") + if files_split == 0: + fre_logger.error( + f"error: no files found in dirs {sd_string} under " + f"{workdir} that match pattern {file_regex}; " + "no splitting took place" + ) + raise OSError + else: + files_split = 0 + files=[os.path.join(workdir, el) for el in os.listdir(workdir) if re.match(file_regex, el) is not None] + # Split the files by variable for infile in files: - split_file_xarray(infile, output_subdir, varlist) - files_split += 1 - fre_logger.info(f"{files_split} files split") - if files_split == 0: - fre_logger.error(f"error: no files found in dirs {sd_string} under {workdir} that match pattern {file_regex}; no splitting took place") - raise OSError - else: - files_split = 0 - files=[os.path.join(workdir, el) for el in os.listdir(workdir) if re.match(file_regex, el) is not None] - # Split the files by variable - for infile in files: - split_file_xarray(infile, os.path.abspath(outputDir), varlist) - files_split += 1 - if len(files) == 0: - fre_logger.error(f"error: no files found in {workdir} that match pattern {file_regex}; no splitting took place") - raise OSError - - fre_logger.info(f"split-netcdf-wrapper call complete, having split {files_split} files") - sys.exit(0) #check this + split_file_xarray(infile, os.path.abspath(outputDir), varlist) + files_split += 1 + if len(files) == 0: + fre_logger.error( + f"error: no files found in {workdir} that match pattern " + f"{file_regex}; no splitting took place" + ) + raise OSError + + fre_logger.info(f"split-netcdf-wrapper call complete, having split {files_split} files") + sys.exit(0) #check this def split_file_xarray(infile, outfiledir, var_list='all'): - ''' - Given a netcdf infile containing one or more data variables, - writes out a separate file for each data variable in the file, including the - variable name in the filename. - if var_list if specified, only the vars in var_list are written to file; - if no vars in the file match the vars in var_list, no files are written. - - :param infile: input netcdf file - :type infile: string - :param outfiledir: writeable directory to which to write netcdf files - :type outfiledir: string - :param var_list: python list of string variable names or a string "all" - :type var_list: list of strings - ''' - if not os.path.isdir(outfiledir): - fre_logger.info("creating output directory") - os.makedirs(outfiledir) - - if not os.path.isfile(infile): - fre_logger.error(f"error: input file {infile} not found. Please check the path.") - raise OSError(f"error: input file {infile} not found. Please check the path.") - - dataset = xr.load_dataset(infile, decode_cf=False, decode_times=False, decode_coords="all") - allvars = dataset.data_vars.keys() - - #If you have a file of 3 or more dim vars, 2d-or-fewer vars are likely to be - #metadata vars; if your file is 2d vars, 1d vars are likely to be metadata. - max_ndims = get_max_ndims(dataset) - if max_ndims >= 3: - varsize = 2 - else: - varsize = 1 - fre_logger.debug(f"varsize: {varsize}") - #note: netcdf dimensions and xarray coords are NOT ALWAYS THE SAME THING. - #If they were, I could get away with the following: - #var_zerovars = [v for v in datavars if not len(dataset[v].coords) > 0]) - #instead of this: - var_shortvars = [v for v in allvars if (len(dataset[v].shape) < varsize) and v not in dataset._coord_names] - #having a variable listed as both a metadata var and a coordinate var seems to - #lead to the weird adding a _FillValue behavior - fre_logger.info(f"var patterns: {VAR_PATTERNS}") - fre_logger.info(f"1 or 2-d vars: {var_shortvars}") - #both combined gets you a decent list of non-diagnostic variables - var_exclude = list(set(VAR_PATTERNS + [str(el) for el in var_shortvars] )) - def matchlist(xstr): - ''' checks a string for matches in a list of patterns - - xstr: string to search for matches - var_exclude: list of patterns defined in VAR_EXCLUDE''' - allmatch = [re.search(el, xstr)for el in var_exclude] - #If there's at least one match in the var_exclude list (average_bnds is OK) - return len(list(set(allmatch))) > 1 - metavars = [el for el in allvars if matchlist(el)] - datavars = [el for el in allvars if not matchlist(el)] - fre_logger.debug(f"metavars: {metavars}") - fre_logger.debug(f"datavars: {datavars}") - fre_logger.debug(f"var filter list: {var_list}") - - #datavars does 2 things: keep track of which vars to write, and tell xarray - #which vars to drop. we need to separate those things for the variable filtering. - if var_list == "all": - write_vars = datavars - else: - if isinstance(var_list, str): - var_list = var_list.split(",") - var_list = list(set(var_list)) - write_vars = [el for el in datavars if el in var_list] - fre_logger.debug(f"intersection of datavars and var_list: {write_vars}") - - if len(write_vars) <= 0: - fre_logger.info(f"No data variables found in {infile}; no writes take place.") - else: - vc_encode = set_coord_encoding(dataset, dataset._coord_names) - for variable in write_vars: - fre_logger.info(f"splitting var {variable}") - #drop all data vars (diagnostics) that are not the current var of interest - #but KEEP the metadata vars - #(seriously, we need the time_bnds) - data2 = dataset.drop_vars([el for el in datavars if el is not variable]) - v_encode= set_var_encoding(dataset, metavars) - #combine 2 dicts into 1 dict - should be no shared keys, - #so the merge is straightforward - var_encode = {**vc_encode, **v_encode} - fre_logger.debug(f"var_encode settings: {var_encode}") - #Encoding principles for xarray: - # - no coords have a _FillValue - # - Everything is written out with THE SAME precision it was read in - # - Everything has THE SAME UNITS as it did when it was read in - var_outfile = fre_outfile_name(os.path.basename(infile), variable) - var_out = os.path.join(outfiledir, os.path.basename(var_outfile)) - data2.to_netcdf(var_out, encoding = var_encode) - fre_logger.debug(f"Wrote '{var_out}'") + ''' + Given a netcdf infile containing one or more data variables, + writes out a separate file for each data variable in the file, including the + variable name in the filename. + if var_list if specified, only the vars in var_list are written to file; + if no vars in the file match the vars in var_list, no files are written. + + :param infile: input netcdf file + :type infile: string + :param outfiledir: writeable directory to which to write netcdf files + :type outfiledir: string + :param var_list: python list of string variable names or a string "all" + :type var_list: list of strings + ''' + if not os.path.isdir(outfiledir): + fre_logger.info("creating output directory") + os.makedirs(outfiledir) + + if not os.path.isfile(infile): + fre_logger.error(f"error: input file {infile} not found. Please check the path.") + raise OSError(f"error: input file {infile} not found. Please check the path.") + + dataset = xr.load_dataset(infile, decode_cf=False, decode_times=False, decode_coords="all") + allvars = dataset.data_vars.keys() + + #If you have a file of 3 or more dim vars, 2d-or-fewer vars are likely to be + #metadata vars; if your file is 2d vars, 1d vars are likely to be metadata. + max_ndims = get_max_ndims(dataset) + if max_ndims >= 3: + varsize = 2 + else: + varsize = 1 + fre_logger.debug(f"varsize: {varsize}") + #note: netcdf dimensions and xarray coords are NOT ALWAYS THE SAME THING. + #If they were, I could get away with the following: + #var_zerovars = [v for v in datavars if not len(dataset[v].coords) > 0]) + #instead of this: + var_shortvars = [v for v in allvars if (len(dataset[v].shape) < varsize) and v not in dataset._coord_names] + #having a variable listed as both a metadata var and a coordinate var seems to + #lead to the weird adding a _FillValue behavior + fre_logger.info(f"var patterns: {VAR_PATTERNS}") + fre_logger.info(f"1 or 2-d vars: {var_shortvars}") + #both combined gets you a decent list of non-diagnostic variables + var_exclude = list(set(VAR_PATTERNS + [str(el) for el in var_shortvars] )) + def matchlist(xstr): + ''' checks a string for matches in a list of patterns + + xstr: string to search for matches + var_exclude: list of patterns defined in VAR_EXCLUDE''' + allmatch = [re.search(el, xstr)for el in var_exclude] + #If there's at least one match in the var_exclude list (average_bnds is OK) + return len(list(set(allmatch))) > 1 + metavars = [el for el in allvars if matchlist(el)] + datavars = [el for el in allvars if not matchlist(el)] + fre_logger.debug(f"metavars: {metavars}") + fre_logger.debug(f"datavars: {datavars}") + fre_logger.debug(f"var filter list: {var_list}") + + #datavars does 2 things: keep track of which vars to write, and tell xarray + #which vars to drop. we need to separate those things for the variable filtering. + if var_list == "all": + write_vars = datavars + else: + if isinstance(var_list, str): + var_list = var_list.split(",") + var_list = list(set(var_list)) + write_vars = [el for el in datavars if el in var_list] + fre_logger.debug(f"intersection of datavars and var_list: {write_vars}") + + if len(write_vars) <= 0: + fre_logger.info(f"No data variables found in {infile}; no writes take place.") + else: + vc_encode = set_coord_encoding(dataset, dataset._coord_names) + for variable in write_vars: + fre_logger.info(f"splitting var {variable}") + #drop all data vars (diagnostics) that are not the current var of interest + #but KEEP the metadata vars + #(seriously, we need the time_bnds) + data2 = dataset.drop_vars([el for el in datavars if el is not variable]) + v_encode= set_var_encoding(dataset, metavars) + #combine 2 dicts into 1 dict - should be no shared keys, + #so the merge is straightforward + var_encode = {**vc_encode, **v_encode} + fre_logger.debug(f"var_encode settings: {var_encode}") + #Encoding principles for xarray: + # - no coords have a _FillValue + # - Everything is written out with THE SAME precision it was read in + # - Everything has THE SAME UNITS as it did when it was read in + var_outfile = fre_outfile_name(os.path.basename(infile), variable) + var_out = os.path.join(outfiledir, os.path.basename(var_outfile)) + data2.to_netcdf(var_out, encoding = var_encode) + fre_logger.debug(f"Wrote '{var_out}'") def get_max_ndims(dataset): - ''' - Gets the maximum number of dimensions of a single var in an xarray Dataset object. Excludes coord vars, which should be single-dim anyway. - - :param dataset: xarray Dataset you want to query - :type dataset: xarray Dataset - :return: The max dimensions that a single var possesses in the Dataset - :rtype: int - ''' - allvars = dataset.data_vars.keys() - ndims = [len(dataset[v].shape) for v in allvars] - return max(ndims) + ''' + Gets the maximum number of dimensions of a single var in an xarray + Dataset object. Excludes coord vars, which should be single-dim anyway. + + :param dataset: xarray Dataset you want to query + :type dataset: xarray Dataset + :return: The max dimensions that a single var possesses in the Dataset + :rtype: int + ''' + allvars = dataset.data_vars.keys() + ndims = [len(dataset[v].shape) for v in allvars] + return max(ndims) def set_coord_encoding(dset, vcoords): - ''' - Gets the encoding settings needed for xarray to write out the coordinates - as expected - we need the list of all vars (varnames) because that's how you get coords - for the metadata vars (i.e. nv or bnds for time_bnds) - - :param dset: xarray Dataset object to query for info - :type dset: xarray Dataset object - :param vcoords: list of coordinate variables to write to file - :type vcoords: list of strings - :return: A dictionary where each key is a coordinate in the xarray Dataset and - each value is a dictionary where the keys are the encoding information from - the coordinate variable in the Dataset plus the units (if present) - :rtype: dict - - .. note:: - This code removes _FillValue from coordinates. CF-compliant files do not - have _FillValue on coordinates, and xarray does not have a good way to get - _FillValue from coordinates. Letting xarray set _FillValue for coordinates - when coordinates *have* a _FillValue gets you wrong metadata, and bad metadata - is worse than no metadata. Dropping the attribute if it's present seems to be - the lesser of two evils. - ''' - fre_logger.debug(f"getting coord encode settings") - encode_dict = {} - for vc in vcoords: - vc_encoding = dset[vc].encoding #dict - encode_dict[vc] = {'_FillValue': None, - 'dtype': dset[vc].encoding['dtype']} - if "units" in vc_encoding.keys(): - encode_dict[vc]['units'] = dset[vc].encoding['units'] - return(encode_dict) + ''' + Gets the encoding settings needed for xarray to write out the coordinates + as expected + we need the list of all vars (varnames) because that's how you get coords + for the metadata vars (i.e. nv or bnds for time_bnds) + + :param dset: xarray Dataset object to query for info + :type dset: xarray Dataset object + :param vcoords: list of coordinate variables to write to file + :type vcoords: list of strings + :return: A dictionary where each key is a coordinate in the xarray Dataset and + each value is a dictionary where the keys are the encoding information from + the coordinate variable in the Dataset plus the units (if present) + :rtype: dict + + .. note:: + This code removes _FillValue from coordinates. CF-compliant files do not + have _FillValue on coordinates, and xarray does not have a good way to get + _FillValue from coordinates. Letting xarray set _FillValue for coordinates + when coordinates *have* a _FillValue gets you wrong metadata, and bad metadata + is worse than no metadata. Dropping the attribute if it's present seems to be + the lesser of two evils. + ''' + fre_logger.debug("getting coord encode settings") + encode_dict = {} + for vc in vcoords: + vc_encoding = dset[vc].encoding #dict + encode_dict[vc] = {'_FillValue': None, + 'dtype': dset[vc].encoding['dtype']} + if "units" in vc_encoding.keys(): + encode_dict[vc]['units'] = dset[vc].encoding['units'] + return encode_dict def set_var_encoding(dset, varnames): - ''' - Gets the encoding settings needed for xarray to write out the variables - as expected - - mostly addressed to time_bnds, because xarray can drop the units attribute - - - https://github.com/pydata/xarray/issues/8368 - - :param dset: xarray dataset object to query for info - :type dset: xarray dataset object - :param varnames: list of variables that will be written to file - :type varnames: list of strings - :return: dict {var1: {encodekey1 : encodeval1, encodekey2:encodeval2...}} - :rtype: dict - ''' - fre_logger.debug(f"getting var encode settings") - encode_dict = {} - for v in varnames: - v_encoding = dset[v].encoding #dict - if not '_FillValue' in v_encoding.keys(): - encode_dict[v] = {'_FillValue': None, - 'dtype': dset[v].encoding['dtype']} - if "units" in v_encoding.keys(): - encode_dict[v]['units'] = dset[v].encoding['units'] - return(encode_dict) + ''' + Gets the encoding settings needed for xarray to write out the variables + as expected + + mostly addressed to time_bnds, because xarray can drop the units attribute + + - https://github.com/pydata/xarray/issues/8368 + + :param dset: xarray dataset object to query for info + :type dset: xarray dataset object + :param varnames: list of variables that will be written to file + :type varnames: list of strings + :return: dict {var1: {encodekey1 : encodeval1, encodekey2:encodeval2...}} + :rtype: dict + ''' + fre_logger.debug("getting var encode settings") + encode_dict = {} + for v in varnames: + v_encoding = dset[v].encoding #dict + if not '_FillValue' in v_encoding.keys(): + encode_dict[v] = {'_FillValue': None, + 'dtype': dset[v].encoding['dtype']} + if "units" in v_encoding.keys(): + encode_dict[v]['units'] = dset[v].encoding['units'] + return encode_dict def fre_outfile_name(infile, varname): - ''' - Builds split var filenames the way that fre expects them - (and in a way that should work for any .nc file) - - This is expected to work with files formed the following way - - - Fre Input format: date.component(.tileX).nc - - Fre Output format: date.component.var(.tileX).nc - - but it should also work on any file filename.nc - - :param infile: name of a file with a . somewhere in the filename - :type infile: string - :param varname: string to add to the infile - :type varname: string - :return: new filename - :rtype: string - ''' - var_outfile = re.sub(".nc", f".{varname}.nc", infile) - return(var_outfile) + ''' + Builds split var filenames the way that fre expects them + (and in a way that should work for any .nc file) + + This is expected to work with files formed the following way + + - Fre Input format: date.component(.tileX).nc + - Fre Output format: date.component.var(.tileX).nc + + but it should also work on any file filename.nc + + :param infile: name of a file with a . somewhere in the filename + :type infile: string + :param varname: string to add to the infile + :type varname: string + :return: new filename + :rtype: string + ''' + var_outfile = re.sub(".nc", f".{varname}.nc", infile) + return var_outfile #Main method invocation diff --git a/fre/pp/status_script.py b/fre/pp/status_script.py index d5d08c47a..963fecd4a 100644 --- a/fre/pp/status_script.py +++ b/fre/pp/status_script.py @@ -9,8 +9,10 @@ def status_subtool(experiment = None, platform = None, target = None): """ Report workflow state for the Cylc workflow $(experiment)__$(platform)__$(target) - - :param experiment: One of the postprocessing experiment names from the yaml displayed by fre list exps -y $yamlfile (e.g. c96L65_am5f4b4r0_amip), default None + + :param experiment: One of the postprocessing experiment names from the + yaml displayed by fre list exps -y $yamlfile + (e.g. c96L65_am5f4b4r0_amip), default None :type experiment: str :param platform: The location + compiler that was used to run the model (e.g. gfdl.ncrc5-deploy), default None :type platform: str @@ -21,10 +23,10 @@ def status_subtool(experiment = None, platform = None, target = None): if None in [experiment, platform, target]: raise ValueError( 'experiment, platform, and target must all not be None.' 'currently, their values are...' - f'{experiment} / {platform} / {target}') + f'{experiment} / {platform} / {target}') workflow_name = make_workflow_name(experiment, platform, target) - cmd = f"cylc workflow-state {workflow_name}" + cmd = f"cylc workflow-state {workflow_name}" fre_logger.debug('running the following command: ') fre_logger.debug(cmd) diff --git a/fre/pp/tests/test_configure_script_yaml.py b/fre/pp/tests/test_configure_script_yaml.py index ec9097882..1cc721603 100644 --- a/fre/pp/tests/test_configure_script_yaml.py +++ b/fre/pp/tests/test_configure_script_yaml.py @@ -3,12 +3,19 @@ """ import os import shutil -import yaml from pathlib import Path + +import pytest +import yaml +from jsonschema import ( + SchemaError, + ValidationError, + validate +) + from fre.pp import configure_script_yaml as csy from fre.yamltools import combine_yamls_script as cy -import pytest -from jsonschema import validate, SchemaError, ValidationError + # Set what would be click options EXPERIMENT = "c96L65_am5f7b12r1_amip" @@ -86,7 +93,10 @@ def test_validate_fail(): "target": "ttest", "directories": {"pp_dir": "/some/path"} } - with pytest.raises(ValueError, match="Combined yaml is not valid. Please fix the errors and try again.") as execinfo: + with pytest.raises( + ValueError, + match="Combined yaml is not valid. Please fix the errors and try again." + ) as execinfo: val_fail = csy.validate_yaml(wrong_yml_dict) assert execinfo.type is ValueError diff --git a/fre/pp/tests/test_histval_script.py b/fre/pp/tests/test_histval_script.py index d336c7a9d..f24d1e73e 100644 --- a/fre/pp/tests/test_histval_script.py +++ b/fre/pp/tests/test_histval_script.py @@ -1,11 +1,14 @@ """ Test histval_script """ -import pytest import re +import subprocess from pathlib import Path + +import pytest + from fre.pp import histval_script as histval -import subprocess + # Set example input paths @@ -49,7 +52,7 @@ def test_histval(capfd): match=re.escape( value_err_str ) ): - result=(histval.validate(test_dir,'00010101',warn=None)) + result = histval.validate(test_dir,'00010101',warn=None) #Delete the test files for x in test_files: diff --git a/fre/pp/tests/test_nccheck_script.py b/fre/pp/tests/test_nccheck_script.py index 73630926f..a74a8d1b1 100644 --- a/fre/pp/tests/test_nccheck_script.py +++ b/fre/pp/tests/test_nccheck_script.py @@ -1,9 +1,11 @@ """ Test nccheck_script """ +import subprocess from pathlib import Path + from fre.pp import nccheck_script as ncc -import subprocess + # Set example input path ncgen_input = Path("fre/tests/test_files/reduced_ascii_files/reduced_ocean_monthly_1x1deg.199301-199302.sos.cdl") diff --git a/fre/pp/tests/test_ppval_script.py b/fre/pp/tests/test_ppval_script.py index 2d3fe334f..ab7ab2625 100644 --- a/fre/pp/tests/test_ppval_script.py +++ b/fre/pp/tests/test_ppval_script.py @@ -1,11 +1,14 @@ """ Test ppval_script """ -import pytest import re +import subprocess from pathlib import Path + +import pytest + from fre.pp import ppval_script as ppval -import subprocess + # Test annual, monthly, daily input files # Set example input paths diff --git a/fre/pp/tests/test_split_netcdf.py b/fre/pp/tests/test_split_netcdf.py index 8603d0a89..6272757c7 100644 --- a/fre/pp/tests/test_split_netcdf.py +++ b/fre/pp/tests/test_split_netcdf.py @@ -2,19 +2,22 @@ Tests split-netcdf, parse_yaml from split_netcdf_script.py ''' -import pytest +import os +import pathlib import re -from fre.pp import split_netcdf_script -from fre.pp.split_netcdf_script import split_file_xarray import subprocess -import os from os import path as osp -import pathlib from pathlib import Path -from fre import fre import click +import pytest from click.testing import CliRunner + +from fre import fre +from fre.pp import split_netcdf_script +from fre.pp.split_netcdf_script import split_file_xarray + + runner=CliRunner() #rootdir = Path(__file__).parents[3] #get to root directory @@ -45,30 +48,30 @@ def test_split_file_setup(): nc_files = [] sp_stat = [] for testcase in cases.keys(): - cds = osp.join(test_dir,cases[testcase]["dir"]) - subdirs = [f.path for f in os.scandir(cds) if f.is_dir()] - for sd in subdirs: - #for each directory in the current dir, make a new dir with "new_" prepended - newdir = osp.join(cds, "new_" + os.path.basename(sd)) - if not osp.exists(newdir): - os.makedirs(newdir) - print(newdir) - cdl_files = [f.path for f in os.scandir(sd) if f.is_file] - cdl_files = [el for el in cdl_files if re.search("cdl", el) is not None] - for cdlf in cdl_files: - cdl_out = re.sub(".cdl", ".nc", cdlf) - cdlf_cmd = ["ncgen3", "-k", "netCDF-4", "-o", cdl_out, cdlf] - nc_files.append(cdl_out) - ncgen_commands.append(cdlf_cmd) - ncgen_commands.append(["ncgen3", "-k", "netCDF-4", "-o", - osp.join(cds, cases[testcase]["nc"]), - osp.join(cds, cases[testcase]["cdl"])]) - for ncg in ncgen_commands: - print(ncg) - sp = subprocess.run(ncg, check = True, capture_output=True) - sp_stat.append(sp.returncode) - sp_success = [el == 0 for el in sp_stat] - nc_files_exist = [osp.isfile(el) for el in nc_files] + cds = osp.join(test_dir,cases[testcase]["dir"]) + subdirs = [f.path for f in os.scandir(cds) if f.is_dir()] + for sd in subdirs: + #for each directory in the current dir, make a new dir with "new_" prepended + newdir = osp.join(cds, "new_" + os.path.basename(sd)) + if not osp.exists(newdir): + os.makedirs(newdir) + print(newdir) + cdl_files = [f.path for f in os.scandir(sd) if f.is_file] + cdl_files = [el for el in cdl_files if re.search("cdl", el) is not None] + for cdlf in cdl_files: + cdl_out = re.sub(".cdl", ".nc", cdlf) + cdlf_cmd = ["ncgen3", "-k", "netCDF-4", "-o", cdl_out, cdlf] + nc_files.append(cdl_out) + ncgen_commands.append(cdlf_cmd) + ncgen_commands.append(["ncgen3", "-k", "netCDF-4", "-o", + osp.join(cds, cases[testcase]["nc"]), + osp.join(cds, cases[testcase]["cdl"])]) + for ncg in ncgen_commands: + print(ncg) + sp = subprocess.run(ncg, check = True, capture_output=True) + sp_stat.append(sp.returncode) + sp_success = [el == 0 for el in sp_stat] + nc_files_exist = [osp.isfile(el) for el in nc_files] assert all( [ sp_success + nc_files_exist ] ) #test splitting files @@ -97,17 +100,21 @@ def test_split_file_run(workdir,infile, outfiledir, varlist): :type workdir: string :param infile: netcdf file to split into single-var files :type infile: string - :param outfiledir: directory to which to write the split netcdf files (new_all_ts_varlist, new_some_ts_varlist, new_none_ts_varlist) + :param outfiledir: directory to which to write the split netcdf files + (new_all_ts_varlist, new_some_ts_varlist, new_none_ts_varlist) :type outfiledir: string :param varlist: comma-separated string specifying which variables to write ("all", some_ts_varlist, none_ts_varlist) :type varlist: string :type origdir: string - Parameters for the 5 tests are based off of the list of variables to filter on plus the type of file: + Parameters for the 5 tests are based off of the list of variables to + filter on plus the type of file: - all: "all", the default, processes all variables in the input - - some: processes a list of variables, some of which are and some of which are not in the input; includes one duplicate var - - none: processes a list of variables, none of which are in the input; should produce no files + - some: processes a list of variables, some of which are and some of which + are not in the input; includes one duplicate var + - none: processes a list of variables, none of which are in the input; + should produce no files - ts: timeseries files - static: static files ''' @@ -136,10 +143,12 @@ def test_split_file_data(workdir,newdir, origdir): :param origdir: dir containing the old files to check against (all_ts_varlist, some_ts_varlist) :type origdir: string - Parameters for the tests differ based off the variable list from test_split_file_run and the type of file being split: + Parameters for the tests differ based off the variable list from + test_split_file_run and the type of file being split: - all: "all", the default, processes all variables in the input - - some: processes a list of variables, some of which are and some of which are not in the input; includes one duplicate var + - some: processes a list of variables, some of which are and some of + which are not in the input; includes one duplicate var - ts: timeseries files - static: static files ''' @@ -148,7 +157,7 @@ def test_split_file_data(workdir,newdir, origdir): orig_count = len([el for el in os.listdir(origdir) if el.endswith(".nc")]) split_files = [el for el in os.listdir(newdir) if el.endswith(".nc")] new_count = len(split_files) - same_count_files = (new_count == orig_count) + same_count_files = new_count == orig_count print(f"orig dir: {origdir} new dir: {newdir}") print(f"orig count: {orig_count} new count: {new_count}") all_files_equal=True @@ -183,10 +192,12 @@ def test_split_file_metadata(workdir,newdir, origdir): :param origdir: dir containing the old files to check against (all_ts_varlist, some_ts_varlist) :type origdir: string - Parameters for the tests differ based off the variable list from test_split_file_run and the type of file being split: + Parameters for the tests differ based off the variable list from + test_split_file_run and the type of file being split: - all: "all", the default, processes all variables in the input - - some: processes a list of variables, some of which are and some of which are not in the input; includes one duplicate var + - some: processes a list of variables, some of which are and some of + which are not in the input; includes one duplicate var - ts: timeseries files - static: static files ''' @@ -195,7 +206,7 @@ def test_split_file_metadata(workdir,newdir, origdir): orig_count = len([el for el in os.listdir(origdir) if el.endswith(".nc")]) split_files = [el for el in os.listdir(newdir) if el.endswith(".nc")] new_count = len(split_files) - same_count_files = (new_count == orig_count) + same_count_files = new_count == orig_count all_files_equal=True for sf in split_files: nccmp_cmd = [ 'nccmp', '-mg', '--force', @@ -216,16 +227,16 @@ def test_split_file_cleanup(): el_list = [] dir_list = [] for path, subdirs, files in os.walk(test_dir): - for name in files: - el_list.append(osp.join(path, name)) - for name in subdirs: - dir_list.append(osp.join(path,name)) + for name in files: + el_list.append(osp.join(path, name)) + for name in subdirs: + dir_list.append(osp.join(path,name)) netcdf_files = [el for el in el_list if el.endswith(".nc")] for nc in netcdf_files: - pathlib.Path.unlink(Path(nc)) + pathlib.Path.unlink(Path(nc)) newdir = [el for el in dir_list if osp.basename(el).startswith("new_")] for nd in newdir: - pathlib.Path.rmdir(Path(nd)) + pathlib.Path.rmdir(Path(nd)) dir_deleted = [not osp.isdir(el) for el in newdir] el_deleted = [not osp.isdir(el) for el in netcdf_files] assert all(el_deleted + dir_deleted) diff --git a/fre/pp/tests/test_split_netcdf_regex.py b/fre/pp/tests/test_split_netcdf_regex.py index 6abe27ee3..4c73a7299 100644 --- a/fre/pp/tests/test_split_netcdf_regex.py +++ b/fre/pp/tests/test_split_netcdf_regex.py @@ -38,7 +38,10 @@ def test_split_netcdf_file_regex_pattern(): for history_source in non_matching_files.keys(): file_regex = generate_regex(history_source) match = re.search(file_regex, non_matching_files[history_source]) - assert match is None, f"File '{non_matching_files[history_source]}' should NOT match regex pattern {file_regex}'" + assert match is None, ( + f"File '{non_matching_files[history_source]}' should NOT match " + f"regex pattern {file_regex}'" + ) def generate_regex(history_source): ''' diff --git a/fre/pp/trigger_script.py b/fre/pp/trigger_script.py index 1ab967864..c4e95f0bc 100644 --- a/fre/pp/trigger_script.py +++ b/fre/pp/trigger_script.py @@ -1,15 +1,20 @@ ''' fre pp trigger ''' +import logging import subprocess + from . import make_workflow_name -import logging + + fre_logger = logging.getLogger(__name__) def trigger(experiment = None, platform = None, target = None, time = None): """ Trigger the postprocessing tasks for one segment of the history. - :param experiment: One of the postprocessing experiment names from the yaml displayed by fre list exps -y $yamlfile (e.g. c96L65_am5f4b4r0_amip), default None + :param experiment: One of the postprocessing experiment names from the + yaml displayed by fre list exps -y $yamlfile + (e.g. c96L65_am5f4b4r0_amip), default None :type experiment: str :param platform: The location + compiler that was used to run the model (e.g. gfdl.ncrc5-deploy), default None :type platform: str diff --git a/fre/pp/validate_script.py b/fre/pp/validate_script.py index a23f516a3..7839f72d8 100644 --- a/fre/pp/validate_script.py +++ b/fre/pp/validate_script.py @@ -21,7 +21,7 @@ def validate_subtool(experiment = None, platform = None, target = None): try: # Change the current working directory os.chdir(directory) - + # Run the Rose validation macros cmd = "rose macro --validate" subprocess.run(cmd, shell=True, check=True) diff --git a/fre/pp/wrapper_script.py b/fre/pp/wrapper_script.py index 81ab3c75c..8d9a090b2 100644 --- a/fre/pp/wrapper_script.py +++ b/fre/pp/wrapper_script.py @@ -23,15 +23,19 @@ fre_logger = logging.getLogger(__name__) -def run_all_fre_pp_steps(experiment = None, platform = None, target = None, config_file = None, branch = None, time = None): +def run_all_fre_pp_steps(experiment = None, platform = None, target = None, + config_file = None, branch = None, time = None): ''' Wrapper script for all the steps of the fre2 pp infrastructure. - Calls config_file, checkout_template, yaml_info, install_subtool, pp_run_subtool, (trigger) and status_subtool in sequence. (trigger) is an optional step. + Calls config_file, checkout_template, yaml_info, install_subtool, pp_run_subtool, + (trigger) and status_subtool in sequence. (trigger) is an optional step. - :param experiment: One of the postprocessing experiment names from the yaml displayed by fre list exps -y $yamlfile (e.g. c96L65_am5f4b4r0_amip), default None + :param experiment: One of the postprocessing experiment names from the yaml displayed + by fre list exps -y $yamlfile (e.g. c96L65_am5f4b4r0_amip), default None :type experiment: str - :param platform: The location + compiler that was used to run the model (e.g. gfdl.ncrc5-deploy), default None + :param platform: The location + compiler that was used to run the model + (e.g. gfdl.ncrc5-deploy), default None :type platform: str :param target: Options used for the model compiler (e.g. prod-openmp), default None :type target: str @@ -39,7 +43,8 @@ def run_all_fre_pp_steps(experiment = None, platform = None, target = None, conf :type config_file: string :param branch: which git branch to pull from, default None :type branch: string - :param time: The start time of the segment to postprocess, if any. Formatted as a series of integers. For more information, see trigger(). + :param time: The start time of the segment to postprocess, if any. + Formatted as a series of integers. For more information, see trigger(). :type time: Cylc representation of a time point ''' fre_logger.info('(run_all_fre_pp_steps) config_file path resolving...') diff --git a/fre/tests/test_fre_cmor_cli.py b/fre/tests/test_fre_cmor_cli.py index f273a4e48..bf12c33ff 100644 --- a/fre/tests/test_fre_cmor_cli.py +++ b/fre/tests/test_fre_cmor_cli.py @@ -435,7 +435,7 @@ def test_cli_fre_cmor_config_case1(): # put an annual directory in to make sure we're not targeting that at the moment (mock_pp_dir / 'ocean' / 'ts' / 'annual').mkdir(parents=True, exist_ok=True) - + # symlink the test nc file into the mock tree src_nc = Path(f'{ROOTDIR}/ocean_sos_var_file/reduced_ocean_monthly_1x1deg.199301-199302.sos.nc') dst_nc = comp_ts_dir / src_nc.name @@ -446,9 +446,10 @@ def test_cli_fre_cmor_config_case1(): varlist_out_dir = Path(f'{ROOTDIR}/mock_writer_varlists') varlist_out_dir.mkdir(exist_ok=True) - # create an empty variable list of one we want to create. it should be remade. + # create an empty variable list of one we want to create. it should be remade. (varlist_out_dir / 'CMIP6_CMIP6_Omon_ocean.list').touch() - assert (varlist_out_dir / 'CMIP6_CMIP6_Omon_ocean.list').exists(), 'pre-existing variable list failed to be created for tests' + assert (varlist_out_dir / 'CMIP6_CMIP6_Omon_ocean.list').exists(), \ + 'pre-existing variable list failed to be created for tests' output_yaml = Path(f'{ROOTDIR}/mock_writer_output.yaml') output_data_dir = Path(f'{ROOTDIR}/mock_writer_outdir') @@ -478,7 +479,8 @@ def test_cli_fre_cmor_config_case1(): ]) assert result.exit_code == 0, f'config failed: {result.output}' assert output_yaml.exists(), 'output YAML was not created' - assert (varlist_out_dir / 'CMIP6_CMIP6_Omon_ocean.list').exists(), 'CMIP6_CMIP6_Omon_ocean.list was not created for some reason' + assert (varlist_out_dir / 'CMIP6_CMIP6_Omon_ocean.list').exists(), \ + 'CMIP6_CMIP6_Omon_ocean.list was not created for some reason' # basic sanity: the written file should contain "cmor:" and "table_targets:" yaml_text = output_yaml.read_text(encoding='utf-8') diff --git a/fre/tests/test_fre_make_cli.py b/fre/tests/test_fre_make_cli.py index 2dc16b664..bebb4d50b 100644 --- a/fre/tests/test_fre_make_cli.py +++ b/fre/tests/test_fre_make_cli.py @@ -13,10 +13,12 @@ and container creation (and cleaning up after those operations) """ -from click.testing import CliRunner -from pathlib import Path import os import shutil +from pathlib import Path + +from click.testing import CliRunner + from fre import fre diff --git a/fre/tests/test_fre_yamltools_cli.py b/fre/tests/test_fre_yamltools_cli.py index 5b377198c..cd6275c3c 100644 --- a/fre/tests/test_fre_yamltools_cli.py +++ b/fre/tests/test_fre_yamltools_cli.py @@ -13,13 +13,15 @@ that a command-line call to combine-yamls makes the same yaml that we expect """ from pathlib import Path + +import yaml from click.testing import CliRunner from fre import fre + runner = CliRunner() -import yaml def test_cli_fre_yamltools(): ''' fre yamltools ''' diff --git a/fre/yamltools/abstract_classes.py b/fre/yamltools/abstract_classes.py index 864d8dd7e..0a681a7f8 100644 --- a/fre/yamltools/abstract_classes.py +++ b/fre/yamltools/abstract_classes.py @@ -17,7 +17,6 @@ def combine_model(self): Function that will combine model yaml information with passed click options name, platform, and target """ - pass @abstractmethod def combine_settings(self, yaml_content_str): @@ -25,7 +24,6 @@ def combine_settings(self, yaml_content_str): Function that will combine settings yaml information with output yaml str from combine_model """ - pass @abstractmethod def combine_yamls(self): @@ -33,7 +31,6 @@ def combine_yamls(self): Function that will combine output yaml str (from merging the model and settings yaml) with pp/experiment yaml information """ - pass @abstractmethod def merge_multiple_yamls(self): @@ -41,7 +38,6 @@ def merge_multiple_yamls(self): Function that will merge multiple yaml dictionaries to produce final combined yaml of information """ - pass # inherited by compile_info_parser class MergeCompileYamls(ABC): @@ -54,7 +50,6 @@ def combine_model(self): Function that will combine model yaml information with passed click options name, platform, and target """ - pass @abstractmethod def combine_compile(self): @@ -62,7 +57,6 @@ def combine_compile(self): Function that will combine compile yaml information with output yaml str from combine_model """ - pass @abstractmethod def combine_platforms(self): @@ -70,7 +64,6 @@ def combine_platforms(self): Function that will combine platform yaml information with output yaml str from combine_compile """ - pass #class ValidateYamls(ABC): # @abstractmethod diff --git a/fre/yamltools/combine_yamls_script.py b/fre/yamltools/combine_yamls_script.py index 3e092f85d..3b4af30a8 100755 --- a/fre/yamltools/combine_yamls_script.py +++ b/fre/yamltools/combine_yamls_script.py @@ -137,7 +137,9 @@ def get_combined_cmoryaml( yamlfile: Union[str, Path], return cleaned_yaml -def consolidate_yamls(yamlfile:str, experiment:str, platform:str, target:str, use:str, output: Optional[str]=None) -> dict: +def consolidate_yamls(yamlfile:str, experiment:str, platform:str, + target:str, use:str, + output: Optional[str]=None) -> dict: """ Depending on `use` argument passed, either create the final combined yaml for compilation or post-processing diff --git a/fre/yamltools/helpers.py b/fre/yamltools/helpers.py index 36fe2836a..858458ec2 100644 --- a/fre/yamltools/helpers.py +++ b/fre/yamltools/helpers.py @@ -2,11 +2,18 @@ # this boots yaml with !join- see __init__ import json -from jsonschema import validate, ValidationError, SchemaError import logging -from . import * -from pathlib import Path import os +from pathlib import Path + +from jsonschema import ( + SchemaError, + ValidationError, + validate +) + +from . import * + fre_logger = logging.getLogger(__name__) diff --git a/fre/yamltools/tests/test_combine_yamls_script.py b/fre/yamltools/tests/test_combine_yamls_script.py index de2c530b0..7ae0594b8 100644 --- a/fre/yamltools/tests/test_combine_yamls_script.py +++ b/fre/yamltools/tests/test_combine_yamls_script.py @@ -1,14 +1,16 @@ """ tests routines in fre.yamltools.combine_yamls """ +import json import os +import pprint +import shutil from pathlib import Path + import pytest -import shutil -import json import yaml -import pprint from jsonschema import validate + from fre.yamltools import combine_yamls_script as cy @@ -173,11 +175,15 @@ def test_check_expected_platformyamlcontent(): 'module load fre/bronx-23', 'module load cray-hdf5/1.12.2.11', 'module load cray-netcdf/4.9.0.11'], - 'mkTemplate': '/ncrc/home2/fms/local/opt/fre-commands/bronx-20/site/ncrc5/intel-classic.mk', + 'mkTemplate': '/ncrc/home2/fms/local/opt/' + 'fre-commands/bronx-20/site/ncrc5/intel-classic.mk', 'modelRoot': '${HOME}/fremake_canopy/test'} expected_platform_info_2 = {'name': 'hpcme.2023', 'compiler': 'intel', - 'RUNenv': ['. /spack/share/spack/setup-env.sh', 'spack load libyaml', 'spack load netcdf-fortran@4.5.4', 'spack load hdf5@1.14.0'], + 'RUNenv': ['. /spack/share/spack/setup-env.sh', + 'spack load libyaml', + 'spack load netcdf-fortran@4.5.4', + 'spack load hdf5@1.14.0'], 'modelRoot': '/apps', 'container': True, 'containerBuild': 'podman', diff --git a/fre/yamltools/tests/test_helpers.py b/fre/yamltools/tests/test_helpers.py index 0fd6ae793..bd7a50bb2 100644 --- a/fre/yamltools/tests/test_helpers.py +++ b/fre/yamltools/tests/test_helpers.py @@ -1,10 +1,12 @@ -import pytest -import tempfile import os +import tempfile + +import pytest import yaml from fre.yamltools.helpers import yaml_load + @pytest.fixture def temp_path(): """Fixture that creates a temporary YAML file and returns its path, then cleans up.""" diff --git a/pylintrc b/pylintrc index 7ec15b8b8..c560bbe79 100644 --- a/pylintrc +++ b/pylintrc @@ -39,7 +39,7 @@ extension-pkg-whitelist= fail-on= # Specify a score threshold under which the program will exit with error. -fail-under=8.30 +fail-under=8.89 # Interpret the stdin as a python script, whose filename needs to be passed as # the module_or_package argument.