diff --git a/config/config.yaml b/config/config.yaml index e32806d..2f71762 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -1,4 +1,5 @@ -projected_crs: "epsg:8857" +crs: + projected: "EPSG:8857" category: bioenergy: @@ -90,7 +91,6 @@ fuel_mapping: "fossil gas: unknown": "natural gas" imputation: - adjustment_year: 2023 shape_overlap_correction: "split_capacity" # strict or split_capacity scenario: "near-future" # near-future, far-future, far-off-future lifetime_years: diff --git a/tests/integration/test_config.yaml b/tests/integration/test_config.yaml deleted file mode 100644 index 7bf0927..0000000 --- a/tests/integration/test_config.yaml +++ /dev/null @@ -1,129 +0,0 @@ -module_powerplants: - projected_crs: "EPSG:6316" # Balkans zone 7 with 10 metre accuracy for MNE - - category: - bioenergy: - technology_mapping: - "unknown": "bioenergy turbine" - - fossil: - technology_mapping: - coal: - CFB: "coal turbine" - IGCC: "igcc" - subcritical: "coal turbine" - supercritical: "coal turbine" - ultra-supercritical: "coal turbine" - unknown: "coal turbine" - oil_gas: - AFC: "ccgt" - ICCC: "ccgt" - IGCC: "igcc" - ISCC: "ccgt" - combined cycle: "ccgt" - gas turbine: "ocgt" - internal combustion: "reciprocating engine" - steam turbine: "steam turbine" - unknown: "ocgt" - - geothermal: - technology_mapping: - "advanced geothermal systems (AGS)": "high enthalpy geothermal" - "binary cycle": "low enthalpy geothermal" - "dry steam": "high enthalpy geothermal" - "enhanced geothermal systems (EGS)": "high enthalpy geothermal" - "flash steam - double": "low enthalpy geothermal" - "flash steam - single": "high enthalpy geothermal" - "flash steam - triple": "low enthalpy geothermal" - "flash steam - unknown": "high enthalpy geothermal" - "geopressured geothermal system (GGS)": "high enthalpy geothermal" - "unknown": "high enthalpy geothermal" - - hydropower: - technology_mapping: - Canal: "run of river" - PS: "pumped storage" - ROR: "run of river" - STO: "reservoir" - unknown: "reservoir" - - nuclear: - technology_mapping: - boiling water reactor: "nuclear reactor" - fast breeder reactor: "nuclear reactor" - fast neutron reactor: "nuclear reactor" - gas-cooled reactor: "nuclear reactor" - heavy water gas-cooled reactor: "nuclear reactor" - heavy water light water reactor: "nuclear reactor" - high temperature gas reactor: "nuclear reactor" - liquid-metal-cooled fast reactor: "nuclear reactor" - light water graphite reactor: "nuclear reactor" - molten salt reactor: "nuclear reactor" - pressurized heavy water reactor: "nuclear reactor" - pressurized water reactor: "nuclear reactor" - small modular reactor: "nuclear reactor" - steam-generating heavy water reactor: "nuclear reactor" - unknown: "nuclear reactor" - - solar: - technology_mapping: - csp: "concentrating solar power" - rooftop_pv: "rooftop pv" - utility_pv: "utility pv" - dc_ac_ratio: - utility_pv: 1.25 - - wind: - source: "gem" # open data alternative - technology_mapping: - offshore: "offshore" - onshore: "onshore" - - - fuel_mapping: - "bioenergy: unknown": "biomass" - "coal: unknown": "brown coal" - "fossil gas: unknown": "natural gas" - - imputation: - adjustment_year: 2023 - shape_overlap_correction: "split_capacity" - scenario: "near-future" # near-future, far-future, far-off-future - lifetime_years: - bioenergy turbine: 25 - ccgt: 20 - coal turbine: 50 - concentrating solar power: 25 - high enthalpy geothermal: 30 - igcc: 25 - low enthalpy geothermal: 30 - nuclear reactor: 50 - ocgt: 20 - offshore: 25 - onshore: 25 - pumped storage: 80 - reciprocating engine: 20 - reservoir: 80 - rooftop pv: 5 - run of river: 80 - steam turbine: 30 - utility pv: 25 - retirement_delay_years: - bioenergy turbine: 5 - ccgt: 5 - coal turbine: 5 - concentrating solar power: 2 - high enthalpy geothermal: 5 - igcc: 5 - low enthalpy geothermal: 5 - nuclear reactor: 5 - ocgt: 5 - offshore: 2 - onshore: 2 - pumped storage: 10 - reciprocating engine: 5 - reservoir: 10 - rooftop pv: 5 - run of river: 10 - steam turbine: 5 - utility pv: 2 diff --git a/workflow/internal/config.schema.yaml b/workflow/internal/config.schema.yaml index 6fb2d42..d3adbb5 100644 --- a/workflow/internal/config.schema.yaml +++ b/workflow/internal/config.schema.yaml @@ -1,7 +1,7 @@ $schema: "https://json-schema.org/draft/2020-12/schema" type: object required: - - projected_crs + - crs - category - imputation additionalProperties: false @@ -37,15 +37,17 @@ $defs: These names will be reflected in the resulting files, and will also be used during validation. properties: - projected_crs: - title: Projected CRS - description: CRS to use during projected operations (e.g., polygon centroid calculations). - oneOf: - - type: string - pattern: '^(epsg|EPSG):\d{4}$' - - type: integer - minimum: 1000 - maximum: 9999 + crs: + description: Coordinate reference system settings in the form 'epsg:xxxx'. + type: object + properties: + projected: + description: CRS to use during operations related to distance / area. + type: string + pattern: '^(epsg|EPSG):\d{4,5}$' + required: + - projected + additionalProperties: false # Ensures no extra properties in "crs" category: title: Category @@ -351,19 +353,11 @@ properties: type: object additionalProperties: false required: - - adjustment_year - shape_overlap_correction - scenario - lifetime_years - retirement_delay_years properties: - adjustment_year: - title: Adjustment year. - description: > - Year to use as reference when adjusting capacities to fit historical country statistics. - type: integer - minimum: 0 - maximum: 2023 shape_overlap_correction: title: Shape overlap correction method description: | diff --git a/workflow/rules/aggregate.smk b/workflow/rules/aggregate.smk index d30e3e3..b5577c0 100644 --- a/workflow/rules/aggregate.smk +++ b/workflow/rules/aggregate.smk @@ -21,7 +21,6 @@ rule aggregate_capacity: conda: "../envs/powerplants.yaml" params: - year=config["imputation"]["adjustment_year"], category=lambda wc: wc.category, message: "Aggregating capacity for {wildcards.shapes}-{wildcards.adjustment}-{wildcards.category}." diff --git a/workflow/rules/impute.smk b/workflow/rules/impute.smk index 97982fe..6bf92ac 100644 --- a/workflow/rules/impute.smk +++ b/workflow/rules/impute.smk @@ -16,7 +16,7 @@ rule impute_years: "../envs/powerplants.yaml" params: imputation=config["imputation"], - projected_crs=config["projected_crs"], + projected_crs=config["crs"]["projected"], tech_map=lambda wc: get_technology_mapping(wc.category), message: "National-level imputation of missing years for all powerplants in {wildcards.shapes}-{wildcards.category} dataset." @@ -87,9 +87,7 @@ rule impute_capacity_adjustment: category="|".join(IMPUTED_CAT - IMPUTED_CAT_WITHOUT_ADJUSTMENT), conda: "../envs/powerplants.yaml" - params: - year=config["imputation"]["adjustment_year"], message: - "National-level adjustment of powerplant capacity in {wildcards.shapes}-{wildcards.category} to {params.year} statistics." + "National-level adjustment of powerplant capacity in {wildcards.shapes}-{wildcards.category} to EIA statistics." script: "../scripts/impute_capacity_adjustment.py" diff --git a/workflow/rules/prepare.smk b/workflow/rules/prepare.smk index d71151d..d7fe38c 100644 --- a/workflow/rules/prepare.smk +++ b/workflow/rules/prepare.smk @@ -17,7 +17,7 @@ rule prepare_shapes: conda: "../envs/powerplants.yaml" params: - crs=config["projected_crs"], + crs=config["crs"]["projected"], message: "Preparing intermediate shapefile versions to speed up processing." script: diff --git a/workflow/rules/solar.smk b/workflow/rules/solar.smk index 74c7558..15db9b6 100644 --- a/workflow/rules/solar.smk +++ b/workflow/rules/solar.smk @@ -9,7 +9,6 @@ To fill solar capacity, we follow these steps: 2. Assume rooftop PV = national solar statistics - large projects. 3. Use a proxy to disaggregate then aggregate assumed rooftop PV capacity per shape. 4. Combine aggregated large pv projects and rooftop PV capacity. -5. Adjust to national statistics. """ @@ -37,14 +36,13 @@ rule proxy_rooftop_pv: "../envs/powerplants.yaml" params: category="solar", - year=config["imputation"]["adjustment_year"], message: "Generating proxy for rooftop capacity {wildcards.shapes}." script: "../scripts/proxy.py" -rule aggregate_solar_capacity: +rule impute_adjustment_solar: input: large_solar=workflow.pathvars.apply("").format( shapes="{shapes}", @@ -53,65 +51,33 @@ rule aggregate_solar_capacity: ), proxy=rules.proxy_rooftop_pv.output.proxy, shapes="", + stats=rules.prepare_statistics.output.categories, output: aggregated=workflow.pathvars.apply("").format( shapes="{shapes}", - adjustment="unadjusted", + adjustment="adjusted", category="solar", ), - plot=report( - "/{shapes}/aggregated/unadjusted/solar.pdf", + plot_map=report( + "/{shapes}/aggregated/adjusted/solar_map.pdf", caption="../report/aggregate_capacity.rst", category="Powerplants module", subcategory="solar", ), - log: - "/{shapes}/unadjusted/solar/aggregate_solar_capacity.log", - conda: - "../envs/powerplants.yaml" - params: - technology=config["category"]["solar"]["technology_mapping"]["rooftop_pv"], - category="solar", - message: - "Aggregating capacity for {wildcards.shapes}-unadjusted-solar." - script: - "../scripts/aggregate_capacity.py" - - -rule impute_capacity_adjustment_solar: - input: - unadjusted=workflow.pathvars.apply("").format( - shapes="{shapes}", - adjustment="unadjusted", - category="solar", - ), - shapes="", - stats=rules.prepare_statistics.output.categories, - output: - adjusted=workflow.pathvars.apply("").format( - shapes="{shapes}", - adjustment="adjusted", - category="solar", - ), - adj_plot=report( - "/{shapes}/aggregated/adjusted/solar_adjustment.pdf", + plot_stats=report( + "/{shapes}/aggregated/adjusted/solar_stats.pdf", caption="../report/impute_capacity_adjustment.rst", category="Powerplants module", subcategory="solar", ), - map_plot=report( - "/{shapes}/aggregated/adjusted/solar_map.pdf", - caption="../report/aggregate_capacity.rst", - category="Powerplants module", - subcategory="solar", - ), log: - "/{shapes}/adjusted/solar/impute_capacity_adjustment_solar.log", + "/{shapes}/adjusted/solar/aggregate_solar_capacity.log", conda: "../envs/powerplants.yaml" params: - year=config["imputation"]["adjustment_year"], + category="solar", + technology=config["category"]["solar"]["technology_mapping"]["rooftop_pv"], message: - "Adjusting aggregated capacity of {wildcards.shapes}-solar to {params.year} statistics." + "Aggregating capacity for {wildcards.shapes}-adjusted-solar." script: - "../scripts/impute_capacity_adjustment_solar.py" + "../scripts/impute_adjustment_solar.py" diff --git a/workflow/scripts/_plots.py b/workflow/scripts/_plots.py index 7b75d39..38138af 100644 --- a/workflow/scripts/_plots.py +++ b/workflow/scripts/_plots.py @@ -160,9 +160,8 @@ def plot_capacity_adjustment( agg_eia = ( df_eia.groupby([country_col, eia_cat_col])[[eia_cap_col]].sum().reset_index() ) - - countries = sorted(agg_dis[country_col].unique()) - techs = sorted(agg_dis[dis_tech_col].unique()) + countries = sorted(set(agg_dis[country_col]).union(agg_adj[country_col])) + techs = sorted(set(agg_dis[dis_tech_col]).union(agg_adj[dis_tech_col])) cats = sorted(agg_eia[eia_cat_col].unique()) # Avoid overlaps when techs / cats have the same name diff --git a/workflow/scripts/_utils.py b/workflow/scripts/_utils.py index 026882b..34ce3eb 100644 --- a/workflow/scripts/_utils.py +++ b/workflow/scripts/_utils.py @@ -8,7 +8,7 @@ # Average year where powerplant datasets were last updated. # MUST BE ADJUSTED WHENEVER DATASOURCES ARE UPDATED! -DATASET_YEAR = 2024 +DATASET_YEAR = 2023 def listify(item) -> list: diff --git a/workflow/scripts/aggregate_capacity.py b/workflow/scripts/aggregate_capacity.py index 9cfb877..11ecc00 100644 --- a/workflow/scripts/aggregate_capacity.py +++ b/workflow/scripts/aggregate_capacity.py @@ -8,12 +8,10 @@ import _utils import geopandas as gpd import pandas as pd -from gregor.aggregate import aggregate_point_to_polygon, aggregate_raster_to_polygon +from gregor.aggregate import aggregate_point_to_polygon if TYPE_CHECKING: snakemake: Any -sys.stderr = open(snakemake.log[0], "w", buffering=1) - CAPACITY_COLUMNS = {"category", "technology", "chp", "ccs", "fuel_class"} @@ -59,50 +57,18 @@ def capacity(powerplant_file: str, shapes_file: str, year: int, output_file: str agg_plants_df = pd.concat(agg_plants_arr, axis="index", ignore_index=True) agg_plants_df.attrs["year"] = year + agg_plants_df = _utils._clean_positive_capacity(agg_plants_df) _schemas.AggregatedPlantSchema.validate(agg_plants_df).to_parquet(output_file) -def capacity_solar( - large_pv_agg_file: str, - proxy_file: str, - shapes_file: str, - output_file: str, - technology: str, -): - """Aggregate rooftop PV using a proxy raster.""" - large_pv = pd.read_parquet(large_pv_agg_file) - shapes = gpd.read_parquet(shapes_file) - agg_roof_pv_cap = aggregate_raster_to_polygon(proxy_file, shapes, stats="sum") - - agg_roof_pv_cap["category"] = "solar" - agg_roof_pv_cap["technology"] = technology - agg_roof_pv_cap = agg_roof_pv_cap.rename(columns={"sum": "output_capacity_mw"}) - agg_roof_pv_cap = agg_roof_pv_cap.dropna(subset=["output_capacity_mw"]) - - valid_cols = set(_schemas.AggregatedPlantSchema.to_schema().columns) - agg_roof_pv_cap = agg_roof_pv_cap[list(valid_cols & set(agg_roof_pv_cap.columns))] - - solar_mw = pd.concat([agg_roof_pv_cap, large_pv], ignore_index=True) - solar_mw.attrs = large_pv.attrs | agg_roof_pv_cap.attrs - _schemas.AggregatedPlantSchema.validate(solar_mw).to_parquet(output_file) - - if __name__ == "__main__": - if snakemake.params.category == "solar": - capacity_solar( - large_pv_agg_file=snakemake.input.large_solar, - proxy_file=snakemake.input.proxy, - shapes_file=snakemake.input.shapes, - output_file=snakemake.output.aggregated, - technology=snakemake.params.technology, - ) - else: - capacity( - powerplant_file=snakemake.input.powerplants, - shapes_file=snakemake.input.shapes, - year=snakemake.params.year, - output_file=snakemake.output.aggregated, - ) + sys.stderr = open(snakemake.log[0], "w", buffering=1) + capacity( + powerplant_file=snakemake.input.powerplants, + shapes_file=snakemake.input.shapes, + year=_utils.DATASET_YEAR, + output_file=snakemake.output.aggregated, + ) _plots.plot_capacity_aggregation( aggregated_file=snakemake.output.aggregated, shapes_file=snakemake.input.shapes, diff --git a/workflow/scripts/impute_adjustment_solar.py b/workflow/scripts/impute_adjustment_solar.py new file mode 100644 index 0000000..c029cdd --- /dev/null +++ b/workflow/scripts/impute_adjustment_solar.py @@ -0,0 +1,68 @@ +"""Aggregate powerplant capacity into shapes.""" + +import sys +from typing import TYPE_CHECKING, Any + +import _plots +import _schemas +import _utils +import geopandas as gpd +import pandas as pd +from gregor.aggregate import aggregate_raster_to_polygon + +if TYPE_CHECKING: + snakemake: Any + + +def capacity_solar( + large_pv_agg_file: str, proxy_file: str, shapes_file: str, technology: str +) -> gpd.GeoDataFrame: + """Aggregate rooftop PV using a proxy raster.""" + large_pv = pd.read_parquet(large_pv_agg_file) + shapes = gpd.read_parquet(shapes_file) + agg_roof_pv_cap = aggregate_raster_to_polygon(proxy_file, shapes, stats="sum") + + agg_roof_pv_cap["category"] = "solar" + agg_roof_pv_cap["technology"] = technology + agg_roof_pv_cap = agg_roof_pv_cap.rename(columns={"sum": "output_capacity_mw"}) + agg_roof_pv_cap = agg_roof_pv_cap.dropna(subset=["output_capacity_mw"]) + + # Keep only schema-approved columns + valid_cols = set(_schemas.AggregatedPlantSchema.to_schema().columns) + agg_roof_pv_cap = agg_roof_pv_cap[list(valid_cols & set(agg_roof_pv_cap.columns))] + + # Combine and clean the data + solar_mw = pd.concat([agg_roof_pv_cap, large_pv], ignore_index=True) + solar_mw = _utils._clean_positive_capacity(solar_mw) + solar_mw.attrs = large_pv.attrs | agg_roof_pv_cap.attrs + return _schemas.AggregatedPlantSchema.validate(solar_mw) + + +def main(): + """Main snakemake process.""" + solar_gdf = capacity_solar( + large_pv_agg_file=snakemake.input.large_solar, + proxy_file=snakemake.input.proxy, + shapes_file=snakemake.input.shapes, + technology=snakemake.params.technology, + ) + solar_gdf.to_parquet(snakemake.output.aggregated) + _plots.plot_capacity_aggregation( + aggregated_file=snakemake.output.aggregated, + shapes_file=snakemake.input.shapes, + output_file=snakemake.output.plot_map, + category=snakemake.params.category, + ) + _plots.plot_capacity_adjustment( + stats_file=snakemake.input.stats, + unadjusted_file=snakemake.input.large_solar, + adjusted_file=snakemake.output.aggregated, + year=_utils.DATASET_YEAR, + output_file=snakemake.output.plot_stats, + is_disagg=False, + ) + + +if __name__ == "__main__": + sys.stderr = open(snakemake.log[0], "w", buffering=1) + main() diff --git a/workflow/scripts/impute_capacity_adjustment.py b/workflow/scripts/impute_capacity_adjustment.py index 3513fc1..02eee99 100644 --- a/workflow/scripts/impute_capacity_adjustment.py +++ b/workflow/scripts/impute_capacity_adjustment.py @@ -36,7 +36,7 @@ def adjust_powerplant_capacity( adjust_powerplant_capacity( stats_file=snakemake.input.stats, unadjusted_file=snakemake.input.unadjusted, - year=snakemake.params.year, + year=_utils.DATASET_YEAR, output_file=snakemake.output.adjusted, ) @@ -44,7 +44,7 @@ def adjust_powerplant_capacity( stats_file=snakemake.input.stats, unadjusted_file=snakemake.input.unadjusted, adjusted_file=snakemake.output.adjusted, - year=snakemake.params.year, + year=_utils.DATASET_YEAR, output_file=snakemake.output.plot, is_disagg=True, ) diff --git a/workflow/scripts/impute_capacity_adjustment_solar.py b/workflow/scripts/impute_capacity_adjustment_solar.py deleted file mode 100644 index 7288767..0000000 --- a/workflow/scripts/impute_capacity_adjustment_solar.py +++ /dev/null @@ -1,56 +0,0 @@ -"""Adjustment of powerplant capacity for cases that used proxies.""" - -import sys -from typing import TYPE_CHECKING, Any - -import _plots -import _schemas -import _utils -import pandas as pd - -if TYPE_CHECKING: - snakemake: Any -sys.stderr = open(snakemake.log[0], "w") - - -def adjust_aggregated( - stats_file: str, unadjusted_file: str, year: int, output_file: str -): - """Adjust aggregated powerplant capacity in the given year. - - Only provides the requested reference year. - """ - stats = pd.read_parquet(stats_file) - plants = pd.read_parquet(unadjusted_file) - - if plants.empty: - adjusted_plants = plants - else: - adjusted_plants = _utils.adjust_aggregated_capacity(plants, stats, year) - - _schemas.AggregatedPlantSchema.validate(adjusted_plants).to_parquet(output_file) - - -if __name__ == "__main__": - adjust_aggregated( - stats_file=snakemake.input.stats, - unadjusted_file=snakemake.input.unadjusted, - year=snakemake.params.year, - output_file=snakemake.output.adjusted, - ) - - _plots.plot_capacity_adjustment( - stats_file=snakemake.input.stats, - unadjusted_file=snakemake.input.unadjusted, - adjusted_file=snakemake.output.adjusted, - year=snakemake.params.year, - output_file=snakemake.output.adj_plot, - is_disagg=False, - ) - - _plots.plot_capacity_aggregation( - aggregated_file=snakemake.output.adjusted, - shapes_file=snakemake.input.shapes, - output_file=snakemake.output.map_plot, - category="solar", - ) diff --git a/workflow/scripts/prepare_shapes.py b/workflow/scripts/prepare_shapes.py index 7de4073..f8f8654 100644 --- a/workflow/scripts/prepare_shapes.py +++ b/workflow/scripts/prepare_shapes.py @@ -43,7 +43,7 @@ def plot( new_proj.plot(ax=axes[1], color="lightcoral") else: new_proj.plot(col, ax=axes[1], cmap=Colormap(cmap).to_mpl()) - new_proj.boundary.plot(ax=axes[1], color="black", lw=0.2) + new_proj.boundary.plot(ax=axes[1], color="black", lw=0.05) axes[1].set_title(name) for ax in axes: ax.set(xticks=[], yticks=[], xlabel="", ylabel="") diff --git a/workflow/scripts/proxy.py b/workflow/scripts/proxy.py index f23e12d..4c151ff 100644 --- a/workflow/scripts/proxy.py +++ b/workflow/scripts/proxy.py @@ -143,7 +143,7 @@ def plot(proxy_file: str, shapes_file: str, output_file: str, pixels: int = 500_ stats_file=snakemake.input.stats, output_file=snakemake.output.proxy, category=snakemake.params.category, - year=snakemake.params.year, + year=_utils.DATASET_YEAR, ) plot( proxy_file=snakemake.output.proxy,