diff --git a/reeds/input_processing/aggregate_regions.py b/reeds/input_processing/aggregate_regions.py index bdd9513a..8236ea63 100644 --- a/reeds/input_processing/aggregate_regions.py +++ b/reeds/input_processing/aggregate_regions.py @@ -473,11 +473,6 @@ def agg_disagg(filepath, r2aggreg_glob, r_ba_glob, runfiles_row): if list_check == agglevel_check : if row['disaggfunc']=='ignore': return - - # If the file isn't in inputs_case, skip it - if filename not in inputfiles: - if verbose > 1: - logprint(filepath, 'skipped since not in inputs_case') return #%%############## @@ -1079,33 +1074,6 @@ def agg_disagg(filepath, r2aggreg_glob, r_ba_glob, runfiles_row): .rename(columns={'wide (1 if any parameters are in wide format)':'wide', 'header (0 if file has column labels)':'header'}) ) -#%% If any files are missing, stop and alert the user -inputfiles = sorted([ - f.split('inputs_case'+os.sep)[1] - for f in glob(os.path.join(inputs_case,'**'), recursive=True) - if 'metadata' not in f -]) -## Drop the directories and backup h17 files -inputfiles = [f for f in inputfiles if (('.' in f) and not f.endswith('_h17.csv'))] -missingfiles = [f for f in inputfiles if (os.path.basename(f) not in runfiles.index.values) -] -if any(missingfiles): - if missing == 'raise': - raise Exception( - 'Missing aggregation method for:\n{}\n' - '>>> Need to add entries for these files to runfiles.csv' - .format('\n'.join(missingfiles)) - ) - else: - from warnings import warn - warn( - 'Missing aggregation directions for:\n{}\n' - '>>> For this run, these files are copied without modification' - .format('\n'.join(missingfiles)) - ) - for f in missingfiles: - shutil.copy(os.path.join(inputs_case, f), os.path.join(inputs_case, f)) - print(f'copied {f}, which is missing from runfiles.csv') #%% Maps (special case) mapsfile = os.path.join(inputs_case, 'maps.gpkg') @@ -1164,8 +1132,15 @@ def agg_disagg(filepath, r2aggreg_glob, r_ba_glob, runfiles_row): r2aggreg_glob = r2aggreg r_ba_glob = r_ba -# loop over inputfiles from runfiles and call aggregation/disaggregation function -for filepath in inputfiles: +# loop over transmission_files from runfiles and call aggregation/disaggregation function +transmission_files = [ + 'transmission_capacity_future.csv', + 'transmission_capacity_future_baseline.csv', + 'transmission_cost_ac.csv', + 'transmission_cost_dc.csv', + 'transmission_distance.csv', +] +for filepath in transmission_files: ### For debugging: Specify a file # filepath = '' ### Get the appropriate row from runfiles diff --git a/reeds/input_processing/copy_files.py b/reeds/input_processing/copy_files.py index 0d874903..642d2ebc 100644 --- a/reeds/input_processing/copy_files.py +++ b/reeds/input_processing/copy_files.py @@ -1155,50 +1155,6 @@ def map_and_aggregate( return df -def upscale_from_county_to_ba( - df, - region_file_entry, - agglevel_variables, - regions_and_agglevel, - aggfunc=None -): - """ - Changes the resolution of the provided region_col from county to BA - or mixed resolution and aggregates according to the provided aggfunc. - """ - original_cols = df.columns - region_col = region_file_entry['region_col'] - - # Exception for cendiv - if region_col.strip('*') == 'r_cendiv': - val_cendiv = regions_and_agglevel['valid_regions']['cendiv'] - df = df.loc[df['r'].isin(regions_and_agglevel['r_county']['county'])] - df = df.loc[:, df.columns.isin(["r"] + val_cendiv)].reset_index(drop=True) - region_col = 'r' - - # Aggregate values to ba resolution if not running county-level resolution - # and if county level is not a desired resolution in mixed resolution runs - if 'county' not in agglevel_variables['agglevel']: - df = map_and_aggregate(df,regions_and_agglevel,region_file_entry,region_col,aggfunc) - - - # Mixed resolution procedure - elif agglevel_variables['lvl'] == 'mult': - df_ba = df[df[region_col].isin(agglevel_variables['BA_county_list'])] - df_ba = map_and_aggregate(df_ba,regions_and_agglevel,region_file_entry,region_col,aggfunc) - - # Filter out county regions - df_county = df[df[region_col].isin(agglevel_variables['county_regions'])] - # Combine county and BA - df = pd.concat([df_ba, df_county]) - - else: - pass - - df = df[original_cols] - - return df - def write_region_indexed_file( df, @@ -1206,8 +1162,7 @@ def write_region_indexed_file( source_deflator_map, sw, region_file_entry, - regions_and_agglevel, - agglevel_variables + regions_and_agglevel ): """ Write a single region-indexed file to the dir_dst directory @@ -1216,6 +1171,35 @@ def write_region_indexed_file( # Get the filetype of the output file from the filename string filetype_out = os.path.splitext(filename)[1].strip('.') + transmission_files = [ + 'transmission_capacity_future.csv', + 'transmission_capacity_future_baseline.csv', + 'transmission_cost_ac.csv', + 'transmission_cost_dc.csv', + 'transmission_distance.csv', + ] + if filename not in transmission_files: + region_col = region_file_entry['region_col'] + fix_cols = region_file_entry['fix_cols'].split(',') + + if region_file_entry['disaggfunc'] != 'ignore': + df = reeds.spatial.downscale_from_legacy_zone_to_county( + df=df, + region_col=region_col, + fix_cols=fix_cols, + inputs_case=inputs_case, + disaggfunc=region_file_entry['disaggfunc'] + ) + + if region_file_entry['aggfunc'] != 'ignore': + df = reeds.spatial.upscale_from_county_to_zone( + df=df, + region_col=region_col, + fix_cols=fix_cols, + inputs_case=inputs_case, + aggfunc=region_file_entry['aggfunc'] + ) + #---- Write data to dir_dst (inputs_case) folder ---- if filetype_out == 'h5': reeds.io.write_profile_to_h5(df, filename, dir_dst) @@ -1226,27 +1210,6 @@ def write_region_indexed_file( case 'bio_supplycurve.csv': # Adjust for inflation df['price'] = df['price'].astype(float) * source_deflator_map[filepath] - case ( - 'can_exports.csv' - | 'can_imports.csv' - | 'demonstration_plants.csv' - | 'distpvcap.csv' - | 'h2_ba_share.csv' - | 'regional_cap_cost_diff.csv' - | 'cendivweights.csv' - | 'cap_existing_psh.csv' - ): - # The upscale_from_county_to_ba function correctly handles the different spatial resolution options - # This sections just needs to check if the run is at pure county resolution - # The above listed data need to be upscaled if the run includes anything coarser than county resolution - if agglevel_variables['lvl'] != 'county': - df = upscale_from_county_to_ba( - df=df, - region_file_entry=region_file_entry, - agglevel_variables=agglevel_variables, - regions_and_agglevel=regions_and_agglevel, - aggfunc=region_file_entry.aggfunc - ) case 'unitdata.csv': fips_ba_map = regions_and_agglevel['ba_county'].dropna().set_index('county')['ba'] df['reeds_ba'] = df['FIPS'].map(fips_ba_map) @@ -1301,8 +1264,7 @@ def write_region_indexed_files( source_deflator_map, sw, region_file_entry, - regions_and_agglevel, - agglevel_variables + regions_and_agglevel ) @@ -1665,6 +1627,9 @@ def main(reeds_path, inputs_case): # Copy non-region files write_non_region_files(non_region_files, sw, inputs_case, regions_and_agglevel, source_deflator_map) + + # Write files used for disaggregation + write_disagg_data_files(runfiles, inputs_case) # Copy region files write_region_indexed_files( @@ -1676,9 +1641,6 @@ def main(reeds_path, inputs_case): source_deflator_map ) - # Write files used for disaggregation - write_disagg_data_files(runfiles, inputs_case) - # Create a maps.gpkg for this run # Skip if using region dis/aggregation, maps will be written in aggregation_regions.py. # Run if using mixed resolution aggreg-county combination diff --git a/reeds/input_processing/mcs_sampler.py b/reeds/input_processing/mcs_sampler.py index deee88d6..605b2bfc 100644 --- a/reeds/input_processing/mcs_sampler.py +++ b/reeds/input_processing/mcs_sampler.py @@ -1830,8 +1830,7 @@ def write_samples( region_files_row = aux_files['region_files'].query('filename == @file_name').iloc[0] copy_files.write_region_indexed_file(sample_values, dir_dst, aux_files['source_deflator_map'], aux_files['sw'], region_files_row, - aux_files['regions_and_agglevel'], - aux_files['agglevel_variables']) + aux_files['regions_and_agglevel']) # ...if we have a csv file that isn't region-indexed (including switches.csv) elif file_termination == '.csv': if file_name == 'switches.csv': diff --git a/reeds/input_processing/runfiles.csv b/reeds/input_processing/runfiles.csv index 06961a99..ac9f66ea 100644 --- a/reeds/input_processing/runfiles.csv +++ b/reeds/input_processing/runfiles.csv @@ -9,95 +9,50 @@ i_subtech.csv,inputs/sets/i_subtech.csv,1,ignore,ignore,,,,,,,1,set,i_subtech,te i_h2_ptc_gen.csv,inputs/sets/i_h2_ptc_gen.csv,1,ignore,ignore,,,,,,,1,set,i_h2_ptc_gen,technology subset category for clean generators which qualify for the hydrogen production tax credit, sdbin.csv,inputs/sets/sdbin.csv,1,ignore,ignore,,,,,,,1,set,sdbin,storage duration bins, tg.csv,inputs/sets/tg.csv,1,ignore,ignore,,,,,,,1,set,tg,tech groups for growth constraints, -ccseason.csv,,1,ignore,ignore,,,,,,,1,set,ccseason,seasons used for capacity credit; cold is Oct 15-April 14 and hot is April 15-Oct 14, quarter.csv,inputs/sets/quarter.csv,1,ignore,ignore,,,,,,,1,set,quarter,original h17 seasons (four per year), month.csv,inputs/sets/month.csv,1,ignore,ignore,,,,,,,1,set,month,calendar months in a year, RPSCat.csv,inputs/sets/RPSCat.csv,1,ignore,ignore,,,,,,,,set,RPSCat,RPS constraint categories including clean energy standards, aclike.csv,inputs/sets/aclike.csv,1,ignore,ignore,,,,,0,,,set,aclike,AC transmission capacity types, acp_disallowed.csv,inputs/state_policies/acp_disallowed.csv,int(sw.GSw_StateRPS) != 0,ignore,ignore,*st,"RPSCat,val",,,0,,,,,, acp_prices.csv,inputs/state_policies/acp_prices.csv,int(sw.GSw_StateRPS) != 0,ignore,ignore,st,st,,1,0,,,,,, -tscbin.csv,,1,ignore,ignore,,,,,,,1,set,tscbin,transmission upgrade supply curve bins, -numpartitions.csv,,1,ignore,ignore,,,,,0,,,,,, -agglevels.csv,,1,ignore,ignore,ignore,,,,0,,,,,, -aggreg.csv,,1,ignore,ignore,ignore,,,0,0,,,,,, -aggreg2anchorreg.csv,,1,ignore,ignore,ignore,,,0,0,,,,,, -anchorreg2aggreg.csv,,1,ignore,ignore,ignore,,,0,0,,,,,, allt.csv,inputs/sets/allt.csv,1,ignore,ignore,,,,,,,,set,allt,all potential years, alpha.csv,inputs/fuelprices/alpha_{ngscen}.csv,1,ignore,ignore,wide_cendiv,t,,1,0,,,,,, bio_supplycurve.csv,inputs/supply_curve/bio_supplycurve.csv,1,ignore,ignore,usda_region,,,,0,,,,,, bioclass.csv,inputs/sets/bioclass.csv,1,ignore,ignore,,,,,,,,set,bioclass,, can_exports.csv,inputs/canada_imports/can_exports.csv,int(sw.GSw_Canada) != 0,sum,ignore,r,wide,,1,0,,,,,, -can_exports_h_frac.csv,,1,ignore,ignore,,,,,0,,,,,, can_exports_szn_frac.csv,inputs/canada_imports/can_exports_szn_frac.csv,int(sw.GSw_Canada) != 0,ignore,ignore,,,,,0,,,,,, can_imports.csv,inputs/canada_imports/can_imports.csv,int(sw.GSw_Canada) != 0,sum,ignore,r,wide,,1,0,,,,,, -can_imports_capacity.csv,,1,sum,ignore,*r,t,,0,0,,1,,,,disaggfunc set to ignore because data will be written at county resolution by writecapdat.py can_imports_quarter_frac.csv,inputs/canada_imports/can_imports_quarter_frac.csv,int(sw.GSw_Canada) != 0,ignore,ignore,,,,,0,,,,,, -can_imports_szn_frac.csv,,1,ignore,ignore,,,,,0,,1,,,, cangrowth.csv,inputs/load/cangrowth.csv,int(sw.GSw_Canada) != 0,ignore,ignore,st,,,1,0,,,,,, -canmexload.csv,,1,sum,ignore,*r,h,,0,0,,1,,,, -cap_cspns.csv,,1,sum,ignore,*r,t,,0,0,,1,,,,disaggfunc set to ignore because data is pulled from the county-indexed generator database cap_existing_hydro.csv,inputs/hydro/cap_existing_hydro.csv,1,ignore,ignore,,t,,0,0,,,,,, cap_existing_psh.csv,inputs/storage/cap_existing_psh.csv,(int(sw.GSw_Storage) != 0) and ((int(sw.GSw_HydroPSHDurData) == 1) or (sw.GSw_HydroStorInMaxFrac == 'data')),sum,ignore,r,"*i,v",,0,0,,,,,, -cap_hyd_ccseason_adj.csv,,1,mean,uniform,r,"*i,ccseason",,0,0,,1,,,, -cap_hyd_szn_adj.csv,,1,mean,uniform,r,"*i,szn",,0,0,,1,,,, -cap_limit.csv,,1,ignore,ignore,,,,1,0,,,,,, cap_penalty.csv,inputs/financials/cap_penalty.csv,1,ignore,ignore,,tg,,,0,,,,,, -capnonrsc.csv,,1,sum,ignore,r,i,,0,0,,1,,,,disaggfunc set to ignore because data will be written at county resolution by writecapdat.py -capnonrsc_energy.csv,,1,sum,ignore,r,i,,0,0,,1,,,,disaggfunc set to ignore because data will be written at county resolution by writecapdat.py cappayments_ba.csv,inputs/capacity_exogenous/cappayments_ba.csv,1,ignore,ignore,,*r,,0,0,,,,,,not done but not used -caprsc.csv,,1,sum,ignore,r,i,,0,0,,1,,,,disaggfunc set to ignore because data will be written at county resolution by writecapdat.py -ccreg.csv,,1,ignore,ignore,,,,,0,,,,,, ccs_link.csv,inputs/emission_constraints/ccs_link.csv,1,ignore,ignore,,,,,0,,,,,, ccs_link_water.csv,inputs/emission_constraints/ccs_link_water.csv,1,ignore,ignore,,,,,0,,,,,, -ccseason_dates.csv,,1,ignore,ignore,,,,,0,,,,,, ccsflex_cat.csv,inputs/sets/ccsflex_cat.csv,1,ignore,ignore,,,,,,,,set,ccsflex_cat,flexible ccs performance parameter categories, -ccsflex_perf.csv,,1,ignore,ignore,,,,,0,,,,,, cd_beta0.csv,inputs/fuelprices/cd_beta0.csv,1,ignore,ignore,*cendiv,,,,0,,,,,, cd_beta0_allsector.csv,inputs/fuelprices/cd_beta0_allsector.csv,1,ignore,ignore,*cendiv,,,,0,,,,,, cendivweights.csv,inputs/fuelprices/cendivweights.csv,1,mean,ignore,r_cendiv,wide,,1,0,,,,,,Includes two region definitions; disaggfunc set to ignore because data is already at county resolution in inputs folder ces_fraction.csv,inputs/state_policies/ces_fraction.csv,int(sw.GSw_StateRPS) != 0,ignore,ignore,st,st,,1,0,,,,,, -cf_hyd.csv,,1,mean,uniform,r,"*i,szn,t",,0,0,,1,,,, -cf_vre.csv,,1,mean_cap,ignore,r,"*i,h",*i,0,0,,1,,,,disaggfunc set to ignore because data will be written in correct spatial resolution by hourly_writetimeseries in hourly_repperiods.py -climate_UnappWaterMult.csv,,1,ignore,ignore,,,,,0,,1,,,,created by hourly_writetimeseries.py from temp_UnappWaterMult.csv data -climate_UnappWaterMultAnn.csv,,1,ignore,ignore,,,,,0,,1,,,,created by climateprep.py -climate_UnappWaterSeaAnnDistr.csv,,1,ignore,ignore,,,,,0,,1,,,,created by hourly_writetimeseries.py from temp_UnappWaterSeaAnnDistr.csv data -climate_heuristics_finalyear.csv,,1,ignore,ignore,,,,,0,,,,,, -climate_heuristics_yearfrac.csv,,1,ignore,ignore,,,,,0,,,,,, -climate_hydadjann.csv,,1,ignore,ignore,,,,,0,,1,,,,created by climateprep.py -climate_hydadjsea.csv,,1,ignore,ignore,,,,,0,,1,,,,created by hourly_writetimeseries.py from temp_hydadjsea.csv data -climate_loaddelta_timeslice.csv,,1,ignore,ignore,,"r,h",,1,0,,,,,,not done but rarely used; ignore for now climate_param.csv,inputs/sets/climate_param.csv,1,ignore,ignore,,,,,,,,set,climate_param,parameters defined in climate_heuristics_finalyear, -co2_cap.csv,,int(sw.GSw_AnnualCap) != 0,ignore,ignore,,,,0,0,,,,,, -co2_capture_incentive.csv,,1,ignore,ignore,,,,,0,,,,,, co2_site_char.csv,inputs/ctus/co2_site_char.csv,1,ignore,ignore,,,,0,,,,,,, -co2_tax.csv,,int(sw.GSw_CarbTax) != 0,ignore,ignore,,,,0,,,,,,, coal_price.csv,inputs/fuelprices/coal_{coalscen}.csv,1,ignore,ignore,wide_cendiv,year,,1,0,,,,,, construction_schedules.csv,inputs/financials/construction_schedules_{construction_schedules_suffix}.csv,1,ignore,ignore,,,,1,0,,,,,, construction_times.csv,inputs/financials/construction_times_{construction_times_suffix}.csv,1,ignore,ignore,,,,,0,,,,,, consume_char.csv,inputs/consume/consume_char_{GSw_H2_Inputs}.csv,int(sw.GSw_H2) != 0,ignore,ignore,,"*i,t,parameter",,0,0,,,,,, consumecat.csv,inputs/sets/consumecat.csv,1,ignore,ignore,,,,,,,,set,consumecat,categories for consuming facility characteristics, -consumechardac.csv,,1,ignore,ignore,,"*i,t,variable",,0,0,,,,,, cost_cap_mult.csv,inputs/waterclimate/cost_cap_mult.csv,int(sw.GSw_WaterMain) != 0,ignore,ignore,,,,,0,,,,,, cost_hurdle_country.csv,inputs/transmission/cost_hurdle_country.csv,1,ignore,ignore,*country,,,,0,,,,,, cost_hurdle_intra.csv,inputs/transmission/cost_hurdle_intra.csv,1,ignore,ignore,,t,,,0,,,,,, -cost_hurdle_rate1.csv,,1,ignore,ignore,,t,,0,0,,1,,,, -cost_hurdle_rate2.csv,,1,ignore,ignore,,t,,0,0,,1,,,, -cost_opres.csv,,1,ignore,ignore,,,,,0,,,,,, cost_opres_default.csv,inputs/plant_characteristics/cost_opres_default.csv,int(sw.GSw_OpRes) != 0,ignore,ignore,,,,,0,,,,,, cost_opres_market.csv,inputs/plant_characteristics/cost_opres_market.csv,int(sw.GSw_OpRes) != 0,ignore,ignore,,,,,0,,,,,, -cost_vom.csv,,1,mean,ignore,r,"i,v,t",,0,0,,1,,,,ReEDS-to-PLEXOS output cost_vom_mult.csv,inputs/waterclimate/cost_vom_mult.csv,int(sw.GSw_WaterMain) != 0,ignore,ignore,,,,,0,,,,,, -county2zone.csv,,1,ignore,ignore,,,,,,,1,,,, -county2zone_original.csv,,1,ignore,ignore,,,,,,,1,,,, -crf.csv,,1,ignore,ignore,,,,0,,,,,,, -crf_co2_incentive.csv,,1,ignore,ignore,,,,,0,,,,,, -crf_h2_incentive.csv,,1,ignore,ignore,,,,,0,,,,,, csapr_cat.csv,inputs/sets/csapr_cat.csv,1,ignore,ignore,,,,,,,,set,csapr_cat,CSAPR regulation categories, csapr_group.csv,inputs/sets/csapr_group.csv,1,ignore,ignore,,,,,,,,set,csapr_group,CSAPR trading group, csapr_group1_ex.csv,inputs/emission_constraints/csapr_group1_ex.csv,int(sw.GSw_CSAPR) != 0,ignore,ignore,*st,,,,0,,,,,, csapr_group2_ex.csv,inputs/emission_constraints/csapr_group2_ex.csv,int(sw.GSw_CSAPR) != 0,ignore,ignore,*st,,,,0,,,,,, csapr_ozone_season.csv,inputs/emission_constraints/csapr_ozone_season.csv,int(sw.GSw_CSAPR) != 0,ignore,ignore,st,,,,0,,,,,, -ctus_r_cs_spurlines_200mi.csv,,1,ignore,ignore,,,,,,,1,,,, currency_incentives.csv,inputs/financials/currency_incentives.csv,1,ignore,ignore,,,,,0,,,,,, dac_elec.csv,inputs/consume/dac_elec_{dacscen}.csv,int(sw.GSw_DAC) != 0,ignore,ignore,,,,1,0,,,,,, dac_gas.csv,inputs/consume/dac_gas_{GSw_DAC_Gas_Case}.csv,int(sw.GSw_DAC) != 0,ignore,ignore,,,,1,0,,1,,,, @@ -106,12 +61,9 @@ degradation_annual.csv,inputs/degradation/degradation_annual_{degrade_suffix}.cs demonstration_plants.csv,inputs/capacity_exogenous/demonstration_plants.csv,int(sw.GSw_NuclearDemo) != 0,sum,ignore,r,"t,i,coolingwatertech,ctt,wst,notes",i,0,0,,,,,, depreciation_schedules.csv,inputs/financials/depreciation_schedules_{depreciation_schedules_suffix}.csv,1,ignore,ignore,,,,1,0,,,,,, diagnose.gms,postprocessing/diagnose/diagnose.gms,1,ignore,ignore,,,,,,,,,,, -disagg_geosize.csv,,1,ignore,ignore,,,,,0,,,,,, disagg_hydroexist.csv,inputs/disaggregation/disagg_hydroexist.csv,1,ignore,ignore,,,,,0,,,,,, disagg_population.csv,inputs/disaggregation/county_population.csv,1,ignore,ignore,FIPS,,,,0,,1,,,, disagg_state_lpf.csv,inputs/disaggregation/county_state_lpf.csv,1,ignore,ignore,FIPS,,,,0,,1,,,, -distance_reinforcement.csv,,1,ignore,ignore,r,"*i,rscbin",*i,0,0,,1,,,, -distance_spur.csv,,1,ignore,ignore,r,"*i,rscbin",*i,0,0,,1,,,, distpvcap.csv,inputs/dgen_model_inputs/{distpvscen}/distpvcap_{distpvscen}.csv,1,sum,ignore,r,wide,,1,0,,,,,, dollaryear_consume.csv,inputs/consume/dollaryear.csv,int(sw.GSw_DAC) != 0,ignore,ignore,,,,,0,,,,,,Do we really need 3 separate instances of dollaryear? dollaryear_plant.csv,inputs/plant_characteristics/dollaryear.csv,1,ignore,ignore,,,,,0,,,,,,Do we really need 3 separate instances of dollaryear? @@ -124,11 +76,9 @@ dr_shed_capacity_scalar.csv,inputs/demand_response/dr_shed_capacity_scalar_{dr_s dr_shed_hourly.h5,inputs/profiles_dr/dr_shed_hourly_{dr_shedscen}.h5,int(sw.GSw_DRShed) != 0,ignore,ignore,wide,"datetime,year",,1,keepindex,,,,,,Agg/Disagg handled in hourly_load e.csv,inputs/sets/e.csv,1,ignore,ignore,,,,,0,,,set,e,emission categories used in model, eall.csv,inputs/sets/eall.csv,1,ignore,ignore,,,,,,,,set,eall,emission categories used in reporting, -emit_rate.csv,,1,ignore,ignore,,"etype,e,i,v,r",,0,0,,,,,,ReEDS-to-PLEXOS output emitrate.csv,inputs/emission_constraints/emitrate.csv,1,ignore,ignore,,,,,0,,,,,, energy_communities.csv,inputs/financials/energy_communities.csv,1,ignore,ignore,,,,,0,,,,,,region aggregation and filtering is handled in copy_files etype.csv,inputs/sets/etype.csv,1,ignore,ignore,,,,,,,,set,etype,emission types used in model (upstream and process), -eval_period_adj_mult.csv,,1,ignore,ignore,,,,,0,,,,,, exog_cap_geohydro.csv,inputs/capacity_exogenous/exog_cap_geohydro_{GSw_SitingGeo}.csv,int(sw.GSw_Geothermal) != 0,ignore,ignore,region,"*tech,sc_point_gid,year",,0,0,,,,,, exog_cap_upv.csv,inputs/capacity_exogenous/exog_cap_upv_{GSw_SitingUPV}.csv,1,ignore,ignore,region,"*tech,sc_point_gid,year",,0,0,,,,,, exog_cap_wind-ons.csv,inputs/capacity_exogenous/exog_cap_wind-ons_{GSw_SitingWindOns}.csv,1,ignore,ignore,region,"*tech,sc_point_gid,year",,0,0,,,,,, @@ -137,84 +87,39 @@ financials_hydrogen.csv,inputs/financials/financials_hydrogen.csv,int(sw.GSw_H2) financials_sys.csv,inputs/financials/financials_sys_{financials_sys_suffix}.csv,1,ignore,ignore,,,,,0,,,,,, financials_tech.csv,inputs/financials/financials_tech_{financials_tech_suffix}.csv,1,ignore,ignore,,,,,0,,,,,, financials_transmission.csv,inputs/financials/financials_transmission_{financials_trans_suffix}.csv,1,ignore,ignore,,,,,0,,,,,, -financing_risk_mult.csv,,1,ignore,ignore,,,,,0,,,,,, -firm_import_limit.csv,,1,ignore,ignore,,,,,0,,1,,,, -firstyear.csv,,1,ignore,ignore,,,,,0,,1,,,, years_until_endogenous.csv,inputs/plant_characteristics/years_until_endogenous.csv,1,ignore,ignore,,,,,0,,,,,, -flex_frac_all.csv,,1,mean,population,r,"*flextype,h,wide",,1,0,,1,,,, flex_type.csv,inputs/sets/flex_type.csv,1,ignore,ignore,,,,,,,,set,flex_type,demand flexibility types, forced_retirements.csv,inputs/state_policies/forced_retirements.csv,1,ignore,ignore,st,"*i,t",,0,0,,,,,, -forceperiods.csv,,1,ignore,ignore,,,,,,,,,,, -frac_h_ccseason_weights.csv,,1,ignore,ignore,,,,,,,,,,, -frac_h_month_weights.csv,,1,ignore,ignore,,,,,,,,,,, -frac_h_quarter_weights.csv,,1,ignore,ignore,,,,,,,,,,, fuel2tech.csv,inputs/sets/fuel2tech.csv,1,ignore,ignore,,,,,0,,,set,fuel2tech,mapping between fuel types and generations, -fuel_price.csv,,1,ignore,ignore,,"i,r",,0,0,,,,,,ReEDS-to-PLEXOS output fuelbin.csv,inputs/sets/fuelbin.csv,1,ignore,ignore,,,,,,,,set,fuelbin,gas usage bracket, futurefiles.csv,inputs/userinput/futurefiles.csv,1,ignore,ignore,,,,,0,,,,,, gb.csv,inputs/sets/gb.csv,1,ignore,ignore,,,,,,,,set,gb,gas price bins; must have an odd number of bins; e.g. gb1*gb15, gbin.csv,inputs/sets/gbin.csv,1,ignore,ignore,,,,,,,,set,gbin,growth bins, gbin_min.csv,inputs/growth_constraints/gbin_min.csv,1,ignore,ignore,,,,,0,,,,,, -nat_gen_tech_frac.csv,,1,ignore,ignore,,,,,0,,,,,, -gen_mandate_trajectory.csv,,1,ignore,ignore,,,,,0,,,,,, geo_discovery_factor.csv,inputs/geothermal/geo_discovery_factor_{geohydrosupplycurve}.csv,int(sw.GSw_Geothermal) != 0,mean,uniform,r,*i,*i,0,0,,,,,, geo_discovery_rate.csv,inputs/geothermal/geo_discovery_{geodiscov}.csv,int(sw.GSw_Geothermal) != 0,ignore,ignore,,,,0,,,,,,, -geo_fom.csv,,1,mean,uniform,r,*i,,0,0,,1,,,, -geo_fom_mult.csv,,1,ignore,ignore,,0,,0,,,,,,, -geo_retirements.csv,,1,sum,ignore,r,"i,v,wide",,1,0,,1,,,,disaggfunc set to ignore because data will be written at county resolution by writecapdat.py geo_rsc.csv,inputs/geothermal/geo_rsc_ATB_2023.csv,int(sw.GSw_Geothermal) != 0,sc_cat,geosize,r,*i,,0,0,value,,,,, -geocapcostmult.csv,,1,ignore,ignore,,,,1,0,,,,,, -geoexist.csv,,1,ignore,ignore,r,*i,,0,0,,1,,,,written to proper spatial aggregation in writecapdat.py growth_bin_size_mult.csv,inputs/growth_constraints/growth_bin_size_mult.csv,1,ignore,ignore,,,,,0,,,,,, growth_limit_absolute.csv,inputs/growth_constraints/growth_limit_absolute.csv,1,ignore,ignore,,,,,0,,,,,, growth_penalty.csv,inputs/growth_constraints/growth_penalty.csv,1,ignore,ignore,,,,,0,,,,,, -gswitches.csv,,1,ignore,ignore,,,,,0,,,,,, h2_ba_share.csv,inputs/consume/h2_demand_county_share.csv,int(sw.GSw_H2) != 0,sum,ignore,*r,t,,0,0,,,,,, gwp.csv,inputs/emission_constraints/gwp.csv,1,ignore,ignore,,,,,0,,1,parameter,gwp,, -h2_existing_smr_cap.csv,,1,sum,population,*r,t,,0,0,,1,,,, -h_preh.csv,,1,ignore,ignore,,,,,0,,,,,, h2_leakage_rate.csv,inputs/emission_constraints/h2_leakage_rate.csv,1,ignore,ignore,,,,,0,,,,,, h2_exogenous_demand.csv,inputs/consume/h2_exogenous_demand.csv,int(sw.GSw_H2) != 0,ignore,ignore,,p,,1,0,,,,,, -h2_pipeline_cap_cost_mult.csv,,1,ignore,ignore,,,,,,,1,,,, -h2_ptc.csv,,1,ignore,ignore,,,*i,,0,,,,,, h2_st.csv,inputs/sets/h2_st.csv,1,ignore,ignore,,,,,,,,set,h2_st,investments needed to store and transport H2, h2_stor.csv,inputs/sets/h2_stor.csv,1,ignore,ignore,,,,,0,,,set,h2_stor,H2 storage options, -h2_storage_rb.csv,,int(sw.GSw_H2) != 0,ignore,ignore,rb,,,0,0,,1,,,, h2_transport_and_storage_costs.csv,inputs/consume/h2_transport_and_storage_costs.csv,int(sw.GSw_H2) != 0,ignore,ignore,,,,,,,,,,, -h_actualszn.csv,,1,ignore,ignore,,,,,0,,,,,, -h_ccseason_prm.csv,,1,ignore,ignore,,,,,0,,,,,, -h_dt_szn.csv,,1,ignore,ignore,,,,,0,,1,,,, -h_szn.csv,,1,ignore,ignore,,,,0,0,,,,,, -h_szn_end.csv,,1,ignore,ignore,,,,,0,,,,,, -h_szn_start.csv,,1,ignore,ignore,,,,,0,,,,,, -heat_rate.csv,,1,ignore,ignore,,"i,v,r",,0,0,,,,,,ReEDS-to-PLEXOS output heat_rate_adj.csv,inputs/plant_characteristics/heat_rate_adj.csv,1,ignore,ignore,,,,,0,,,,,, heat_rate_mult.csv,inputs/waterclimate/heat_rate_mult.csv,int(sw.GSw_WaterMain) != 0,ignore,ignore,,,,,0,,,,,, heat_rate_penalty_spin.csv,inputs/plant_characteristics/heat_rate_penalty_spin.csv,1,ignore,ignore,,,,,0,,,,,, -hierarchy.csv,,1,first,ignore,*r,"nercr,transreg,transgrp,cendiv,st,interconnect,country,usda_region,h2ptcreg",,0,0,,1,,,,post_copy column set to 1 since copy_files filters this file separately -hierarchy_itlgrp.csv,,1,ignore,ignore,,,,,0,,1,,,,post_copy column set to 1 since copy_files copies this file separately -hierarchy_original.csv,,1,ignore,ignore,,,,,0,,1,,,,post_copy column set to 1 since copy_files copies this file separately -hierarchy_with_res.csv,,1,ignore,ignore,,,,,0,,1,,,,post_copy column set to 1 since copy_files copies this file separately hintage_char.csv,inputs/sets/hintage_char.csv,1,ignore,ignore,,,,,,,,set,hintage_char,characteristics available in hintage_data, -hintage_data.csv,,1,ignore,ignore,,,,,0,,,,,,handled separately in WriteHintage.py -hmap_allyrs.csv,,1,ignore,ignore,,,,,0,,,,,, -hmap_myr.csv,,1,ignore,ignore,,,,,0,,,,,, -hour_szn_group.csv,,1,ignore,ignore,,,,,,,,,,, -hourly_szn_end.csv,,1,ignore,ignore,,,,,,,,,,, -hourly_szn_start.csv,,1,ignore,ignore,,,,,,,,,,, -hours_hourly.csv,,1,ignore,ignore,,,,,,,,,,, -hset_hourly.csv,,1,ignore,ignore,,,,,,,,,,, hyd_add_upg_cap.csv,inputs/supply_curve/hyd_add_upg_cap.csv,int(sw.GSw_HydroCapEnerUpgradeType) == 2,sum,hydroexist,r,"i,rscbin,wide",,1,0,,,,,, hyd_fom.csv,inputs/hydro/hyd_fom.csv,1,mean,uniform,wide,i,,1,0,,,,,, -hydadjann.csv,inputs/climate/{climatescen}/hydadjann.csv,int(sw.GSw_ClimateHydro) != 0,mean,uniform,r,t,,0,0,,,,,, -hydadjsea.csv,inputs/climate/{climatescen}/hydadjsea.csv,int(sw.GSw_ClimateHydro) != 0,mean,uniform,r,"month,t",,0,0,,,,,, hydcap.csv,inputs/supply_curve/hydcap.csv,1,sum,geosize,wide,"tech,class",,1,0,,,,,, hydcapadj.csv,inputs/hydro/SeaCapAdj_hy.csv,1,mean,uniform,r,"*i,month",,0,0,,,,,, -hydcf.csv,,1,ignore,ignore,r,"t,*i,month",,0,0,,1,,,, hydcf_fixed.csv,inputs/hydro/hydcf_fixed.csv,1,mean,uniform,r,"*i,month",,0,0,,,,,, hydcost.csv,inputs/supply_curve/hydcost.csv,1,mean,uniform,wide,"tech,class",,1,0,,,,,, hydro_mingen.csv,inputs/hydro/hydro_mingen.csv,1,mean,uniform,r,"*i,quarter",,0,0,,,,,,might be better to do something capacity-weighted -hydrocapcostmult.csv,,1,ignore,ignore,,,,1,0,,,,,, hydrofrac_policy.csv,inputs/state_policies/hydrofrac_policy.csv,int(sw.GSw_StateRPS) != 0,ignore,ignore,st,"RPS_All,CES",,,0,,,,,, hydrogen_price.csv,inputs/fuelprices/h2-combustion_{h2combustionfuelscen}.csv,int(sw.GSw_H2Combustion) != 0,ignore,ignore,,,,0,0,,,,,, i_coolingtech_watersource.csv,inputs/waterclimate/i_coolingtech_watersource.csv,int(sw.GSw_WaterMain) != 0,ignore,ignore,,,,,0,,,,,, @@ -227,78 +132,47 @@ i_water_nocooling.csv,inputs/sets/i_water_nocooling.csv,1,ignore,ignore,,,,,0,,, incentives.csv,inputs/financials/incentives_{incentives_suffix}.csv,1,ignore,ignore,,,,,0,,,,,, inflation.csv,inputs/financials/inflation_{inflation_suffix}.csv,1,ignore,ignore,,,,0,0,,,,,, interconnection_queues.csv,inputs/capacity_exogenous/interconnection_queues.csv,1,ignore,ignore,r,"tg,r",,1,0,,,,,, -itc_energy_comm_bonus.csv,,1,mean,ignore,r,*i,,0,0,,1,,,, -itc_frac_monetized.csv,,1,ignore,ignore,,,,,0,,,,,, -itc_fractions.csv,,1,ignore,ignore,,"i,country,t",,0,0,,,,,, -ivt.csv,,1,ignore,ignore,,,,,0,,,,,,created in runreeds.py -ivt_step.csv,,1,ignore,ignore,,,,,0,,,,,, lcclike.csv,inputs/sets/lcclike.csv,1,ignore,ignore,,,,,0,,,set,lcclike,transmission capacity types where lines are bundled with AC/DC converters, -load_2010.csv,,1,sum,ignore,r,wide,,1,0,,1,,,,disaggfunc set to ignore because load will already be in correct spatial resolution -load_allyear.csv,,1,sum,ignore,*r,"h,t",,0,0,,1,,,,disaggfunc set to ignore because load will already be in correct spatial resolution load_multiplier.csv,inputs/load/demand_{demandscen}.csv,1,ignore,ignore,,,,,0,,,,,, -load_multiplier_r.csv,,1,ignore,ignore,,,,1,0,,,,,, loadsite_annual.csv,inputs/load/loadsite_{GSw_LoadSiteTrajectory}.csv,float(sw.GSw_LoadSiteCF) > 0,ignore,ignore,*loadsitereg,t,,,0,,,,,, -maps.gpkg,,1,ignore,ignore,,,,,,,1,,,, maxage.csv,inputs/plant_characteristics/maxage.csv,1,ignore,ignore,,,,,0,,,,,, maxdailycf.csv,inputs/plant_characteristics/maxdailycf.csv,int(sw.GSw_MaxDailyCF) != 0,ignore,ignore,,,,,0,,,,,, mcs_distributions.yaml,inputs/userinput/mcs_distributions_{MCS_dist}.yaml,int(sw.MCS_runs) != 0,ignore,ignore,,,,,,,,,,, -mcs_group_weights.csv,,1,ignore,ignore,,,,,,,,,,, -methane_leakage_rate.csv,,1,ignore,ignore,,,,0,0,,,,,, mex_growth_rate.csv,inputs/load/mex_growth_rate.csv,1,ignore,ignore,,,,0,,,,,,, minCF.csv,inputs/plant_characteristics/minCF.csv,int(sw.GSw_MinCF) != 0,ignore,ignore,,,,,0,,,,,, min_retire_age.csv,inputs/plant_characteristics/min_retire_age.csv,1,ignore,ignore,,,,,0,,,,,, mingen_fixed.csv,inputs/plant_characteristics/mingen_fixed.csv,int(sw.GSw_MingenFixed) != 0,ignore,ignore,,,*i,,0,,,,,, minloadfrac0.csv,inputs/plant_characteristics/minloadfrac0.csv,(int(sw.GSw_Mingen) != 0) or (int(sw.GSw_MinLoading) != 0),ignore,ignore,,,,,0,,,,,, modeled_regions.csv,inputs/userinput/modeled_regions.csv,1,ignore,ignore,,,,,,,,,,, -modeledyears.csv,,1,ignore,ignore,,,,,0,,,,,, month2quarter.csv,inputs/temporal/month2quarter.csv,1,ignore,ignore,,,,,0,,,,,, mttr.csv,inputs/plant_characteristics/mttr.csv,1,ignore,ignore,,,tech,,0,,,,,, natgas_price_cendiv.csv,inputs/fuelprices/ng_{ngscen}.csv,1,ignore,ignore,wide_cendiv,year,,1,0,,,,,, national_rps_frac_allScen.csv,inputs/national_generation/national_rps_frac_allScen.csv,int(sw.GSw_StateRPS) != 0,ignore,ignore,,,,1,0,,,,,, net_gen_existing_hydro.csv,inputs/hydro/net_gen_existing_hydro.csv,1,ignore,ignore,,"t,month",,0,0,,,,,, peak_net_imports.csv,inputs/reserves/peak_net_imports.csv,1,ignore,ignore,nercr,t,,0,0,,,,,, -nexth.csv,,1,ignore,ignore,,,,,0,,,,,, -nexth_actualszn.csv,,1,ignore,ignore,,,,,0,,,,,, -nextpartition.csv,,1,ignore,ignore,,,,,0,,,,,, -ng_crf_penalty.csv,,1,ignore,ignore,,,,0,0,,,,,, ng_crf_penalty_st.csv,inputs/state_policies/ng_crf_penalty_st.csv,1,ignore,ignore,st,*t,,0,0,,,,,, ng_demand_elec.csv,inputs/fuelprices/ng_demand_{ngscen}.csv,1,ignore,ignore,wide_cendiv,year,,1,0,,,,,, ng_demand_tot.csv,inputs/fuelprices/ng_tot_demand_{ngscen}.csv,1,ignore,ignore,wide_cendiv,year,,1,0,,,,,, noretire.csv,inputs/sets/noretire.csv,1,ignore,ignore,,,,,0,,,set,noretire,technologies that will never be retired, notvsc.csv,inputs/sets/notvsc.csv,1,ignore,ignore,,,,,0,,,set,notvsc,transmission capacity types that are not VSC, -nuclear_ba_ban_list.csv,,int(sw.GSw_NukeStateBan) != 0,ignore,ignore,,,,,,,,,,, nuclear_energy_communities.csv,inputs/financials/nuclear_energy_communities.csv,1,ignore,ignore,,,,,0,,,,,,region aggregation and filtering is handled in copy_files nuclear_subsidies.csv,inputs/state_policies/nuclear_subsidies.csv,1,ignore,ignore,*st,year,,0,0,,,,,, -numhours.csv,,1,ignore,ignore,,,,,0,,,,,, -numhours_nexth.csv,,1,ignore,ignore,,,,,0,,1,,,, objective_function_params.yaml,tests/objective_function_params.yaml,1,ignore,ignore,,,,,,,,,,, offshore_req.csv,inputs/state_policies/offshore_req_{GSw_OfsWindForceScen}.csv,(int(sw.GSw_StateRPS) != 0) and (int(sw.GSw_OfsWind) != 0),ignore,ignore,st,,,1,0,,,,,, ofstype.csv,inputs/sets/ofstype.csv,1,ignore,ignore,,,,,,,,set,ofstype,offshore types used in offshore requirement constraint (eq_RPS_OFSWind), ofstype_i.csv,inputs/sets/ofstype_i.csv,1,ignore,ignore,,,,,0,,,set,ofstype_i,crosswalk between ofstype and i, -ofswind_rsc_mult.csv,,1,ignore,ignore,,,,1,0,,,,,, oosfrac.csv,inputs/state_policies/oosfrac.csv,int(sw.GSw_StateRPS) != 0,ignore,ignore,*st,,,,0,,,,,, opres_periods.csv,inputs/reserves/opres_periods.csv,int(sw.GSw_OpRes) != 0,ignore,ignore,,,,,0,,,,,, orcat.csv,inputs/sets/orcat.csv,1,ignore,ignore,,,,,,,,set,orcat,operating reserve category for RHS calculations, orperc.csv,inputs/reserves/orperc.csv,int(sw.GSw_OpRes) != 0,ignore,ignore,,,,,0,,,,,, ortype.csv,inputs/sets/ortype.csv,1,ignore,ignore,,,,,,,,set,ortype,types of operating reserve constraints, -outage_forced_h.csv,,1,ignore,ignore,r,"*i,h",,0,0,,1,,,,handled in outage_rates.py -outage_forced_hourly.h5,,1,ignore,ignore,wide,index,index,1,keepindex,,1,,,,handled in outage_rates.py outage_forced_static.csv,inputs/plant_characteristics/outage_forced_static.csv,1,ignore,ignore,,,,,none,,,,,, outage_forced_temperature.csv,inputs/plant_characteristics/outage_forced_temperature_{GSw_OutageScen}.csv,sw.GSw_OutageScen != 'static',ignore,ignore,,,,,0,,,,,, outage_scheduled_static.csv,inputs/plant_characteristics/outage_scheduled_static.csv,1,ignore,ignore,,,,,0,,,,,, outage_scheduled_monthly.csv,inputs/plant_characteristics/outage_scheduled_monthly.csv,1,ignore,ignore,,,,,0,,,,,, p.csv,inputs/sets/p.csv,1,ignore,ignore,,,,,,,,set,p,products produced, -peak_ccseason.csv,,1,sum,ignore,*r,"ccseason,t",,0,0,,1,,,,ok because it's load during peak NERC hour -peak_h.csv,,1,sum,ignore,r,"h,wide",,1,0,,1,,,, -peakload.csv,,1,ignore,ignore,,,,,0,,1,,,, -peakload_nercr.csv,,1,ignore,ignore,,,,,0,,1,,,, -period_szn.csv,,1,ignore,ignore,,,,,0,,,,,, period_szn_user.csv,inputs/temporal/period_szn_{GSw_HourlyClusterAlgorithm}.csv,sw.GSw_HourlyClusterAlgorithm == 'user*',ignore,ignore,,,,,,,,,,, -period_weights.csv,,1,ignore,ignore,,,,,0,,,,,, -periodmap_1yr.csv,,1,ignore,ignore,,,,0,0,,,,,, -pipeline_cost_mult.csv,,int(sw.GSw_H2) != 0,trans_lookup,uniform,"*r,rr",,,0,0,drop_dup_r,1,,,, plantcat.csv,inputs/sets/plantcat.csv,1,ignore,ignore,,,,,,,,set,plantcat,categories for plant characteristics, -plantcharout.csv,,1,ignore,ignore,,"0,2",,0,,,,,,, plantchar_beccs.csv,inputs/plant_characteristics/{plantchar_beccs}.csv,int(sw.GSw_BECCS) != 0,ignore,ignore,,,,,0,,,,,,used to define plantcharout.csv plantchar_biopower.csv,inputs/plant_characteristics/{plantchar_biopower}.csv,1,ignore,ignore,,,,,0,,,,,,used to define plantcharout.csv plantchar_ccsflex_cost.csv,inputs/plant_characteristics/{ccsflexscen}_cost.csv,(int(sw.GSw_CCSFLEX_BYP) != 0) or (int(sw.GSw_CCSFLEX_DAC) != 0) or (int(sw.GSw_CCSFLEX_STO) != 0),ignore,ignore,,,,,0,,,,,,used to define plantcharout.csv @@ -326,77 +200,40 @@ plantchar_other.csv,inputs/plant_characteristics/{plantchar_other}.csv,1,ignore, plantchar_pvb.csv,inputs/plant_characteristics/pvb_{pvbscen}.csv,int(sw.GSw_PVB) != 0,ignore,ignore,,,,,0,,,,,,used to define plantcharout.csv plantchar_upgrades.csv,inputs/upgrades/{upgradescen}.csv,sw.upgradescen != 'default',ignore,ignore,,,,,0,,,,,,used to define plantcharout.csv plantchar_upv.csv,inputs/plant_characteristics/{plantchar_upv}.csv,1,ignore,ignore,,,,,0,,,,,,used to define plantcharout.csv -poi_cap_init.csv,,1,sum,ignore,*r,,,0,0,,1,,,,disaggfunc set to ignore because data will be written at county resolution by writecapdat.py prepost.csv,inputs/sets/prepost.csv,1,ignore,ignore,,,,,,,,set,prepost,defines pre-2010 versus post-2010 years, -prescribed_builds_wind-ofs.csv,inputs/capacity_exogenous/prescribed_builds_wind-ofs_{GSw_OffshoreFiles}_{GSw_SitingWindOfs}.csv,int(sw.GSw_OfsWind) != 0,sum,ignore,region,year,,0,0,,,,,,disaggfunc set to ignore because data will be read in at the correct spatial resolution -prescribed_builds_wind-ons.csv,inputs/capacity_exogenous/prescribed_builds_wind-ons_{GSw_SitingWindOns}.csv,1,sum,ignore,region,year,,0,0,,,,,,disaggfunc set to ignore because data will be read in at the correct spatial resolution -prescribed_nonRSC.csv,,1,sum,ignore,r,"t,i",,0,0,,1,,,,disaggfunc set to ignore because data will be written at county resolution by writecapdat.py -prescribed_rsc.csv,,1,sum,ignore,r,"t,i",,0,0,,1,,,,disaggfunc set to ignore because data will be written at county resolution by writecapdat.py +prescribed_builds_wind-ofs.csv,inputs/capacity_exogenous/prescribed_builds_wind-ofs_{GSw_OffshoreFiles}_{GSw_SitingWindOfs}.csv,int(sw.GSw_OfsWind) != 0,ignore,ignore,region,year,,0,0,,,,,,aggfunc and disaggfunc set to ignore because data will be read in at the correct spatial resolution +prescribed_builds_wind-ons.csv,inputs/capacity_exogenous/prescribed_builds_wind-ons_{GSw_SitingWindOns}.csv,1,ignore,ignore,region,year,,0,0,,,,,,aggfunc and disaggfunc set to ignore because data will be read in at the correct spatial resolution prescriptivelink0.csv,inputs/sets/prescriptivelink0.csv,1,ignore,ignore,,,,,0,,,set,prescriptivelink0,initial set of prescribed categories and their technologies - used in assigning prescribed builds, -prm_initial.csv,,1,ignore,ignore,*r,,,,0,,1,,,, -prm.csv,,1,ignore,ignore,*r,,,,0,,1,,,, -psh_sc_duration.csv,,1,ignore,ignore,,,,,0,,1,,,,Delete once aggregate_regions.py is moved up psh_supply_curves_duration.csv,inputs/storage/PSH_supply_curves_durations.csv,int(sw.GSw_Storage) != 0,ignore,ignore,,,,,0,,1,,,, psh_supply_curves_capacity.csv,inputs/supply_curve/PSH_supply_curves_capacity_{pshsupplycurve}.csv,int(sw.GSw_Storage) != 0,sum,geosize,r,wide,,1,0,,,,,, psh_supply_curves_cost.csv,inputs/supply_curve/PSH_supply_curves_cost_{pshsupplycurve}.csv,int(sw.GSw_Storage) != 0,mean,uniform,r,wide,,1,0,,,,,, -pv_cf_improve.csv,,1,ignore,ignore,,,,0,,,,,,, pvb_agg.csv,inputs/sets/pvb_agg.csv,1,ignore,ignore,,,,,0,,,set,pvb_agg,crosswalk between hybrid pv+battery configurations and technology options, -pvb_bir.csv,,1,ignore,ignore,,,,,0,,,,,, pvb_config.csv,inputs/sets/pvb_config.csv,1,ignore,ignore,,,,,,,,set,pvb_config,set of hybrid pv+battery configurations, -pvb_ilr.csv,,1,ignore,ignore,,,,,0,,,,,, -pvbcapcostmult.csv,,1,ignore,ignore,,,,0,0,,,,,, -r.csv,,1,first,ignore,0,,,0,,,1,,,,disaggfunc set to ignore because this file is dynamic to the user-defined spatial aggregation level -r_ba.csv,,1,ignore,ignore,,,,,,,,,,, -r_cendiv.csv,,1,ignore,ignore,,,,,,,,,,, -r_county.csv,,1,ignore,ignore,,,,,,,,,,, -r_cs.csv,,1,first,ignore,*r,cs,,0,0,,1,,,,disaggfunc set to ignore because the spatial aggregation level of this file is controlled by the agglevel switch -r_cs_distance_mi.csv,,1,mean,ignore,*r,cs,,0,0,,1,,,,disaggfunc set to ignore because the spatial aggregation level of this file is controlled by the agglevel switch -routes_adjacent.csv,,1,trans_lookup,ignore,"*r,rr",,,0,0,,1,,,,disaggfunc set to ignore because the spatial aggregation level of this file is controlled by the agglevel switch ramprate.csv,inputs/plant_characteristics/ramprate.csv,int(sw.GSw_OpRes) != 0,ignore,ignore,,,,,0,,,,,, ramptime.csv,inputs/reserves/ramptime.csv,int(sw.GSw_OpRes) != 0,ignore,ignore,,,,,0,,,,,, -rb.csv,,1,first,ignore,0,,,0,,,1,,,,disaggfunc set to ignore because this file is specifically a collection of all valid BA regions -rb_aggreg.csv,,1,ignore,ignore,,,,,,,,,,, recstyle.csv,inputs/state_policies/recstyle.csv,int(sw.GSw_StateRPS) != 0,ignore,ignore,*st,"RPSCat,style",,,0,,,,,, rectable.csv,inputs/state_policies/rectable.csv,int(sw.GSw_StateRPS) != 0,ignore,ignore,st_st,st,,,0,,,,,, regional_cap_cost_diff.csv,inputs/financials/reg_cap_cost_diff_{reg_cap_cost_diff_suffix}.csv,1,mean,ignore,r,wide,,1,0,,,,,,precursor data to reg_cap_cost_diff.csv -regions.csv,,1,ignore,ignore,,,,,0,,,,,,not done but only for retail resourceclass.csv,inputs/sets/resourceclass.csv,1,ignore,ignore,,,,,,,,set,resourceclass,renewable resource classes, -resources.csv,,1,resources,ignore,r,"i,ccreg",i,0,0,,1,,,,disaggfunc set to ignore because this file contains all spatial resolutions valid to the model run retire_penalty.csv,inputs/financials/retire_penalty.csv,1,ignore,ignore,,,,,0,,,,,, -retirements.csv,,1,sum,ignore,r,"t,i",,0,0,,1,,,,disaggfunc set to ignore because data will be written at county resolution by writecapdat.py -retirements_energy.csv,,1,sum,ignore,r,"t,i",,0,0,,1,,,,disaggfunc set to ignore because data will be written at county resolution by writecapdat.py rev_paths.csv,inputs/supply_curve/rev_paths.csv,1,ignore,ignore,,,,,0,,,,,, rev_transmission_basecost.csv,inputs/transmission/rev_transmission_basecost.csv,1,ignore,ignore,,,,,0,,,,,, rggi_states.csv,inputs/emission_constraints/rggi_states.csv,int(sw.GSw_RGGI) != 0,ignore,ignore,*st,,,,0,,,,,, rggicon.csv,inputs/emission_constraints/rggicon.csv,int(sw.GSw_RGGI) != 0,ignore,ignore,,,,0,,,,,,, rps_fraction.csv,inputs/state_policies/rps_fraction.csv,int(sw.GSw_StateRPS) != 0,ignore,ignore,st,st,,1,0,,,,,, -rsc_combined.csv,,1,sc_cat,ignore,r,"*i,rscbin",*i,0,0,value,1,,,,done for upv/csp/wind; disaggfunc set to ignore because supply curve data is already at county level -rsc_wsc.csv,,1,ignore,ignore,,,,,0,,,,,, sc_cat.csv,inputs/sets/sc_cat.csv,1,ignore,ignore,,,,,0,,,set,sc_cat,supply curve categories (capacity and cost), scalars.csv,inputs/scalars.csv,1,ignore,ignore,,,,,0,,1,,,, -set_actualszn.csv,,1,ignore,ignore,,,,,0,,,,,, -set_h.csv,,1,ignore,ignore,,,,,,,,,,, -set_szn.csv,,1,ignore,ignore,,,,,,,,,,, -site_bin_map.csv,,1,ignore,ignore,,,,,0,,,,,, -spur_parameters.csv,,1,ignore,ignore,,,,,0,,,,,,"TODO (only used for plotting so ignoring for now, but should be fixed)" -spurline_cost.csv,,1,ignore,ignore,,,,,0,,1,,,,Delete once aggregate_regions.py is moved up -spurline_sitemap.csv,,1,ignore,ignore,,,,,0,,,,,,handled in writesupplycurves.py startcost.csv,inputs/plant_characteristics/startcost.csv,int(sw.GSw_StartCost) != 0,ignore,ignore,,,*i,,0,,,,,, state_cap.csv,inputs/emission_constraints/state_cap.csv,int(sw.GSw_StateCap) != 0,ignore,ignore,*st,t,,0,0,,,,,, storage_duration.csv,inputs/storage/storage_duration.csv,int(sw.GSw_Storage) != 0,ignore,ignore,,,,,0,,,,,, -storage_duration_pshdata.csv,,1,ignore,ignore,r,"*i,v",,0,0,,1,,,, storage_mandates.csv,inputs/state_policies/storage_mandates.csv,int(sw.GSw_BatteryMandate) != 0,ignore,ignore,*st,t,,0,0,,,,,, -storinmaxfrac.csv,,1,ignore,ignore,r,"*i,v",,0,0,,1,,,, stressperiods_user.csv,inputs/temporal/stressperiods_{GSw_PRM_StressModel}.csv,sw.GSw_PRM_StressModel == 'user*',ignore,ignore,,,,,,,,,,, -stressperiods_seed.csv,,1,ignore,ignore,,,,,0,,,,,, supply_chain_adjust.csv,inputs/financials/supply_chain_adjust.csv,1,ignore,ignore,,,,,0,,,,,, supplycurve_egs.csv,inputs/supply_curve/supplycurve_egs-reference.csv,int(sw.GSw_Geothermal) != 0,ignore,ignore,region,,,,0,,,,,,No fix_col (only region_col) because this is an intermediate input file supplycurve_upv.csv,inputs/supply_curve/supplycurve_upv-{GSw_SitingUPV}.csv,1,ignore,ignore,region,,,,0,,,,,,No fix_col (only region_col) because this is an intermediate input file supplycurve_wind-ofs.csv,inputs/supply_curve/supplycurve_wind-ofs-{GSw_SitingWindOfs}.csv,int(sw.GSw_OfsWind) != 0,ignore,ignore,region,,,,0,,,,,,No fix_col (only region_col) because this is an intermediate input file supplycurve_wind-ons.csv,inputs/supply_curve/supplycurve_wind-ons-{GSw_SitingWindOns}.csv,1,ignore,ignore,region,,,,0,,,,,,No fix_col (only region_col) because this is an intermediate input file wst_surface.csv,inputs/sets/wst_surface.csv,1,ignore,ignore,,,,,0,,,set,wst_surface,surface water types where access is based on consumption not withdrawal, -switches.csv,,1,ignore,ignore,,,,,0,,,,,, -szn_actualszn.csv,,1,ignore,ignore,,,,,0,,,,,, tc_phaseout_schedule.csv,inputs/financials/tc_phaseout_schedule_{GSw_TCPhaseout_schedule}.csv,int(sw.GSw_TCPhaseout) != 0,ignore,ignore,,,,,0,,,,,, tech-subset-table.csv,inputs/tech-subset-table.csv,1,ignore,ignore,,,,,0,,,,,, tech_resourceclass.csv,inputs/techs/tech_resourceclass.csv,1,ignore,ignore,,,,,0,,,,,, @@ -405,74 +242,38 @@ techs_banned.csv,inputs/state_policies/techs_banned.yaml,1,ignore,ignore,wide,i, techs_banned_ces.csv,inputs/state_policies/techs_banned_ces.csv,int(sw.GSw_StateRPS) != 0,ignore,ignore,wide_st,i,,,0,,,,,, techs_banned_imports_rps.csv,inputs/state_policies/techs_banned_imports_rps.csv,int(sw.GSw_StateRPS) != 0,ignore,ignore,wide_st,i,,,0,,,,,, techs_banned_rps.csv,inputs/state_policies/techs_banned_rps.csv,int(sw.GSw_StateRPS) != 0,ignore,ignore,wide_st,i,,,0,,,,,, -temp_UnappWaterMult.csv,,1,ignore,ignore,,,,,0,,1,,,,intermediate file created in climateprep.py to create climate_UnappWaterMult.csv -temp_UnappWaterSeaAnnDistr.csv,,1,ignore,ignore,,,,,0,,1,,,,intermediate file created in climateprep.py to create climate_UnappWaterSeaAnnDistr.csv -temp_hydadjsea.csv,,1,ignore,ignore,,,,,0,,1,,,,intermediate file created in climateprep.py to create climate_hydadjsea.csv tg_rsc_cspagg.csv,inputs/sets/tg_rsc_cspagg.csv,1,ignore,ignore,,,,,0,,,set,tg_rsc_cspagg,csp technologies that belong to the same class, tg_rsc_cspagg_tmp.csv,inputs/waterclimate/tg_rsc_cspagg_tmp.csv,(int(sw.GSw_CSP) != 0) and (int(sw.GSw_WaterMain) != 0),ignore,ignore,,,,,0,,,,,, tg_rsc_upvagg.csv,inputs/sets/tg_rsc_upvagg.csv,1,ignore,ignore,,,,,0,,,set,tg_rsc_upvagg,pv and pvb technologies that belong to the same class, -timestamps.csv,,1,ignore,ignore,,,,,0,,,,,, -trancap_fut.csv,,1,sum,ignore,"*r,rr","status,trtype,t",,0,0,drop_dup_r,1,,,,disaggfunc set to ignore because all transmisison data will be read into model at the correct spatial resolution trancap_fut_cat.csv,inputs/sets/trancap_fut_cat.csv,1,ignore,ignore,,,,,,,,set,trancap_fut_cat,categories of near-term transmission projects that describe the likelihood of being completed, -trancap_init_energy.csv,,1,ignore,ignore,"*r,rr",trtype,,0,0,drop_dup_r,1,,,, -trancap_init_prm.csv,,1,ignore,ignore,"*r,rr",trtype,,0,0,drop_dup_r,1,,,, -trancap_init_transgroup.csv,,1,ignore,ignore,,,,0,0,,,,,, -trancap_init_itlgrp.csv,,1,ignore,ignore,,,,0,0,,,,,, -tranloss.csv,,1,trans_lookup,ignore,"*r,rr",trtype,,0,0,drop_dup_r,1,,,,disaggfunc set to ignore because all transmisison data will be read into model at the correct spatial resolution -trans_itc_fractions.csv,,1,ignore,ignore,,,,,0,,,,,, transmission_capacity_future.csv,inputs/transmission/transmission_capacity_future_{lvl}_{GSw_TransScen}.csv,1,sum,ignore,"r,rr","status,trtype,t",,0,0,drop_dup_r,,,,,'ignore’ in disaggfunc because all transmisison data will be read into model at appropriate spatial resolution transmission_capacity_future_baseline.csv,inputs/transmission/transmission_capacity_future_{lvl}_baseline.csv,1,sum,ignore,"r,rr","status,trtype,t",,0,0,drop_dup_r,,,,,'ignore’ in disaggfunc because all transmisison data will be read into model at appropriate spatial resolution transmission_cost_ac.csv,inputs/transmission/transmission_cost_ac_{GSw_TransUpgradeMethod}_{lvl}.h5,1,trans_lookup,ignore,"r,rr",tscbin,,0,0,drop_dup_r,,,,, transmission_cost_dc.csv,inputs/transmission/transmission_cost_dc_{lvl}.csv,1,trans_lookup,ignore,"r,rr",,,0,0,drop_dup_r,,,,, transmission_distance.csv,inputs/transmission/transmission_distance_{lvl}.h5,1,trans_lookup,ignore,"r,rr",,,0,0,drop_dup_r,,,,,Stored in wide-format h5 to reduce county filesize but converted to long in copy_files.py -transmission_line_fom.csv,,1,trans_lookup,ignore,"*r,rr",trtype,,0,0,drop_dup_r,1,,,,disaggfunc set to ignore because all transmisison data will be read into model at the correct spatial resolution trtype.csv,inputs/sets/trtype.csv,1,ignore,ignore,,,,,,,,set,trtype,transmission capacity type, -unapp_water_sea_distr.csv,inputs/waterclimate/unapp_water_sea_distr.csv,int(sw.GSw_WaterMain) != 0,mean,geosize,r,"wst,wide",,1,0,,,,,, +unapp_water_sea_distr.csv,inputs/waterclimate/unapp_water_sea_distr.csv,int(sw.GSw_WaterMain) != 0,mean,uniform,r,"wst,wide",,1,0,,,,,, UnappWaterMult.csv,inputs/climate/{climatescen}/UnappWaterMult.csv,int(sw.GSw_ClimateWater) != 0,mean,uniform,r,"wst,month,t",,0,0,,,,,, UnappWaterMultAnn.csv,inputs/climate/{climatescen}/UnappWaterMultAnn.csv,int(sw.GSw_ClimateWater) != 0,mean,uniform,r,"wst,t",,0,0,,,,,, UnappWaterSeaAnnDistr.csv,inputs/climate/{climatescen}/UnappWaterSeaAnnDistr.csv,int(sw.GSw_ClimateWater) != 0,mean,uniform,r,"wst,month,t",,0,0,,,,,, unbundled_limit_ces.csv,inputs/state_policies/unbundled_limit_ces.csv,int(sw.GSw_StateRPS) != 0,ignore,ignore,st,,,,0,,,,,, unbundled_limit_rps.csv,inputs/state_policies/unbundled_limit_rps.csv,int(sw.GSw_StateRPS) != 0,ignore,ignore,st,,,,0,,,,,, unitdata.csv,inputs/capacity_exogenous/ReEDS_generator_database_final_{unitdata}.csv,1,ignore,ignore,FIPS,,,,0,,,,,, -unitsize.csv,,1,ignore,ignore,,,,,0,,1,,,, unitspec_upgrades.csv,inputs/sets/unitspec_upgrades.csv,1,ignore,ignore,,,,,0,,,set,unitspec_upgrades,upgraded technologies that get unit-specific characteristics, -upgrade_costs_ccs_coal.csv,,1,ignore,ignore,,,,,0,,,,,, -upgrade_costs_ccs_gas.csv,,1,ignore,ignore,,,,,0,,,,,, upgrade_hintage_char.csv,inputs/sets/upgrade_hintage_char.csv,1,ignore,ignore,,,,,0,,,set,upgrade_hintage_char,sets to operate over in extension of hintage_data characteristics when sw_upgrades = 1, upgrade_link.csv,inputs/upgrades/upgrade_link.csv,1,ignore,ignore,,,,,0,,,,,, upgrade_mult_advanced.csv,inputs/upgrades/upgrade_mult_atb23_ccs_mid.csv,int(sw.GSw_UpgradeCost_Mult) != 0,ignore,ignore,,,,,0,,,,,, upgrade_mult_conservative.csv,inputs/upgrades/upgrade_mult_atb23_ccs_con.csv,int(sw.GSw_UpgradeCost_Mult) == 2,ignore,ignore,,,,,0,,,,,, -upgrade_mult_final.csv,,1,ignore,ignore,,,,,0,,,,,, upgrade_mult_mid.csv,inputs/upgrades/upgrade_mult_atb23_ccs_mid.csv,"int(sw.GSw_UpgradeCost_Mult) in [0, 4]",ignore,ignore,,,,,0,,,,,, upgradelink_water.csv,inputs/upgrades/upgradelink_water.csv,1,ignore,ignore,,,,,0,,,,,, uranium_price.csv,inputs/fuelprices/uranium_{uraniumscen}.csv,1,ignore,ignore,,,,0,0,,,,,, -va_ng_crf_penalty.csv,,1,ignore,ignore,,,,0,0,,,,,, -val_ba.csv,,1,ignore,ignore,,,,,,,1,,,, -county.csv,,1,ignore,ignore,,,,0,none,,,,,, -val_cs.csv,,1,ignore,ignore,,,,0,none,,,,,, -val_r_all.csv,,1,ignore,ignore,,,,0,none,,,,,, var_map.csv,inputs/valuestreams/var_map.csv,1,ignore,ignore,,,,,0,,,,,, wat_access_cap_cost.csv,inputs/waterclimate/wat_access_cap_cost.csv,int(sw.GSw_WaterMain) != 0,sc_cat,geosize,r,*wst,,0,0,value,,,,, -water_req_psh_10h_1_51.csv,inputs/waterclimate/water_req_psh_10h_1_51.csv,(int(sw.GSw_PSHwatercon) != 0) and (int(sw.GSw_WaterMain) != 0),mean,geosize,r,wide,,1,0,,,,,,not yet ready for county-level disaggregation +water_req_psh_10h_1_51.csv,inputs/waterclimate/water_req_psh_10h_1_51.csv,(int(sw.GSw_PSHwatercon) != 0) and (int(sw.GSw_WaterMain) != 0),sum,geosize,r,wide,,1,0,,,,,,not yet ready for county-level disaggregation water_with_cons_rate.csv,inputs/waterclimate/water_with_cons_rate.csv,int(sw.GSw_WaterMain) != 0,ignore,ignore,,,,,0,,,,,, -wind_retirements.csv,,1,sum,ignore,r,"i,v,wide",,1,0,,1,,,,disaggfunc set to ignore because data will be written at county resolution by writecapdat.py -windcfmult.csv,,1,ignore,ignore,,,,1,0,,,,,, -windcfout.csv,,1,ignore,ignore,,,,1,0,,,,,, windows.csv,inputs/userinput/windows_{windows_suffix}.csv,1,ignore,ignore,,,,,0,,,,,, wst_climate.csv,inputs/sets/wst_climate.csv,1,ignore,ignore,,,,,0,,,set,wst_climate,, -x.csv,,1,ignore,ignore,,,,,0,,,,,, -x_r.csv,,1,first,ignore,"*x,r",,,0,0,,1,,,,handled in writesupplycurves.py yearafter.csv,inputs/sets/yearafter.csv,1,ignore,ignore,,,,,,,,set,yearafter,set to loop over for the final year calculation, -inputs.gdx,,1,ignore,ignore,,,,,0,,,,,, -inputs.h5,,1,ignore,ignore,,,,,0,,,,,, -plexos_inputs.gdx,,1,ignore,ignore,,,,,0,,,,,, -load.h5,,1,ignore,ignore,wide,"year,hour",,1,keepindex,,1,,,,Disaggregation and aggregation handled in hourly_load.py -recf.h5,,1,recf,ignore,wide,datetime,,1,keepindex,,1,,,, -csp.h5,,1,csp,ignore,wide,datetime,,1,keepindex,,1,,,, -gswitches.txt,,1,ignore,ignore,,,,,0,,,,,, -scalars.txt,,1,ignore,ignore,,,,,0,,,,,, Project.toml,Project.toml,1,ignore,ignore,,,,,,,,,,, gamslice.txt,gamslice.txt,0,ignore,ignore,,,,,,,,,,, -max_hintage_number.txt,,1,ignore,ignore,,,,,0,,,,,, -mcs_latin_hypercube_samples.csv,,0,ignore,ignore,,,,,,,,,,, runreeds.py,runreeds.py,1,ignore,ignore,,,,,,,,,,, diff --git a/reeds/io.py b/reeds/io.py index 9d35ab7b..994431f1 100644 --- a/reeds/io.py +++ b/reeds/io.py @@ -413,10 +413,23 @@ def get_dfmap(case=None, levels=None, exclude_water_areas=False): return dfmap -def get_disagg_data(case, disagg_variable='population'): +def get_disagg_data( + case: str | Path, + disagg_variable: Literal['hydroexist', 'geosize', 'population', 'state_lpf'] +): """ Get state/region-to-county disaggregation factors for the given variable. """ + if disagg_variable not in [ + 'hydroexist', + 'geosize', + 'population', + 'state_lpf' + ]: + raise NotImplementedError( + f"'{disagg_variable}' is not a valid disagg variable." + ) + return pd.read_csv( os.path.join(case, 'inputs_case', f'disagg_{disagg_variable}.csv') ) diff --git a/reeds/spatial.py b/reeds/spatial.py index 74f0392c..cbaa0a9b 100644 --- a/reeds/spatial.py +++ b/reeds/spatial.py @@ -3,6 +3,7 @@ import pandas as pd import geopandas as gpd from pathlib import Path +from typing import Literal sys.path.append(str(Path(__file__).parent.parent)) import reeds @@ -215,3 +216,355 @@ def get_map(resolution='county', source='tiger', crs='ESRI:102008'): df = df.set_index(index[resolution]).copy() return df + + +def apply_uniform_disaggregation( + df: pd.DataFrame, + region_col: str +): + """ + Disaggregate a dataframe whose regional scope is the 134 legacy + zones (as specified in the dataframe's 'region_col' column) to + the county level by copying the zonal values to their constituent counties. + """ + county2zone = reeds.io.get_county2zone(GSw_ZoneSet='z134', as_map=False) + county2zone['FIPS'] = 'p' + county2zone.FIPS + df = ( + df + .merge(county2zone[['r', 'FIPS']], left_on=region_col, right_on='r') + .drop(columns=[region_col, 'r']) + .rename(columns={'FIPS': region_col}) + [df.columns] + ) + + return df + + +def apply_variable_disaggregation( + df: pd.DataFrame, + region_col: str, + fix_cols: list[str], + inputs_case: str, + disagg_variable: Literal['hydroexist', 'geosize', 'population', 'state_lpf'] +): + """ + Disaggregate a dataframe whose regional scope is the 134 legacy zones + (as specified in the dataframe's 'region_col' column) to the county level + by allocating zonal values to their corresponding counties according to + each county's share of 'disagg_variable'. + + The county shares of 'disagg_variable' are read from the provided + 'inputs_case' folder. The 'fix_cols' argument lists columns that + should be considered part of the dataframe's index. + """ + # Save the dataframe's original columns + # (used later to put the output in the correct format) + original_columns = df.columns + + # Get legacy zone-to-county allocation factors for disagg_variable + disagg_data = reeds.io.get_disagg_data( + os.path.dirname(inputs_case), + disagg_variable + ) + + # Append the allocation factors to the dataframe + if disagg_variable == 'hydroexist': + df = df.merge( + disagg_data, + left_on=[region_col, 'i'], + right_on=['PCA_REG', 'i'] + ) + else: + df = df.merge( + disagg_data[['PCA_REG', 'FIPS', 'fracdata']], + left_on=region_col, + right_on='PCA_REG' + ) + + # Replace legacy zones in region_col with the county FIPS codes + df = ( + df.drop(columns=[region_col, 'PCA_REG']) + .rename(columns={'FIPS': region_col}) + ) + + # If the dataframe values are 'wide', set the dataframe index + # and then multiply all values by their allocation factor. + # Otherwise, multiply the 'value' and allocation factor columns. + if 'wide' in fix_cols: + index_cols = ( + [col for col in fix_cols if col in original_columns] + + [region_col] + ) + df = df.set_index(index_cols) + df = ( + df.mul(df['fracdata'], axis='index') + .reset_index() + [original_columns] + ) + else: + df = ( + df.assign(value=lambda x: x['value'] * x['fracdata']) + [original_columns] + ) + + return df + + +def apply_supply_curve_disaggregation( + df: pd.DataFrame, + region_col: str, + fix_cols: list[str], + inputs_case: str, + disagg_variable: Literal['hydroexist', 'geosize', 'population', 'state_lpf'] +): + """ + Disaggregate a supply curve dataframe whose regional scope is the 134 + legacy zones (as specified in the dataframe's 'region_col' column) to + the county level by allocating zonal capacities to their corresponding + counties according to each county's share of 'disagg_variable' and + assigning zonal costs to counties uniformly. + + The county shares of 'disagg_variable' are read from the provided + 'inputs_case' folder. The 'fix_cols' argument lists columns that + should be considered part of the dataframe's index. + """ + # Disaggregate zonal capacities to counties according to disagg_variable + df_cap = df.loc[df['sc_cat'] == 'cap'].drop(columns='sc_cat') + df_cap = apply_variable_disaggregation( + df_cap, + region_col, + fix_cols, + inputs_case, + disagg_variable + ) + + # Disaggregate zonal costs to counties uniformly + df_cost = df.loc[df['sc_cat'] == 'cost'].drop(columns='sc_cat') + df_cost = apply_uniform_disaggregation(df_cost, region_col) + + # Combine capacities and costs and return to original format + df = ( + pd.concat( + [df_cap.assign(sc_cat='cap'), df_cost.assign(sc_cat='cost')], + ignore_index=True + ) + [df.columns] + ) + + return df + + +def downscale_from_legacy_zone_to_county( + df: pd.DataFrame, + region_col: str, + fix_cols: list[str], + inputs_case: str, + disaggfunc: str +): + """ + Disaggregate a dataframe whose regional scope is the 134 legacy zones + (as specified in the dataframe's 'region_col' column) to the county level + according to the specified disaggregation function ('disaggfunc'). + + If 'disaggfunc' is a variable by which zonal values are allocated to + counties (e.g., population), county shares of the variable are read from + the provided 'inputs_case' folder. The 'fix_cols' argument lists columns + that should be considered part of the dataframe's index. + """ + # If 'region_col' is 'wide' (zones are contained in the column headers + # rather than one of the columns), temporarily reformat the dataframe + # to 'long' format, specifying zones in a new 'r' column + if region_col == 'wide': + df = pd.melt(df, id_vars=fix_cols, var_name='r') + region_col = 'r' + # Indicate that the dataframe should be reformatted + # to its original (wide) format after disaggregation + reformat_to_wide = True + else: + reformat_to_wide = False + + # Apply disaggregation according to 'disaggfunc' + match disaggfunc: + case 'uniform': + df = apply_uniform_disaggregation(df, region_col) + case 'geosize' | 'hydroexist' | 'population' | 'state_lpf': + if 'sc_cat' in df.columns: + df = apply_supply_curve_disaggregation( + df, + region_col, + fix_cols, + inputs_case, + disaggfunc + ) + else: + df = apply_variable_disaggregation( + df, + region_col, + fix_cols, + inputs_case, + disaggfunc + ) + case 'ignore': + pass + case _: + raise NotImplementedError( + f"Disaggfunc '{disaggfunc}' has not been implemented." + ) + + # If applicable, restore original (wide) format + if reformat_to_wide: + df = ( + pd.pivot_table(df, values='value', index=fix_cols, columns=['r']) + .reset_index() + .rename_axis('', axis=1) + ) + + return df + + +def apply_supply_curve_aggregation( + df: pd.DataFrame, + region_col: str, + groupby_cols: list[str], + county_r_map: dict[str, str], +): + """ + Aggregate a supply curve dataframe with county-level regional scope + (as specified in the dataframe's 'region_col' column) to the zone level + (using zones corresponding to 'county_r_map') by combining capacities + via sum and combining costs via capacity-weighted average. The + 'groupby_cols' argument lists columns that should be grouped before + aggregation. + """ + # Save the dataframe's original columns + # (used later to put the output in the correct format) + original_columns = df.columns + + # Reformat dataframe so that capacities and costs are listed + # side-by-side for each group (based on 'groupby_cols') + df_cap = ( + df.loc[df.sc_cat == 'cap'] + .drop(columns='sc_cat') + .set_index(groupby_cols) + ['value'] + .rename('cap') + ) + df_cost = ( + df.loc[df.sc_cat == 'cost'] + .drop(columns='sc_cat') + .set_index(groupby_cols) + ['value'] + .rename('cost') + ) + df = ( + pd.concat([df_cap, df_cost], axis=1) + .reset_index() + ) + + # Calculate products of weights (capacities) and costs, which will be + # used to calculate capacity-weighted costs. Where capacities are + # null or 0, we use 1 MW as the weight. + df['cap_weight'] = df['cap'].fillna(1).replace(0, 1) + df['cap_weight_times_cost'] = df['cap_weight'] * df['cost'] + + # Map counties to zones + df[region_col] = df[region_col].map(county_r_map) + + # Calculate capacity/cost/weight totals for each group + # and then divide total weight*cost by total weight to derive + # capacity-weighted cost for each group. + df = ( + df.groupby(groupby_cols) + .sum() + .assign(cost=lambda x: x['cap_weight_times_cost'] / x['cap_weight']) + .reset_index() + .drop(columns=['cap_weight', 'cap_weight_times_cost']) + ) + + # Restore original format + df = pd.melt( + df, + id_vars=groupby_cols, + value_vars=['cap', 'cost'], + var_name='sc_cat' + ) + df = df[original_columns] + + return df + + +def upscale_from_county_to_zone( + df: pd.DataFrame, + region_col: str, + fix_cols: list[str], + inputs_case: str, + aggfunc: str +): + """ + Aggregate a dataframe with county-level regional scope (as specified in + the dataframe's 'region_col' column) to the zone level (using zones + corresponding to 'inputs_case') according to the specified aggregation + function ('aggfunc'). The 'fix_cols' argument lists columns that should + be considered part of the dataframe's index. + """ + # If 'region_col' is 'r_cendiv', counties are stored in the 'r' column + # and dataframe values are stored in columns whose headers are cendivs, + # so for this function, 'r' can be treated as the region column. + if region_col == 'r_cendiv': + region_col = 'r' + + # If 'region_col' is 'wide' (zones are contained in the column headers + # rather than one of the columns), temporarily reformat the dataframe + # to 'long' format, specifying zones in a new 'r' column + if region_col == 'wide': + df = pd.melt(df, id_vars=fix_cols, var_name='r') + region_col = 'r' + # Indicate that the dataframe should be reformatted + # to its original (wide) format after aggregation + reformat_to_wide = True + else: + reformat_to_wide = False + + # Identify columns that should be grouped before aggregation + groupby_cols = ( + [col for col in fix_cols if col in df.columns] + + [region_col] + ) + + # Get county-to-zone map + county_r_map = reeds.io.get_county2zone(os.path.dirname(inputs_case)) + county_r_map.index = 'p' + county_r_map.index.str.zfill(5) + + # Apply aggregation according to 'aggfunc' + match aggfunc: + case 'sc_cat': + df = apply_supply_curve_aggregation( + df, + region_col, + groupby_cols, + county_r_map + ) + case 'sum' | 'mean': + df[region_col] = df[region_col].map(county_r_map) + df = ( + df.groupby(groupby_cols, as_index=False) + .agg(aggfunc) + [df.columns] + ) + case 'ignore': + pass + case _: + raise NotImplementedError( + f"Aggfunc '{aggfunc}' has not been implemented." + ) + + + # If applicable, restore original (wide) format + if reformat_to_wide: + df = ( + pd.pivot_table(df, values='value', index=fix_cols, columns=['r']) + .reset_index() + .rename_axis('', axis=1) + ) + + return df \ No newline at end of file