From 24bc66c018f566ec11c8035c34b4f5546a3e236e Mon Sep 17 00:00:00 2001
From: kodiobika <kodiobika@gmail.com>
Date: Fri, 24 Apr 2026 16:31:13 -0400
Subject: [PATCH] Migrate reVeal2ReEDS pipeline to hourlize

---
 hourlize/inputs/configs/config_base.json |   1 +
 hourlize/load.py                         |  24 +++
 hourlize/reveal2reeds/config.json        |   8 +
 hourlize/reveal2reeds/reveal2reeds.py    | 239 +++++++++++++++++++++++
 4 files changed, 272 insertions(+)
 create mode 100644 hourlize/reveal2reeds/config.json
 create mode 100644 hourlize/reveal2reeds/reveal2reeds.py

diff --git a/hourlize/inputs/configs/config_base.json b/hourlize/inputs/configs/config_base.json
index 1409d39a..3f840d3f 100644
--- a/hourlize/inputs/configs/config_base.json
+++ b/hourlize/inputs/configs/config_base.json
@@ -51,6 +51,7 @@
       "2045": 1,
       "2050": 1
     },
+    "custom_data_center_projection_years": [2025, 2030, 2035, 2040, 2045, 2050],
     "scenarios": ["IRA cons", "central", "baseline"],
     "sector_config_file": "{hourlize_path}/inputs/load/sector_config.json",
     "weather_years": [2007,2008,2009,2010,2011,2012,2013,2016,2017,2018,2019,2020,2021,2022,2023]
diff --git a/hourlize/load.py b/hourlize/load.py
index 0bc27802..97cbb4ed 100644
--- a/hourlize/load.py
+++ b/hourlize/load.py
@@ -9,6 +9,19 @@
 import pandas as pd
 import site
 from types import SimpleNamespace
+from reveal2reeds import reveal2reeds
+
+def get_reveal2reeds_config() -> dict:
+    configpath = "reveal2reeds/config.json"
+    with open(configpath, "r") as f:
+        config = json.load(f, object_pairs_hook=OrderedDict)
+    reveal2reeds_config = SimpleNamespace(**config)
+    reveal2reeds_config.cooling_proportions_source = (
+        reveal2reeds_config.cooling_proportions_source
+        .format(scenario=reveal2reeds_config.scenario)
+    )
+
+    return reveal2reeds_config
 
 def get_state_name_code_map(reeds_path: str) -> dict:
     """
@@ -269,6 +282,17 @@ def create_hourly_state_load_for_model_year(
         compression='gzip',
         parse_dates=['weather_datetime']
     )
+
+    # If applicable, replace (or add to) the data center cooling and IT load
+    # using the custom projections configured in reveal2reeds/config.json
+    if model_year in cf.custom_data_center_projection_years:
+        reveal2reeds_config = get_reveal2reeds_config()
+        df_load = reveal2reeds.apply_custom_data_center_demand_projections(
+            df_load,
+            model_year,
+            reveal2reeds_config
+        )
+
     # Downselect to specified weather years
     df_load = df_load.loc[df_load.weather_datetime.dt.year.isin(weather_years)]
 
diff --git a/hourlize/reveal2reeds/config.json b/hourlize/reveal2reeds/config.json
new file mode 100644
index 00000000..6eedc7c0
--- /dev/null
+++ b/hourlize/reveal2reeds/config.json
@@ -0,0 +1,8 @@
+{
+    "national_demand_source": "/projects/largeload/geospatial/runs/random_forest_base_weights_01_09_2026/downscaling_2026-01-07_agg64/eer_national_central/eer_national_central_downscaled_projections.csv",
+    "cooling_proportions_source": "/projects/largeload/reVeal2ReEDS/files/{scenario}_dc_cooling_prop.csv",
+    "propagation_source": "/projects/largeload/reVeal2ReEDS/files/weather_year_propagation.csv",
+    "replace_existing_data_center_demand": true,
+    "scenario": "central",
+    "state_proportions_source": "/projects/eerload/source_eer_load_profiles/20250512_eer_download/shape_outputs_2025-05-12/annual_files/data center load allocation ADP 2024.xlsx"
+}
\ No newline at end of file
diff --git a/hourlize/reveal2reeds/reveal2reeds.py b/hourlize/reveal2reeds/reveal2reeds.py
new file mode 100644
index 00000000..03331fd6
--- /dev/null
+++ b/hourlize/reveal2reeds/reveal2reeds.py
@@ -0,0 +1,239 @@
+import numpy as np
+import pandas as pd
+
+def get_national_model_year_data_center_demand(
+    national_demand_source_path: str,
+    model_year: int
+) -> int:
+    data_center_demand = pd.read_csv(national_demand_source_path)
+    model_year_data_center_demand = (
+        data_center_demand.loc[(
+            data_center_demand.year == model_year
+        )]
+        .copy()
+    )
+    national_model_year_data_center_demand = (
+        model_year_data_center_demand['total_data_center_mw'].sum()
+    )
+
+    return national_model_year_data_center_demand
+
+def get_propagation_by_weather_year(
+    propagation_source_path: str,
+    scenario: str
+) -> pd.Series:
+    propagation_by_weather_year = pd.read_csv(propagation_source_path)
+    propagation_by_weather_year = (
+        propagation_by_weather_year.loc[(
+            propagation_by_weather_year.scenario == scenario
+        )]
+        .set_index('year')
+        ['avg_prop']
+    )
+
+    return propagation_by_weather_year
+    
+
+def calculate_national_data_center_demand_hourly(
+    df_load: pd.DataFrame,
+    model_year: int,
+    scenario: str,
+    national_demand_source_path: str,
+    propagation_source_path: str
+):
+    # Calculate national projected data center demand for the model year
+    national_data_center_demand = get_national_model_year_data_center_demand(
+        national_demand_source_path,
+        model_year
+    )
+
+    # Get propagation factors by weather year for the given scenario.
+    # Propagation factors represent the percentage of projected national
+    # data center demand for the model year that is expected to be
+    # realized during each hour of each weather year.
+    propagation_by_weather_year = get_propagation_by_weather_year(
+        propagation_source_path,
+        scenario
+    )
+
+    # Estimate national hourly load values for each weather year
+    # by multiplying the propagation factors by national data
+    # center demand for the model year.
+    national_data_center_demand_hourly = pd.DataFrame(
+        index=df_load['weather_datetime'].drop_duplicates()
+    )
+    national_data_center_demand_hourly['propagation_factor'] = (
+        national_data_center_demand_hourly.index.year
+        .map(propagation_by_weather_year)
+    )
+    national_data_center_demand_hourly['demand_MW'] = (
+        national_data_center_demand_hourly['propagation_factor']
+        * national_data_center_demand
+    )
+    national_data_center_demand_hourly = (
+        national_data_center_demand_hourly['demand_MW']
+    )
+
+    return national_data_center_demand_hourly
+
+def get_data_center_cooling_weights(
+    cooling_proportions_source_path: str
+) -> pd.DataFrame:
+    state_cooling_weights = pd.read_csv(cooling_proportions_source_path)
+    state_cooling_weights["weather_datetime"] = (
+        pd.to_datetime(state_cooling_weights["weather_datetime"])
+    )
+    national_cooling_weights = (
+        state_cooling_weights.groupby("weather_datetime")
+        ["cooling_prop"]
+        .mean()
+    )
+
+    return national_cooling_weights
+
+def get_data_center_state_weights(
+    state_proportions_source_path: str,
+    model_year: int,
+    scenario: str
+) -> pd.DataFrame:
+    data_center_year = 2024 if model_year == 2025 else model_year
+    state_weights = pd.read_excel(state_proportions_source_path)
+    state_weights = (
+        state_weights.loc[
+            (state_weights['Run Name'] == scenario)
+            & (state_weights['Year'] == data_center_year)
+        ]
+        .set_index('State')
+        ["% of Total Data Center Load"]
+    )
+
+    return state_weights
+
+
+def apply_state_and_subsector_weights(
+    national_demand: pd.DataFrame,
+    state_weights: pd.Series,
+    subsector_weights: pd.Series,
+    subsector: str,
+):
+    national_subsector_demand = national_demand * subsector_weights
+    state_subsector_demand = pd.DataFrame(
+        np.outer(national_subsector_demand, state_weights),
+        index=national_subsector_demand.index,
+        columns=state_weights.index
+    )
+    state_subsector_demand = (
+        state_subsector_demand.reset_index()
+        .assign(
+            sector='commercial',
+            subsector=subsector,
+            dispatch_feeder='Commercial'
+        )
+        .rename_axis(columns='')
+    )
+
+    return state_subsector_demand
+
+def calculate_state_subsector_data_center_demand_hourly(
+    df_load: pd.DataFrame,
+    model_year: int,
+    scenario: str,
+    national_demand_source_path: str,
+    cooling_proportions_source_path: str,
+    propagation_source_path: str,
+    state_proportions_source_path: str
+) -> pd.DataFrame:
+    # Calculate hourly national data center demand
+    national_data_center_demand_hourly = (
+        calculate_national_data_center_demand_hourly(
+            df_load,
+            model_year,
+            scenario,
+            national_demand_source_path,
+            propagation_source_path
+        )
+    )
+    # Calculate proportion of national demand attributable to each state
+    state_weights = get_data_center_state_weights(
+        state_proportions_source_path,
+        model_year,
+        scenario
+    )
+    state_weights = state_weights.loc[state_weights.index.isin(df_load.columns)]
+    # Get proportion of hourly demand attributable to cooling
+    data_center_cooling_weights = get_data_center_cooling_weights(
+        cooling_proportions_source_path
+    )
+    # Calculate state-by-state hourly demand for data center cooling subsector
+    state_data_center_cooling_demand_hourly = apply_state_and_subsector_weights(
+        national_demand=national_data_center_demand_hourly,
+        state_weights=state_weights,
+        subsector_weights=data_center_cooling_weights,
+        subsector='data center cooling',
+    )
+    # Calculate state-by-state hourly demand for data center IT subsector
+    data_center_it_weights = 1 - data_center_cooling_weights
+    state_data_center_it_demand_hourly = apply_state_and_subsector_weights(
+        national_demand=national_data_center_demand_hourly,
+        state_weights=state_weights,
+        subsector_weights=data_center_it_weights,
+        subsector='data center it',
+    )
+    # Concatenate all state subsector-level demand
+    state_subsector_data_center_demand_hourly = (
+        pd.concat(
+            [
+                state_data_center_cooling_demand_hourly,
+                state_data_center_it_demand_hourly
+            ],
+            ignore_index=True
+        )
+        .fillna(0)
+    )
+    return state_subsector_data_center_demand_hourly
+
+def apply_custom_data_center_demand_projections(
+    df_load: pd.DataFrame,
+    model_year: int,
+    cf: dict
+):
+    state_subsector_data_center_demand_hourly = (
+        calculate_state_subsector_data_center_demand_hourly(
+            df_load,
+            model_year,
+            cf.scenario,
+            cf.national_demand_source,
+            cf.cooling_proportions_source,
+            cf.propagation_source,
+            cf.state_proportions_source
+        )
+    )
+
+    if cf.replace_existing_data_center_demand:
+        data_center_subsectors = ['data center cooling', 'data center it']
+        df_load = pd.concat(
+            [
+                df_load.loc[~df_load.subsector.isin(data_center_subsectors)],
+                state_subsector_data_center_demand_hourly
+            ],
+            ignore_index=True
+        )
+    else:
+        df_load = (
+            pd.concat(
+                [df_load, state_subsector_data_center_demand_hourly],
+                ignore_index=True
+            )
+            .groupby(
+                [
+                    'weather_datetime',
+                    'sector',
+                    'subsector',
+                    'dispatch_feeder'
+                ],
+                as_index=False
+            )
+            .sum(numeric_only=True)
+        )
+
+    return df_load
\ No newline at end of file