From 750eccce968bfdf1da4d78828dca6fceec64467d Mon Sep 17 00:00:00 2001 From: paulf81 Date: Wed, 8 Apr 2026 14:37:06 -0600 Subject: [PATCH 01/19] interpolate with midpoint correction --- hercules/utilities.py | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/hercules/utilities.py b/hercules/utilities.py index 401e15bd..20b744d1 100644 --- a/hercules/utilities.py +++ b/hercules/utilities.py @@ -479,7 +479,16 @@ def interpolate_df(df, new_time): def _interpolate_with_polars(df, new_time, datetime_cols, numeric_cols): - """Interpolate using Polars backend. + """Interpolate using Polars backend with midpoint correction. + + Numeric columns are assumed to represent period-averaged values whose + timestamps mark the start of each period. To recover the best estimate + of the instantaneous value at a query time, each value is assigned to the + midpoint of its interval before interpolating. + + Datetime columns (e.g. ``time_utc``) are instantaneous coordinates — they + map simulation time to wall-clock time directly — so they are interpolated + without the midpoint shift. Args: df (pd.DataFrame): Input DataFrame. @@ -512,8 +521,10 @@ def _interpolate_with_polars(df, new_time, datetime_cols, numeric_cols): time_values = col_data["time"].to_numpy() col_values = col_data[col].to_numpy() + midpoints = _compute_interval_midpoints(time_values) + # Linear interpolation with float32 precision - interpolated_values = np.interp(new_time, time_values, col_values).astype( + interpolated_values = np.interp(new_time, midpoints, col_values).astype( hercules_float_type ) @@ -540,6 +551,25 @@ def _interpolate_with_polars(df, new_time, datetime_cols, numeric_cols): return result_pl.to_pandas() +def _compute_interval_midpoints(time_values): + """Compute the midpoints of consecutive time intervals. + + For start-of-period timestamps, each value is best represented at the + centre of its interval. The last interval width is assumed equal to the + preceding one. + + Args: + time_values (np.ndarray): Sorted array of start-of-period timestamps. + + Returns: + np.ndarray: Array of interval midpoints, same length as *time_values*. + """ + midpoints = np.empty_like(time_values, dtype=np.float64) + midpoints[:-1] = (time_values[:-1] + time_values[1:]) / 2.0 + midpoints[-1] = time_values[-1] + (time_values[-1] - time_values[-2]) / 2.0 + return midpoints + + def find_time_utc_value(df, time_value, time_column="time", time_utc_column="time_utc"): """Return UTC timestamp at a given time value via linear interpolation or extrapolation. 
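A minimal standalone sketch of the midpoint correction this patch introduces, using invented sample data; only the midpoint construction mirrors `_compute_interval_midpoints`, and the numbers are illustrative rather than taken from any Hercules input file:

```python
import numpy as np

# Start-of-period timestamps (seconds); each value is the average over the following 60 s.
times = np.array([0.0, 60.0, 120.0])
values = np.array([100.0, 200.0, 300.0])

# Midpoints as in _compute_interval_midpoints: the last interval width is
# assumed equal to the preceding one.
midpoints = np.empty_like(times)
midpoints[:-1] = (times[:-1] + times[1:]) / 2.0
midpoints[-1] = times[-1] + (times[-1] - times[-2]) / 2.0
# midpoints -> [ 30.,  90., 150.]

# Interpolating on the midpoints (as _interpolate_with_polars now does) estimates the
# instantaneous value at t = 60 s as the mean of the two neighboring period averages.
print(np.interp(60.0, midpoints, values))  # 150.0 (interpolating on `times` instead gives 200.0)
```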
From 065197547784f019708d99494c15a0e410398b48 Mon Sep 17 00:00:00 2001 From: paulf81 Date: Wed, 8 Apr 2026 14:37:32 -0600 Subject: [PATCH 02/19] update tests --- .../example_00_regression_test.py | 4 +- .../example_00b_regression_precom_test.py | 4 +- .../example_03_regression_test.py | 6 +- tests/power_playback_test.py | 6 +- .../solar_pysam_pvwatts_regression_test.py | 58 +++++++++---------- tests/test_inputs/scada_input.csv | 2 +- tests/utilities_test.py | 31 ++++++---- tests/wind_farm_dynamic_floris_test.py | 23 +++++--- tests/wind_farm_precom_floris_test.py | 28 ++++++--- tests/wind_farm_scada_power_test.py | 6 +- 10 files changed, 100 insertions(+), 68 deletions(-) diff --git a/tests/example_regression_tests/example_00_regression_test.py b/tests/example_regression_tests/example_00_regression_test.py index ca2b265f..85723fda 100644 --- a/tests/example_regression_tests/example_00_regression_test.py +++ b/tests/example_regression_tests/example_00_regression_test.py @@ -18,8 +18,8 @@ # Test configuration NUM_TIME_STEPS = 5 -EXPECTED_FINAL_WIND_POWER = 3271 # Updated after wind model changes -EXPECTED_FINAL_PLANT_POWER = 3271 # Same as wind power for wind-only case +EXPECTED_FINAL_WIND_POWER = 3265 # Updated for midpoint interpolation correction +EXPECTED_FINAL_PLANT_POWER = 3265 # Same as wind power for wind-only case # File names INPUT_FILE = "hercules_input.yaml" diff --git a/tests/example_regression_tests/example_00b_regression_precom_test.py b/tests/example_regression_tests/example_00b_regression_precom_test.py index c8c6716c..20c6cd33 100644 --- a/tests/example_regression_tests/example_00b_regression_precom_test.py +++ b/tests/example_regression_tests/example_00b_regression_precom_test.py @@ -20,8 +20,8 @@ # Test configuration NUM_TIME_STEPS = 5 -EXPECTED_FINAL_WIND_POWER = 3021 # Updated for precomputed FLORIS model -EXPECTED_FINAL_PLANT_POWER = 3021 # Same as wind power for wind-only case +EXPECTED_FINAL_WIND_POWER = 3020 # Updated for midpoint interpolation correction +EXPECTED_FINAL_PLANT_POWER = 3020 # Same as wind power for wind-only case # File names INPUT_FILE = "hercules_input.yaml" diff --git a/tests/example_regression_tests/example_03_regression_test.py b/tests/example_regression_tests/example_03_regression_test.py index b5e67777..16d74c8f 100644 --- a/tests/example_regression_tests/example_03_regression_test.py +++ b/tests/example_regression_tests/example_03_regression_test.py @@ -21,9 +21,9 @@ # Test configuration NUM_TIME_STEPS = 5 -EXPECTED_FINAL_WIND_POWER = 14322 # Updated for 9 turbines with large config -EXPECTED_FINAL_SOLAR_POWER = 20912 # Expected final solar farm power output (kW) -EXPECTED_FINAL_PLANT_POWER = 35234 # Wind + Solar (14322 + 20912) +EXPECTED_FINAL_WIND_POWER = 14321 # Updated for midpoint interpolation correction +EXPECTED_FINAL_SOLAR_POWER = 21054 # Updated for midpoint interpolation correction +EXPECTED_FINAL_PLANT_POWER = 35375 # Wind + Solar (14321 + 21054) # File names INPUT_FILE = "hercules_input.yaml" diff --git a/tests/power_playback_test.py b/tests/power_playback_test.py index 7448abf6..0504d309 100644 --- a/tests/power_playback_test.py +++ b/tests/power_playback_test.py @@ -46,8 +46,10 @@ def test_power_playback_step(): step_h_dict["step"] = 1 result = power_playback.step(step_h_dict) - # Verify power - assert np.isclose(result["power_playback"]["power"], 2000.0) + # With midpoint correction the value at t=1 is the average of period-0 + # (midpoint = 0.5 seconds after starttime) + # (1000) and period-1 (2000) values (midpoint = 1.5 
seconds after starttime) + assert np.isclose(result["power_playback"]["power"], 1500.0) def test_power_playback_raises_on_nan_in_power_columns(): diff --git a/tests/regression_tests/solar_pysam_pvwatts_regression_test.py b/tests/regression_tests/solar_pysam_pvwatts_regression_test.py index 19c2c966..c239db88 100644 --- a/tests/regression_tests/solar_pysam_pvwatts_regression_test.py +++ b/tests/regression_tests/solar_pysam_pvwatts_regression_test.py @@ -10,16 +10,16 @@ powers_base_no_control = np.array( [ - 16528.82749492729, - 16541.958599140045, - 16555.08955834377, - 16568.220372741496, - 16581.35104253094, - 16594.481567904546, - 16607.61194537151, - 16620.74217922295, - 16633.872269119838, - 16647.002215233784, + 17092.15820312, + 17098.77539062, + 17112.00976562, + 17125.24609375, + 17138.48242188, + 17151.71679688, + 17164.94921875, + 17178.18554688, + 17191.41992188, + 17204.65625, ] ) @@ -41,30 +41,30 @@ dni_base_no_control = np.array( [ 330.86019897, - 331.19604492, - 331.53189087, - 331.86773682, - 332.20358276, - 332.53942871, - 332.87527466, - 333.21112061, - 333.54696655, - 333.8828125, + 331.02813721, + 331.36395264, + 331.6998291, + 332.03564453, + 332.371521, + 332.70733643, + 333.04321289, + 333.37902832, + 333.71490479, ] ) aoi_base_no_control = np.array( [ - 67.82689268, - 67.82689268, - 67.82689268, - 67.82689268, - 67.82689268, - 67.82689268, - 67.82689268, - 67.82689268, - 67.82689268, - 67.82689268, + 67.82688904, + 67.82688904, + 67.82688904, + 67.82688904, + 67.82688904, + 67.82688904, + 67.82688904, + 67.82688904, + 67.82688904, + 67.82688904, ] ) diff --git a/tests/test_inputs/scada_input.csv b/tests/test_inputs/scada_input.csv index 1d934114..e6ba4499 100644 --- a/tests/test_inputs/scada_input.csv +++ b/tests/test_inputs/scada_input.csv @@ -3,7 +3,7 @@ time_utc,wd_mean,ws_000,ws_001,ws_002,pow_000,pow_001,pow_002 2018-05-10 12:31:01,185.2,9.1,9.0,9.2,3200.0,3100.0,3300.0 2018-05-10 12:31:02,190.8,7.8,7.7,7.9,2200.0,2100.0,2300.0 2018-05-10 12:31:03,175.3,6.5,6.4,6.6,1500.0,1400.0,1600.0 -2018-05-10 12:31:04,170.1,10.2,10.1,10.3,4200.0,4100.0,4300.0 +2018-05-10 12:31:04,170.1,10.2,10.1,10.3,5000.0,4100.0,4300.0 2018-05-10 12:31:05,165.7,11.5,11.4,11.6,5000.0,4900.0,5000.0 2018-05-10 12:31:06,160.4,9.8,9.7,9.9,5000.0,3800.0,4000.0 2018-05-10 12:31:07,155.9,8.7,8.6,8.8,3000.0,2900.0,3100.0 diff --git a/tests/utilities_test.py b/tests/utilities_test.py index bd39efe1..479cf8a3 100644 --- a/tests/utilities_test.py +++ b/tests/utilities_test.py @@ -32,6 +32,8 @@ def test_upsampling(): } ) + # Midpoints will be 1, 3, 5, 7, 9, 11 + # Create new_time with more points (upsampling) new_time = np.linspace(0, 10, 11) # [0, 1, 2, 3, ..., 10] @@ -42,7 +44,13 @@ def test_upsampling(): assert np.allclose(result["time"], new_time) # Assert values are correct - expected_values = new_time # Linear function y = x + # Time 0 is before first midpoint, so value should clamp to 0 + # Time 1 is at first midpoint, so value should be 0 + # Time 2 is between first and second midpoint, so value should be 1 + # Time 3 is at second midpoint, so value should be 2 + # ... 
+ # Time 10 is in between last and second last midpoint, so value should be 9 + expected_values = [0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] assert np.allclose(result["value"], expected_values), "Interpolated values should match y = x" @@ -55,21 +63,24 @@ def test_downsampling(): """ time_points = np.linspace(0, 10, 11) - df = pd.DataFrame({"time": time_points, "value": time_points * 1.7}) + value_points = time_points * 1.7 + df = pd.DataFrame({"time": time_points, "value": value_points}) - # Create new_time with fewer points (downsampling) - new_time = np.array([0, 2, 4]) + # Query at interval midpoints (0.5, 5.5, 9.5) and end points (0, 10) + new_time = np.array([0.0, 5.0, 5.5, 10.0, 10.5]) - # Interpolate result = interpolate_df(df, new_time) - # For our quadratic function, the interpolated values should be the square of new_time - expected_values = new_time * 1.7 + # At the midpoints we should recover the original period values + expected_values = [ + value_points[0], + (value_points[4] + value_points[5]) / 2, + value_points[5], + (value_points[-2] + value_points[-1]) / 2, + value_points[-1], + ] assert np.allclose(result["value"], expected_values) - # Check the shape is correct - assert result.shape[0] == len(new_time) - def test_datetime_interpolation(): """ diff --git a/tests/wind_farm_dynamic_floris_test.py b/tests/wind_farm_dynamic_floris_test.py index 99064f53..de8a166c 100644 --- a/tests/wind_farm_dynamic_floris_test.py +++ b/tests/wind_farm_dynamic_floris_test.py @@ -8,7 +8,7 @@ import pandas as pd import pytest from hercules.plant_components.wind_farm import WindFarm -from hercules.utilities import hercules_float_type +from hercules.utilities import hercules_float_type, interpolate_df from tests.test_inputs.h_dict import h_dict_wind @@ -42,14 +42,23 @@ def test_wind_farm_ws_mean(): # Test that, since individual speed are specified, ws_mean is ignored # Note that h_dict_wind specifies an end time of 10. wind_sim = WindFarm(test_h_dict, "wind_farm") - assert ( - wind_sim.ws_mat[:, 0] == df_input["ws_000"].to_numpy(dtype=hercules_float_type)[:10] - ).all() + + # Assume df_input represents time stamps indicating start of period. + # Convert to instantanous values with midpoint correction as would be done + # internally by interpolate_df function. 
+ df_input["time"] = np.arange(0, df_input.shape[0], 1) + df_input["time_utc"] = pd.to_datetime(df_input["time_utc"]) + df_input_interpolated = interpolate_df(df_input, np.arange(0, df_input.shape[0], 1)) + + assert np.allclose( + wind_sim.ws_mat[:, 0], + df_input_interpolated["ws_000"].to_numpy(dtype=hercules_float_type)[:10], + ) assert np.allclose( wind_sim.ws_mat_mean, - (df_input[["ws_000", "ws_001", "ws_002"]].mean(axis=1)).to_numpy(dtype=hercules_float_type)[ - :10 - ], + (df_input_interpolated[["ws_000", "ws_001", "ws_002"]].mean(axis=1)).to_numpy( + dtype=hercules_float_type + )[:10], ) # Drop individual speeds and test that ws_mean is used instead diff --git a/tests/wind_farm_precom_floris_test.py b/tests/wind_farm_precom_floris_test.py index 356d0143..66303435 100644 --- a/tests/wind_farm_precom_floris_test.py +++ b/tests/wind_farm_precom_floris_test.py @@ -8,7 +8,7 @@ import pandas as pd import pytest from hercules.plant_components.wind_farm import WindFarm -from hercules.utilities import hercules_float_type +from hercules.utilities import hercules_float_type, interpolate_df from tests.test_inputs.h_dict import h_dict_wind @@ -52,14 +52,23 @@ def test_wind_farm_precom_floris_ws_mean(): # Test that, since individual speed are specified, ws_mean is ignored # Note that h_dict_wind_precom_floris specifies an end time of 10. wind_sim = WindFarm(test_h_dict, "wind_farm") - assert ( - wind_sim.ws_mat[:, 0] == df_input["ws_000"].to_numpy(dtype=hercules_float_type)[:10] - ).all() + + # Assume df_input represents time stamps indicating start of period. + # Convert to instantaneous values with midpoint correction as would be done + # internally by interpolate_df function. + df_input["time"] = np.arange(0, df_input.shape[0], 1) + df_input["time_utc"] = pd.to_datetime(df_input["time_utc"]) + df_input_interpolated = interpolate_df(df_input, np.arange(0, df_input.shape[0], 1)) + + assert np.allclose( + wind_sim.ws_mat[:, 0], + df_input_interpolated["ws_000"].to_numpy(dtype=hercules_float_type)[:10], + ) assert np.allclose( wind_sim.ws_mat_mean, - (df_input[["ws_000", "ws_001", "ws_002"]].mean(axis=1)).to_numpy(dtype=hercules_float_type)[ - :10 - ], + (df_input_interpolated[["ws_000", "ws_001", "ws_002"]].mean(axis=1)).to_numpy( + dtype=hercules_float_type + )[:10], ) # Drop individual speeds and test that ws_mean is used instead @@ -254,8 +263,9 @@ def test_wind_farm_precom_floris_velocities_update_correctly(): "Withwakes wind speeds should have been updated" ) - # Verify the wind speeds match the expected values from the input data - expected_background = np.array([9.0, 9.5, 10.0]) # ws values for step 1 + # With midpoint correction, the value at step=1 is the average of + # period-0 and period-1 input values for each turbine. + expected_background = np.array([8.5, 9.0, 9.5]) np.testing.assert_array_equal(wind_sim.wind_speeds_background, expected_background) # Verify that wake deficits are recalculated diff --git a/tests/wind_farm_scada_power_test.py b/tests/wind_farm_scada_power_test.py index 55f11c18..f80dd507 100644 --- a/tests/wind_farm_scada_power_test.py +++ b/tests/wind_farm_scada_power_test.py @@ -87,9 +87,9 @@ def test_wind_farm_scada_power_step(): result["wind_farm"]["wind_speeds_background"], ) - # Verify turbine powers - assert np.allclose(result["wind_farm"]["turbine_powers"], [3200.0, 3100.0, 3300.0]) - assert np.isclose(result["wind_farm"]["power"], 3200.0 + 3100.0 + 3300.0) + # At step=1, midpoint correction gives the average of period-0 and period-1 values. 
+ assert np.allclose(result["wind_farm"]["turbine_powers"], [2850.0, 2750.0, 2950.0]) + assert np.isclose(result["wind_farm"]["power"], 2850.0 + 2750.0 + 2950.0) def test_wind_farm_scada_power_get_initial_conditions_and_meta_data(): From f0aa0132c3746fec568193cfead8e7e637fd0235 Mon Sep 17 00:00:00 2001 From: paulf81 Date: Wed, 8 Apr 2026 14:48:32 -0600 Subject: [PATCH 03/19] update docs --- docs/hercules_input.md | 2 +- docs/output_files.md | 2 ++ docs/power_playback.md | 2 +- docs/solar_pv.md | 2 +- docs/timing.md | 57 +++++++++++++++++++++++++++++++++++++++++- docs/wind.md | 2 +- 6 files changed, 62 insertions(+), 5 deletions(-) diff --git a/docs/hercules_input.md b/docs/hercules_input.md index 3f674c5a..5b5764d0 100644 --- a/docs/hercules_input.md +++ b/docs/hercules_input.md @@ -131,7 +131,7 @@ The old format is still supported for backward compatibility but will show a dep ### External Data File Format The CSV file must contain: -- A `time_utc` column with UTC timestamps in ISO 8601 format +- A `time_utc` column with UTC timestamps in ISO 8601 format. Each timestamp marks the **start of a reporting period**; values on that row are treated as period averages. See [Time Interpretation](timing.md#time-interpretation-inputs-vs-internal-values) for how Hercules converts these to instantaneous values. - One or more data columns with external signals. Note that the names of the other columns are arbitrary; any column names will be carried forward and interpolated. However, the values must be floats. Additionally, some controllers and plotting utilities that work on external signals may require specific column names like `lmp_rt`, `lmp_da`, `wind_forecast`, etc. Example `lmp_data.csv`: diff --git a/docs/output_files.md b/docs/output_files.md index a2c33148..1f872792 100644 --- a/docs/output_files.md +++ b/docs/output_files.md @@ -2,6 +2,8 @@ Hercules generates HDF5 output files containing simulation data for analysis and visualization. This page describes the file format, available utilities for reading the data, and how HerculesModel generates these files. +All values in output files represent **instantaneous** quantities at each time step, not period averages. This differs from the convention used by input data files, where timestamps mark the start of a reporting period. See [Time Interpretation](timing.md#time-interpretation-inputs-vs-internal-values) for details on this distinction and the midpoint correction applied during input interpolation. + ## File Format Hercules outputs simulation data in HDF5 (Hierarchical Data Format 5) format. diff --git a/docs/power_playback.md b/docs/power_playback.md index d231883c..4c68d8ba 100644 --- a/docs/power_playback.md +++ b/docs/power_playback.md @@ -32,7 +32,7 @@ power_unit_1: The input file must contain the following columns: -- `time_utc`: Timestamps in UTC (ISO 8601 format or parseable datetime strings) +- `time_utc`: Timestamps in UTC (ISO 8601 format or parseable datetime strings). Each timestamp marks the **start of a reporting period**; the power value on that row is treated as the period average. See [Time Interpretation](timing.md#time-interpretation-inputs-vs-internal-values) for how Hercules converts these to instantaneous values. - `power`: Power output in kW Supported file formats: `.csv`, `.p`, `.pkl` (pickle), `.f`, `.ftr` (feather). 
diff --git a/docs/solar_pv.md b/docs/solar_pv.md index ae84d729..48248883 100644 --- a/docs/solar_pv.md +++ b/docs/solar_pv.md @@ -12,7 +12,7 @@ Presently only one solar simulator is available Both models require an input weather file: 1. A CSV file that specifies the weather conditions (e.g. NonAnnualSimulation-sample_data-interpolated-daytime.csv). This file should include: - - timestamp (see [timing](timing.md) for time format requirements) + - timestamp (see [timing](timing.md) for time format requirements). Each `time_utc` timestamp marks the **start of a reporting period**; irradiance and weather values on that row are treated as period averages. See [Time Interpretation](timing.md#time-interpretation-inputs-vs-internal-values) for how Hercules converts these to instantaneous values. - direct normal irradiance (DNI) - diffuse horizontal irradiance (DHI) - global horizontal irradiance (GHI) diff --git a/docs/timing.md b/docs/timing.md index 795ebbb4..ad1b3d07 100644 --- a/docs/timing.md +++ b/docs/timing.md @@ -9,6 +9,59 @@ Timing in Hercules is specified using two complementary representations: - `time` (float): Simulation time in seconds, where `time=0` corresponds to `starttime_utc` - `time_utc` (datetime): Absolute UTC timestamp +## Time Interpretation: Inputs vs. Internal Values + +### Input files: start-of-period convention + +In external data sources such as weather files, SCADA records, and resource +databases, each `time_utc` timestamp marks the **beginning** of a reporting +period and the associated values (irradiance, wind speed, power, etc.) +represent an average or aggregate over that period. For example, an hourly +weather file with a row at `2020-06-15T12:00:00Z` and GHI = 735 W/m² means +that 735 W/m² is the average GHI from 12:00 to 13:00. + +### Hercules internal values: instantaneous convention + +Inside the simulation, values at a given time step represent **instantaneous** +quantities at that moment. All Hercules output values follow this same +instantaneous convention. + +### Midpoint correction during interpolation + +Because input data uses start-of-period timestamps while Hercules needs +instantaneous values, a correction is applied when input data is resampled +onto the simulation time grid. The best single-point estimate of a +period-averaged value is at the **midpoint** of its interval, not the start. +For example, the hourly average from 12:00–13:00 is most representative of +conditions at 12:30. + +The functions `interpolate_df`, `_interpolate_with_polars`, and +`_compute_interval_midpoints` in `utilities.py` implement this correction: + +1. Each numeric value is assigned to the midpoint of its input interval. +2. Linear interpolation is then performed between these midpoints to produce + values at the simulation time steps. + +Datetime columns (e.g. `time_utc`) are **not** shifted because they are +instantaneous coordinate mappings between simulation time and wall-clock time, +not period-averaged measurements. + +``` +Input file (start-of-period): + +time_utc value +12:00 100 ← average over [12:00, 13:00) +13:00 200 ← average over [13:00, 14:00) + +After midpoint correction: + +time value +12:30 100 ← midpoint of [12:00, 13:00) +13:30 200 ← midpoint of [13:00, 14:00) + +Querying at 13:00 yields 150 (halfway between midpoints). +``` + ## Input Requirements All Hercules input files must specify start and end times using UTC datetime strings: @@ -113,7 +166,7 @@ For the example above, `endtime` would be 3600.0 seconds. 
### Wind and Solar Input Data -Both wind and solar input CSV/Feather/Parquet files must contain a `time_utc` column with UTC timestamps: +Both wind and solar input CSV/Feather/Parquet files must contain a `time_utc` column with UTC timestamps. Each `time_utc` value marks the **start of a reporting period**; the data values on that row are treated as period averages. See [Time Interpretation](#time-interpretation-inputs-vs-internal-values) above for how Hercules handles the conversion to instantaneous values. ```text time_utc,wd_mean,ws_000,ws_001,ws_002 @@ -145,6 +198,8 @@ Key Points: ## Output Files +All values in Hercules output files represent **instantaneous** quantities at each time step, not period averages. See [Time Interpretation](#time-interpretation-inputs-vs-internal-values) for the distinction from input files. + Hercules output HDF5 files store: - `time` array: Simulation time points (seconds from t=0) diff --git a/docs/wind.md b/docs/wind.md index 24fb2de6..0fef6df5 100644 --- a/docs/wind.md +++ b/docs/wind.md @@ -54,7 +54,7 @@ Required parameters for WindFarmSCADAPower: **SCADA File Format:** The SCADA file must contain the following columns: -- `time_utc`: Timestamps in UTC (ISO 8601 format or parseable datetime strings) +- `time_utc`: Timestamps in UTC (ISO 8601 format or parseable datetime strings). Each timestamp marks the **start of a reporting period**; values on that row are treated as period averages. See [Time Interpretation](timing.md#time-interpretation-inputs-vs-internal-values) for how Hercules converts these to instantaneous values. - `wd_mean`: Mean wind direction in degrees - `pow_###`: Power output for each turbine (e.g., `pow_000`, `pow_001`, `pow_002`) From aa82d059fae831f2ad21a187d57394a1878b4d12 Mon Sep 17 00:00:00 2001 From: paulf81 Date: Wed, 8 Apr 2026 14:58:22 -0600 Subject: [PATCH 04/19] pass midpoints to solar module --- hercules/plant_components/solar_pysam_base.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/hercules/plant_components/solar_pysam_base.py b/hercules/plant_components/solar_pysam_base.py index 01c24030..91219939 100644 --- a/hercules/plant_components/solar_pysam_base.py +++ b/hercules/plant_components/solar_pysam_base.py @@ -128,12 +128,15 @@ def _load_solar_data(self, h_dict): time_steps_all = np.arange(self.starttime, self.endtime, self.dt, dtype=hercules_float_type) df_solar = interpolate_df(df_solar, time_steps_all) + # Shift df_solar["time_utc"] by a half step to compute midpoints + df_solar["time_utc_midpoints"] = df_solar["time_utc"] + pd.Timedelta(seconds=self.dt / 2) + # Can now save the input data as simple columns - self.year_array = df_solar["time_utc"].dt.year.values - self.month_array = df_solar["time_utc"].dt.month.values - self.day_array = df_solar["time_utc"].dt.day.values - self.hour_array = df_solar["time_utc"].dt.hour.values - self.minute_array = df_solar["time_utc"].dt.minute.values + self.year_array = df_solar["time_utc_midpoints"].dt.year.values + self.month_array = df_solar["time_utc_midpoints"].dt.month.values + self.day_array = df_solar["time_utc_midpoints"].dt.day.values + self.hour_array = df_solar["time_utc_midpoints"].dt.hour.values + self.minute_array = df_solar["time_utc_midpoints"].dt.minute.values self.ghi_array = self._get_solar_data_array(df_solar, "Global Horizontal Irradiance") self.dni_array = self._get_solar_data_array(df_solar, "Direct Normal Irradiance") self.dhi_array = self._get_solar_data_array(df_solar, "Diffuse Horizontal Irradiance") From 
72510da563b7c8a2152bd32a6e7da9ec33ef5a9a Mon Sep 17 00:00:00 2001 From: paulf81 Date: Wed, 8 Apr 2026 14:59:39 -0600 Subject: [PATCH 05/19] undo change --- hercules/plant_components/solar_pysam_base.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/hercules/plant_components/solar_pysam_base.py b/hercules/plant_components/solar_pysam_base.py index 91219939..01c24030 100644 --- a/hercules/plant_components/solar_pysam_base.py +++ b/hercules/plant_components/solar_pysam_base.py @@ -128,15 +128,12 @@ def _load_solar_data(self, h_dict): time_steps_all = np.arange(self.starttime, self.endtime, self.dt, dtype=hercules_float_type) df_solar = interpolate_df(df_solar, time_steps_all) - # Shift df_solar["time_utc"] by a half step to compute midpoints - df_solar["time_utc_midpoints"] = df_solar["time_utc"] + pd.Timedelta(seconds=self.dt / 2) - # Can now save the input data as simple columns - self.year_array = df_solar["time_utc_midpoints"].dt.year.values - self.month_array = df_solar["time_utc_midpoints"].dt.month.values - self.day_array = df_solar["time_utc_midpoints"].dt.day.values - self.hour_array = df_solar["time_utc_midpoints"].dt.hour.values - self.minute_array = df_solar["time_utc_midpoints"].dt.minute.values + self.year_array = df_solar["time_utc"].dt.year.values + self.month_array = df_solar["time_utc"].dt.month.values + self.day_array = df_solar["time_utc"].dt.day.values + self.hour_array = df_solar["time_utc"].dt.hour.values + self.minute_array = df_solar["time_utc"].dt.minute.values self.ghi_array = self._get_solar_data_array(df_solar, "Global Horizontal Irradiance") self.dni_array = self._get_solar_data_array(df_solar, "Direct Normal Irradiance") self.dhi_array = self._get_solar_data_array(df_solar, "Diffuse Horizontal Irradiance") From f1abb94373d0c3612d805b51fae9135f6d8ca33b Mon Sep 17 00:00:00 2001 From: paulf81 Date: Wed, 8 Apr 2026 15:17:44 -0600 Subject: [PATCH 06/19] add to public docstring --- hercules/utilities.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/hercules/utilities.py b/hercules/utilities.py index 20b744d1..a60072ce 100644 --- a/hercules/utilities.py +++ b/hercules/utilities.py @@ -451,6 +451,15 @@ def close_logging(logger): def interpolate_df(df, new_time): """Interpolate DataFrame values to match new time axis. + Numeric columns are assumed to represent period-averaged values whose + timestamps mark the start of each period. To recover the best estimate + of the instantaneous value at a query time, each value is assigned to the + midpoint of its interval before interpolating. + + Datetime columns (e.g. ``time_utc``) are instantaneous coordinates — they + map simulation time to wall-clock time directly — so they are interpolated + without the midpoint shift. + Uses linear interpolation with Polars backend for better performance and memory efficiency. Converts datetime columns to timestamps for interpolation. From 806ee4a384a8d89be767bc0bc3b884db82c45ba9 Mon Sep 17 00:00:00 2001 From: paulf81 Date: Wed, 8 Apr 2026 15:18:10 -0600 Subject: [PATCH 07/19] typo --- tests/wind_farm_dynamic_floris_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/wind_farm_dynamic_floris_test.py b/tests/wind_farm_dynamic_floris_test.py index de8a166c..b76247fa 100644 --- a/tests/wind_farm_dynamic_floris_test.py +++ b/tests/wind_farm_dynamic_floris_test.py @@ -44,7 +44,7 @@ def test_wind_farm_ws_mean(): wind_sim = WindFarm(test_h_dict, "wind_farm") # Assume df_input represents time stamps indicating start of period. 
- # Convert to instantanous values with midpoint correction as would be done + # Convert to instantaneous values with midpoint correction as would be done # internally by interpolate_df function. df_input["time"] = np.arange(0, df_input.shape[0], 1) df_input["time_utc"] = pd.to_datetime(df_input["time_utc"]) From 753d0774b68ee2da8972188473f8637dcc7af06f Mon Sep 17 00:00:00 2001 From: paulf81 Date: Wed, 8 Apr 2026 15:22:45 -0600 Subject: [PATCH 08/19] dont compute midpoints in loop --- hercules/utilities.py | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/hercules/utilities.py b/hercules/utilities.py index a60072ce..e381cc02 100644 --- a/hercules/utilities.py +++ b/hercules/utilities.py @@ -517,20 +517,14 @@ def _interpolate_with_polars(df, new_time, datetime_cols, numeric_cols): # Start with the time column result_pl = new_time_pl + # Compute time values and midpoints + time_values = df_pl["time"].to_numpy() + midpoints = _compute_interval_midpoints(time_values) + # Process numeric columns using Polars' interpolation if numeric_cols: for col in numeric_cols: - # Use Polars' join_asof for efficient interpolation-like behavior - # This is more memory efficient than pandas for large datasets - col_data = df_pl.select(["time", col]).sort("time") - - # Perform interpolation using Polars operations - # Note: Polars doesn't have direct linear interpolation, so we use numpy interp - # but with Polars' efficient data extraction - time_values = col_data["time"].to_numpy() - col_values = col_data[col].to_numpy() - - midpoints = _compute_interval_midpoints(time_values) + col_values = df_pl[col].to_numpy() # Linear interpolation with float32 precision interpolated_values = np.interp(new_time, midpoints, col_values).astype( From 55c112d2920f94a2feab3f600b4491fef73ac237 Mon Sep 17 00:00:00 2001 From: paulf81 Date: Wed, 8 Apr 2026 15:24:19 -0600 Subject: [PATCH 09/19] handle edge case --- hercules/utilities.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/hercules/utilities.py b/hercules/utilities.py index e381cc02..346bb68d 100644 --- a/hercules/utilities.py +++ b/hercules/utilities.py @@ -567,9 +567,15 @@ def _compute_interval_midpoints(time_values): Returns: np.ndarray: Array of interval midpoints, same length as *time_values*. """ + # Allow the edge case of a single time value by returning the time value itself + if len(time_values) < 2: + return time_values + # Compute midpoints midpoints = np.empty_like(time_values, dtype=np.float64) midpoints[:-1] = (time_values[:-1] + time_values[1:]) / 2.0 - midpoints[-1] = time_values[-1] + (time_values[-1] - time_values[-2]) / 2.0 + midpoints[-1] = ( + time_values[-1] + (time_values[-1] - time_values[-2]) / 2.0 + ) # Last interval is equal to the previous one return midpoints From a444406efee175ab00574869e228fd37958ac803 Mon Sep 17 00:00:00 2001 From: paulf81 Date: Mon, 13 Apr 2026 16:41:14 -0600 Subject: [PATCH 10/19] combine interpolate functions --- docs/timing.md | 4 ++-- hercules/utilities.py | 49 +++++-------------------------------------- 2 files changed, 7 insertions(+), 46 deletions(-) diff --git a/docs/timing.md b/docs/timing.md index ad1b3d07..ee2c1578 100644 --- a/docs/timing.md +++ b/docs/timing.md @@ -35,8 +35,8 @@ period-averaged value is at the **midpoint** of its interval, not the start. For example, the hourly average from 12:00–13:00 is most representative of conditions at 12:30. 
-The functions `interpolate_df`, `_interpolate_with_polars`, and -`_compute_interval_midpoints` in `utilities.py` implement this correction: +The functions `interpolate_df` and `_compute_interval_midpoints` in +`utilities.py` implement this correction: 1. Each numeric value is assigned to the midpoint of its input interval. 2. Linear interpolation is then performed between these midpoints to produce diff --git a/hercules/utilities.py b/hercules/utilities.py index 346bb68d..ecea6c64 100644 --- a/hercules/utilities.py +++ b/hercules/utilities.py @@ -484,55 +484,16 @@ def interpolate_df(df, new_time): else: numeric_cols.append(col) - return _interpolate_with_polars(df, new_time, datetime_cols, numeric_cols) - - -def _interpolate_with_polars(df, new_time, datetime_cols, numeric_cols): - """Interpolate using Polars backend with midpoint correction. - - Numeric columns are assumed to represent period-averaged values whose - timestamps mark the start of each period. To recover the best estimate - of the instantaneous value at a query time, each value is assigned to the - midpoint of its interval before interpolating. - - Datetime columns (e.g. ``time_utc``) are instantaneous coordinates — they - map simulation time to wall-clock time directly — so they are interpolated - without the midpoint shift. - - Args: - df (pd.DataFrame): Input DataFrame. - new_time (np.ndarray): New time points. - datetime_cols (list): Datetime column names. - numeric_cols (list): Numeric column names. - - Returns: - pd.DataFrame: Interpolated DataFrame. - """ - # Convert to Polars for efficient processing df_pl = pl.from_pandas(df) + result_pl = pl.DataFrame({"time": new_time}) - # Create a Polars DataFrame for the new time points - new_time_pl = pl.DataFrame({"time": new_time}) - - # Start with the time column - result_pl = new_time_pl - - # Compute time values and midpoints time_values = df_pl["time"].to_numpy() midpoints = _compute_interval_midpoints(time_values) - # Process numeric columns using Polars' interpolation - if numeric_cols: - for col in numeric_cols: - col_values = df_pl[col].to_numpy() - - # Linear interpolation with float32 precision - interpolated_values = np.interp(new_time, midpoints, col_values).astype( - hercules_float_type - ) - - # Add interpolated column to result - result_pl = result_pl.with_columns(pl.lit(interpolated_values).alias(col)) + for col in numeric_cols: + col_values = df_pl[col].to_numpy() + interpolated_values = np.interp(new_time, midpoints, col_values).astype(hercules_float_type) + result_pl = result_pl.with_columns(pl.lit(interpolated_values).alias(col)) # Process datetime columns for col in datetime_cols: From 9367453b76f8ede64a0de4a6ed10191187fb2d88 Mon Sep 17 00:00:00 2001 From: paulf81 Date: Mon, 13 Apr 2026 17:01:15 -0600 Subject: [PATCH 11/19] Add additional interpolation methods --- docs/timing.md | 64 ++++++++++++++----- hercules/hercules_model.py | 17 +++-- hercules/plant_components/power_playback.py | 4 +- hercules/plant_components/solar_pysam_base.py | 4 +- hercules/plant_components/wind_farm.py | 4 +- .../plant_components/wind_farm_scada_power.py | 4 +- hercules/utilities.py | 62 ++++++++++++++---- tests/utilities_test.py | 21 ++++-- tests/wind_farm_dynamic_floris_test.py | 6 +- tests/wind_farm_precom_floris_test.py | 6 +- 10 files changed, 148 insertions(+), 44 deletions(-) diff --git a/docs/timing.md b/docs/timing.md index ee2c1578..7196dbf4 100644 --- a/docs/timing.md +++ b/docs/timing.md @@ -26,26 +26,24 @@ Inside the simulation, values at a given time step 
represent **instantaneous** quantities at that moment. All Hercules output values follow this same instantaneous convention. -### Midpoint correction during interpolation +### Interpolation methods -Because input data uses start-of-period timestamps while Hercules needs -instantaneous values, a correction is applied when input data is resampled -onto the simulation time grid. The best single-point estimate of a -period-averaged value is at the **midpoint** of its interval, not the start. -For example, the hourly average from 12:00–13:00 is most representative of -conditions at 12:30. +The `interpolate_df` function in `utilities.py` accepts a mandatory +`interpolation_method` parameter that controls how numeric columns are +resampled onto the simulation time grid. Three methods are available: -The functions `interpolate_df` and `_compute_interval_midpoints` in -`utilities.py` implement this correction: +#### `"averaged_to_instantaneous"` (wind, solar, and similar) -1. Each numeric value is assigned to the midpoint of its input interval. +Input values are period averages whose timestamps mark the **start** of each +period. The best single-point estimate of a period-averaged value is at the +**midpoint** of its interval, not the start. For example, the hourly average +from 12:00-13:00 is most representative of conditions at 12:30. + +1. Each numeric value is assigned to the midpoint of its input interval + (using `_compute_interval_midpoints`). 2. Linear interpolation is then performed between these midpoints to produce values at the simulation time steps. -Datetime columns (e.g. `time_utc`) are **not** shifted because they are -instantaneous coordinate mappings between simulation time and wall-clock time, -not period-averaged measurements. - ``` Input file (start-of-period): @@ -62,6 +60,38 @@ time value Querying at 13:00 yields 150 (halfway between midpoints). ``` +#### `"zoh_to_instantaneous"` (LMP, external signals) + +Input values are piecewise-constant (zero-order hold) with timestamps at the +start of each interval. Each query time receives the value of the last +original timestamp at or before it -- the value is held constant until the +next timestamp. This is appropriate for signals like locational marginal +prices (LMP) that change in discrete steps. + +``` +Input file: + +time_utc value +12:00 100 ← held constant over [12:00, 13:00) +13:00 200 ← held constant over [13:00, 14:00) + +Querying at 12:30 yields 100. +Querying at 13:00 yields 200. +``` + +#### `"instantaneous_to_instantaneous"` + +Input values already represent instantaneous measurements at their +timestamps. Standard linear interpolation is performed directly on the +original timestamps with no midpoint shift. + +--- + +In all three methods, datetime columns (e.g. `time_utc`) are linearly +interpolated on the raw timestamps without any shift, because they are +instantaneous coordinate mappings between simulation time and wall-clock +time, not period-averaged measurements. + ## Input Requirements All Hercules input files must specify start and end times using UTC datetime strings: @@ -166,7 +196,11 @@ For the example above, `endtime` would be 3600.0 seconds. ### Wind and Solar Input Data -Both wind and solar input CSV/Feather/Parquet files must contain a `time_utc` column with UTC timestamps. Each `time_utc` value marks the **start of a reporting period**; the data values on that row are treated as period averages. 
See [Time Interpretation](#time-interpretation-inputs-vs-internal-values) above for how Hercules handles the conversion to instantaneous values. +Both wind and solar input CSV/Feather/Parquet files must contain a `time_utc` column with UTC timestamps. Each `time_utc` value marks the **start of a reporting period**; the data values on that row are treated as period averages. These are interpolated with `"averaged_to_instantaneous"`. See [Interpolation methods](#interpolation-methods) above for details. + +### External Data (LMP, etc.) + +External data files loaded via `_read_external_data_file` are interpolated with `"zoh_to_instantaneous"` (zero-order hold), which is appropriate for signals like LMP prices that are piecewise-constant over each interval rather than time-averaged. ```text time_utc,wd_mean,ws_000,ws_001,ws_002 diff --git a/hercules/hercules_model.py b/hercules/hercules_model.py index dfe78b18..731b0f80 100644 --- a/hercules/hercules_model.py +++ b/hercules/hercules_model.py @@ -172,10 +172,15 @@ def _read_external_data_file(self, filename): """ Read and interpolate external data from a CSV, feather, or pickle file. - This method reads external data from the specified file (CSV, feather, or pickle) - and interpolates it according to the simulation time steps. The external data must - include a 'time_utc' column which will be converted to simulation time. - The interpolated data is stored in self.external_signals_all. + This method reads external data from the specified file (CSV, feather, or + pickle) and interpolates it onto the simulation time grid using zero-order + hold (``"zoh_to_instantaneous"``). ZOH is appropriate because external + signals such as LMP prices are piecewise-constant over each reporting + interval, unlike time-averaged weather data used by wind/solar components. + + The external data must include a ``time_utc`` column which will be + converted to simulation time. The interpolated data is stored in + ``self.external_signals_all``. Args: filename (str): Path to the file containing external data. 
Supported formats: @@ -216,7 +221,9 @@ def _read_external_data_file(self, filename): ) # Interpolate using the utility function - df_interpolated = interpolate_df(df_ext, new_times) + df_interpolated = interpolate_df( + df_ext, new_times, interpolation_method="zoh_to_instantaneous" + ) # Convert interpolated DataFrame to dictionary format for col in df_interpolated.columns: diff --git a/hercules/plant_components/power_playback.py b/hercules/plant_components/power_playback.py index e12f4361..3d127ea1 100644 --- a/hercules/plant_components/power_playback.py +++ b/hercules/plant_components/power_playback.py @@ -122,7 +122,9 @@ def __init__(self, h_dict, component_name): # Interpolate df_scada on to the time steps time_steps_all = np.arange(self.starttime, self.endtime, self.dt, dtype=hercules_float_type) - df_scada = interpolate_df(df_scada, time_steps_all) + df_scada = interpolate_df( + df_scada, time_steps_all, interpolation_method="averaged_to_instantaneous" + ) # Confirm that there is a column called "power" if "power" not in df_scada.columns: diff --git a/hercules/plant_components/solar_pysam_base.py b/hercules/plant_components/solar_pysam_base.py index 01c24030..857ecc1a 100644 --- a/hercules/plant_components/solar_pysam_base.py +++ b/hercules/plant_components/solar_pysam_base.py @@ -126,7 +126,9 @@ def _load_solar_data(self, h_dict): # Interpolate df_solar on to the time steps time_steps_all = np.arange(self.starttime, self.endtime, self.dt, dtype=hercules_float_type) - df_solar = interpolate_df(df_solar, time_steps_all) + df_solar = interpolate_df( + df_solar, time_steps_all, interpolation_method="averaged_to_instantaneous" + ) # Can now save the input data as simple columns self.year_array = df_solar["time_utc"].dt.year.values diff --git a/hercules/plant_components/wind_farm.py b/hercules/plant_components/wind_farm.py index 4845d8f8..2cc97893 100644 --- a/hercules/plant_components/wind_farm.py +++ b/hercules/plant_components/wind_farm.py @@ -188,7 +188,9 @@ def __init__(self, h_dict, component_name): # Interpolate df_wi on to the time steps time_steps_all = np.arange(self.starttime, self.endtime, self.dt, dtype=hercules_float_type) - df_wi = interpolate_df(df_wi, time_steps_all) + df_wi = interpolate_df( + df_wi, time_steps_all, interpolation_method="averaged_to_instantaneous" + ) # INITIALIZE FLORIS BASED ON WAKE MODEL if self.wake_method == "precomputed": diff --git a/hercules/plant_components/wind_farm_scada_power.py b/hercules/plant_components/wind_farm_scada_power.py index 9ae544d7..6a80b01f 100644 --- a/hercules/plant_components/wind_farm_scada_power.py +++ b/hercules/plant_components/wind_farm_scada_power.py @@ -128,7 +128,9 @@ def __init__(self, h_dict, component_name): # Interpolate df_scada on to the time steps time_steps_all = np.arange(self.starttime, self.endtime, self.dt, dtype=hercules_float_type) - df_scada = interpolate_df(df_scada, time_steps_all) + df_scada = interpolate_df( + df_scada, time_steps_all, interpolation_method="averaged_to_instantaneous" + ) # Get a list of power columns and infer number of turbines self.power_columns = sorted([col for col in df_scada.columns if col.startswith("pow_")]) diff --git a/hercules/utilities.py b/hercules/utilities.py index ecea6c64..0597d72c 100644 --- a/hercules/utilities.py +++ b/hercules/utilities.py @@ -448,29 +448,52 @@ def close_logging(logger): logger.removeHandler(handler) -def interpolate_df(df, new_time): - """Interpolate DataFrame values to match new time axis. 
+_VALID_INTERPOLATION_METHODS = { + "averaged_to_instantaneous", + "zoh_to_instantaneous", + "instantaneous_to_instantaneous", +} - Numeric columns are assumed to represent period-averaged values whose - timestamps mark the start of each period. To recover the best estimate - of the instantaneous value at a query time, each value is assigned to the - midpoint of its interval before interpolating. - Datetime columns (e.g. ``time_utc``) are instantaneous coordinates — they - map simulation time to wall-clock time directly — so they are interpolated - without the midpoint shift. +def interpolate_df(df, new_time, interpolation_method): + """Interpolate DataFrame values to match new time axis. - Uses linear interpolation with Polars backend for better performance and memory efficiency. - Converts datetime columns to timestamps for interpolation. + The ``interpolation_method`` parameter controls how numeric columns are + resampled onto ``new_time``: + + - ``"averaged_to_instantaneous"``: Input values are period averages whose + timestamps mark the **start** of each period. Each value is assigned to + the midpoint of its interval and then linearly interpolated. Use for + wind speed, solar irradiance, and similar time-averaged signals. + - ``"zoh_to_instantaneous"``: Input values are piecewise-constant + (zero-order hold) with timestamps at the start of each interval. Each + query time receives the value of the last original timestamp at or + before it. Use for LMP prices and other step-change signals. + - ``"instantaneous_to_instantaneous"``: Input values already represent + instantaneous measurements. Standard linear interpolation is performed + directly on the original timestamps with no midpoint shift. + + Datetime columns (e.g. ``time_utc``) are always linearly interpolated on + the raw timestamps regardless of the chosen method, because they map + simulation time to wall-clock time directly. Args: df (pd.DataFrame): DataFrame with 'time' column and data columns. new_time (array-like): New time points for interpolation. + interpolation_method (str): One of ``"averaged_to_instantaneous"``, + ``"zoh_to_instantaneous"``, or + ``"instantaneous_to_instantaneous"``. Returns: pd.DataFrame: DataFrame with new time axis and interpolated data columns. + """ - # Convert new_time to numpy array for consistency + if interpolation_method not in _VALID_INTERPOLATION_METHODS: + raise ValueError( + f"Unknown interpolation_method '{interpolation_method}'. " + f"Must be one of {sorted(_VALID_INTERPOLATION_METHODS)}." 
+ ) + new_time = np.asarray(new_time) # Separate datetime and non-datetime columns for different processing @@ -488,11 +511,22 @@ def interpolate_df(df, new_time): result_pl = pl.DataFrame({"time": new_time}) time_values = df_pl["time"].to_numpy() - midpoints = _compute_interval_midpoints(time_values) + + if interpolation_method == "averaged_to_instantaneous": + x_coords = _compute_interval_midpoints(time_values) + else: + x_coords = time_values for col in numeric_cols: col_values = df_pl[col].to_numpy() - interpolated_values = np.interp(new_time, midpoints, col_values).astype(hercules_float_type) + if interpolation_method == "zoh_to_instantaneous": + indices = np.searchsorted(time_values, new_time, side="right") - 1 + indices = np.clip(indices, 0, len(col_values) - 1) + interpolated_values = col_values[indices].astype(hercules_float_type) + else: + interpolated_values = np.interp(new_time, x_coords, col_values).astype( + hercules_float_type + ) result_pl = result_pl.with_columns(pl.lit(interpolated_values).alias(col)) # Process datetime columns diff --git a/tests/utilities_test.py b/tests/utilities_test.py index 479cf8a3..bb079af1 100644 --- a/tests/utilities_test.py +++ b/tests/utilities_test.py @@ -38,7 +38,7 @@ def test_upsampling(): new_time = np.linspace(0, 10, 11) # [0, 1, 2, 3, ..., 10] # Interpolate - result = interpolate_df(df, new_time) + result = interpolate_df(df, new_time, interpolation_method="averaged_to_instantaneous") # Assert time is correct assert np.allclose(result["time"], new_time) @@ -69,7 +69,7 @@ def test_downsampling(): # Query at interval midpoints (0.5, 5.5, 9.5) and end points (0, 10) new_time = np.array([0.0, 5.0, 5.5, 10.0, 10.5]) - result = interpolate_df(df, new_time) + result = interpolate_df(df, new_time, interpolation_method="averaged_to_instantaneous") # At the midpoints we should recover the original period values expected_values = [ @@ -82,6 +82,19 @@ def test_downsampling(): assert np.allclose(result["value"], expected_values) +def test_zoh_interpolation(): + """Test zero-order hold interpolation with interpolate_df. + + Each query time should receive the value at the last original + timestamp at or before it (piecewise-constant / step behaviour). + """ + df = pd.DataFrame({"time": [0, 5, 10], "value": [100, 200, 300]}) + new_time = np.array([0, 2, 5, 7, 10, 12]) + result = interpolate_df(df, new_time, interpolation_method="zoh_to_instantaneous") + expected = [100, 100, 200, 200, 300, 300] + assert np.allclose(result["value"], expected) + + def test_datetime_interpolation(): """ Test interpolation of datetime columns with interpolate_df function. 
@@ -109,7 +122,7 @@ def test_datetime_interpolation(): new_time = np.array([0, 2.5, 5, 7.5, 10]) # Interpolate - result = interpolate_df(df, new_time) + result = interpolate_df(df, new_time, interpolation_method="averaged_to_instantaneous") # Assert time is correct assert np.allclose(result["time"], new_time) @@ -577,7 +590,7 @@ def test_interpolate_df_with_large_dataset(): new_time = np.linspace(0, 1000, 500) # Interpolate - result = interpolate_df(df, new_time) + result = interpolate_df(df, new_time, interpolation_method="averaged_to_instantaneous") # Verify result has the correct shape and columns assert len(result) == len(new_time) diff --git a/tests/wind_farm_dynamic_floris_test.py b/tests/wind_farm_dynamic_floris_test.py index b76247fa..7e7568cf 100644 --- a/tests/wind_farm_dynamic_floris_test.py +++ b/tests/wind_farm_dynamic_floris_test.py @@ -48,7 +48,11 @@ def test_wind_farm_ws_mean(): # internally by interpolate_df function. df_input["time"] = np.arange(0, df_input.shape[0], 1) df_input["time_utc"] = pd.to_datetime(df_input["time_utc"]) - df_input_interpolated = interpolate_df(df_input, np.arange(0, df_input.shape[0], 1)) + df_input_interpolated = interpolate_df( + df_input, + np.arange(0, df_input.shape[0], 1), + interpolation_method="averaged_to_instantaneous", + ) assert np.allclose( wind_sim.ws_mat[:, 0], diff --git a/tests/wind_farm_precom_floris_test.py b/tests/wind_farm_precom_floris_test.py index 66303435..0a73f782 100644 --- a/tests/wind_farm_precom_floris_test.py +++ b/tests/wind_farm_precom_floris_test.py @@ -58,7 +58,11 @@ def test_wind_farm_precom_floris_ws_mean(): # internally by interpolate_df function. df_input["time"] = np.arange(0, df_input.shape[0], 1) df_input["time_utc"] = pd.to_datetime(df_input["time_utc"]) - df_input_interpolated = interpolate_df(df_input, np.arange(0, df_input.shape[0], 1)) + df_input_interpolated = interpolate_df( + df_input, + np.arange(0, df_input.shape[0], 1), + interpolation_method="averaged_to_instantaneous", + ) assert np.allclose( wind_sim.ws_mat[:, 0], From 9cf9211c27f3aee0ecd1899c0c6fa3400d132e44 Mon Sep 17 00:00:00 2001 From: paulf81 Date: Fri, 17 Apr 2026 14:39:48 -0600 Subject: [PATCH 12/19] Update hercules/utilities.py Americanize spelling Co-authored-by: genevievestarke <103534902+genevievestarke@users.noreply.github.com> --- hercules/utilities.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hercules/utilities.py b/hercules/utilities.py index 0597d72c..ae87012a 100644 --- a/hercules/utilities.py +++ b/hercules/utilities.py @@ -553,7 +553,7 @@ def _compute_interval_midpoints(time_values): """Compute the midpoints of consecutive time intervals. For start-of-period timestamps, each value is best represented at the - centre of its interval. The last interval width is assumed equal to the + center of its interval. The last interval width is assumed equal to the preceding one. 
Args: From 87904a321e4f8b27a9640cb5ed14b234d9311d62 Mon Sep 17 00:00:00 2001 From: paulf81 Date: Fri, 17 Apr 2026 14:40:16 -0600 Subject: [PATCH 13/19] Make docs more specific Co-authored-by: genevievestarke <103534902+genevievestarke@users.noreply.github.com> --- docs/timing.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/timing.md b/docs/timing.md index 7196dbf4..0451d819 100644 --- a/docs/timing.md +++ b/docs/timing.md @@ -32,7 +32,7 @@ The `interpolate_df` function in `utilities.py` accepts a mandatory `interpolation_method` parameter that controls how numeric columns are resampled onto the simulation time grid. Three methods are available: -#### `"averaged_to_instantaneous"` (wind, solar, and similar) +#### `"averaged_to_instantaneous"` (wind, solar, and similar resource and power signals) Input values are period averages whose timestamps mark the **start** of each period. The best single-point estimate of a period-averaged value is at the From 17ff78ec599ed7431e9f4ed1ffe852a36f999df7 Mon Sep 17 00:00:00 2001 From: paulf81 Date: Fri, 17 Apr 2026 14:40:38 -0600 Subject: [PATCH 14/19] Improve docs Co-authored-by: genevievestarke <103534902+genevievestarke@users.noreply.github.com> --- docs/timing.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/timing.md b/docs/timing.md index 0451d819..33fc305d 100644 --- a/docs/timing.md +++ b/docs/timing.md @@ -37,7 +37,7 @@ resampled onto the simulation time grid. Three methods are available: Input values are period averages whose timestamps mark the **start** of each period. The best single-point estimate of a period-averaged value is at the **midpoint** of its interval, not the start. For example, the hourly average -from 12:00-13:00 is most representative of conditions at 12:30. +from 12:00-13:00 is most representative of conditions at 12:30. This also ensures that an average of the signal back to the original time interval will match the original data. 1. Each numeric value is assigned to the midpoint of its input interval (using `_compute_interval_midpoints`). From 31e9bc556824b790d1ad786b37a25c8ef722587d Mon Sep 17 00:00:00 2001 From: paulf81 Date: Tue, 21 Apr 2026 13:17:15 -0600 Subject: [PATCH 15/19] remove zoh and update docs and tests --- docs/timing.md | 69 ++++++++++++++++++++++++++------------ hercules/hercules_model.py | 18 +++++++--- hercules/utilities.py | 28 +++++++--------- tests/utilities_test.py | 13 ------- 4 files changed, 73 insertions(+), 55 deletions(-) diff --git a/docs/timing.md b/docs/timing.md index 33fc305d..4a9233ec 100644 --- a/docs/timing.md +++ b/docs/timing.md @@ -30,7 +30,7 @@ instantaneous convention. The `interpolate_df` function in `utilities.py` accepts a mandatory `interpolation_method` parameter that controls how numeric columns are -resampled onto the simulation time grid. Three methods are available: +resampled onto the simulation time grid. Two methods are available: #### `"averaged_to_instantaneous"` (wind, solar, and similar resource and power signals) @@ -60,25 +60,6 @@ time value Querying at 13:00 yields 150 (halfway between midpoints). ``` -#### `"zoh_to_instantaneous"` (LMP, external signals) - -Input values are piecewise-constant (zero-order hold) with timestamps at the -start of each interval. Each query time receives the value of the last -original timestamp at or before it -- the value is held constant until the -next timestamp. This is appropriate for signals like locational marginal -prices (LMP) that change in discrete steps. 
- -``` -Input file: - -time_utc value -12:00 100 ← held constant over [12:00, 13:00) -13:00 200 ← held constant over [13:00, 14:00) - -Querying at 12:30 yields 100. -Querying at 13:00 yields 200. -``` - #### `"instantaneous_to_instantaneous"` Input values already represent instantaneous measurements at their @@ -87,11 +68,46 @@ original timestamps with no midpoint shift. --- -In all three methods, datetime columns (e.g. `time_utc`) are linearly +In both methods, datetime columns (e.g. `time_utc`) are linearly interpolated on the raw timestamps without any shift, because they are instantaneous coordinate mappings between simulation time and wall-clock time, not period-averaged measurements. +#### Achieving zero-order-hold (ZOH) behaviour + +`interpolate_df` does not provide a dedicated zero-order-hold mode. If you +need step/piecewise-constant semantics -- for example, LMP prices that +should be held constant across each reporting interval -- pre-process your +input data to include an additional row at the end of each interval that +carries the same value as the start-of-interval row, and then use +`"instantaneous_to_instantaneous"`. Linear interpolation between each pair +of identical endpoints reproduces the ZOH shape. + +``` +Original data (start-of-interval only): + +time_utc value +12:00 100 +13:00 200 + +After inserting end-of-interval rows (just before the next start): + +time_utc value +12:00 100 +12:59:59 100 ← added endpoint +13:00 200 +13:59:59 200 ← added endpoint + +Querying at 12:30 with "instantaneous_to_instantaneous" yields 100. +Querying at 13:00 yields 200. +``` + +See +[`generate_locational_marginal_price_dataframe_from_gridstatus`](../hercules/grid/grid_utilities.py) +in `hercules/grid/grid_utilities.py` for a worked example of this +endpoint-insertion pattern (it shifts a copy of the data by `dt - 1` seconds +and merges it back in before handing the frame to Hercules). + ## Input Requirements All Hercules input files must specify start and end times using UTC datetime strings: @@ -200,7 +216,16 @@ Both wind and solar input CSV/Feather/Parquet files must contain a `time_utc` co ### External Data (LMP, etc.) -External data files loaded via `_read_external_data_file` are interpolated with `"zoh_to_instantaneous"` (zero-order hold), which is appropriate for signals like LMP prices that are piecewise-constant over each interval rather than time-averaged. +External data files loaded via `_read_external_data_file` are upsampled onto +the simulation time grid with `"instantaneous_to_instantaneous"` (linear +interpolation between the supplied timestamps). If you want zero-order-hold +(piecewise-constant) behaviour for signals like LMP prices, pre-process the +file to include end-of-interval rows that repeat the previous value as +described in [Achieving zero-order-hold (ZOH) behaviour](#achieving-zero-order-hold-zoh-behaviour). +The helper +[`generate_locational_marginal_price_dataframe_from_gridstatus`](../hercules/grid/grid_utilities.py) +in `hercules/grid/grid_utilities.py` is a concrete example of adding those +endpoint rows for LMP data. ```text time_utc,wd_mean,ws_000,ws_001,ws_002 diff --git a/hercules/hercules_model.py b/hercules/hercules_model.py index 731b0f80..0b4a3f28 100644 --- a/hercules/hercules_model.py +++ b/hercules/hercules_model.py @@ -173,10 +173,18 @@ def _read_external_data_file(self, filename): Read and interpolate external data from a CSV, feather, or pickle file. 
This method reads external data from the specified file (CSV, feather, or - pickle) and interpolates it onto the simulation time grid using zero-order - hold (``"zoh_to_instantaneous"``). ZOH is appropriate because external - signals such as LMP prices are piecewise-constant over each reporting - interval, unlike time-averaged weather data used by wind/solar components. + pickle) and upsamples it onto the simulation time grid using + ``"instantaneous_to_instantaneous"`` (linear interpolation between the + values at the supplied timestamps). + + If zero-order-hold (piecewise-constant / step) behavior is desired -- + for example, LMP prices that should be held constant across each + reporting interval -- the external data file must be pre-processed to + include an additional row at the end of each interval carrying the + same value. Linear interpolation between each pair of identical + endpoints then reproduces the ZOH shape. See + ``hercules.grid.grid_utilities.generate_locational_marginal_price_dataframe_from_gridstatus`` + for a worked example of this endpoint-insertion pattern. The external data must include a ``time_utc`` column which will be converted to simulation time. The interpolated data is stored in @@ -222,7 +230,7 @@ def _read_external_data_file(self, filename): # Interpolate using the utility function df_interpolated = interpolate_df( - df_ext, new_times, interpolation_method="zoh_to_instantaneous" + df_ext, new_times, interpolation_method="instantaneous_to_instantaneous" ) # Convert interpolated DataFrame to dictionary format diff --git a/hercules/utilities.py b/hercules/utilities.py index ae87012a..dcda7426 100644 --- a/hercules/utilities.py +++ b/hercules/utilities.py @@ -450,7 +450,6 @@ def close_logging(logger): _VALID_INTERPOLATION_METHODS = { "averaged_to_instantaneous", - "zoh_to_instantaneous", "instantaneous_to_instantaneous", } @@ -465,10 +464,6 @@ def interpolate_df(df, new_time, interpolation_method): timestamps mark the **start** of each period. Each value is assigned to the midpoint of its interval and then linearly interpolated. Use for wind speed, solar irradiance, and similar time-averaged signals. - - ``"zoh_to_instantaneous"``: Input values are piecewise-constant - (zero-order hold) with timestamps at the start of each interval. Each - query time receives the value of the last original timestamp at or - before it. Use for LMP prices and other step-change signals. - ``"instantaneous_to_instantaneous"``: Input values already represent instantaneous measurements. Standard linear interpolation is performed directly on the original timestamps with no midpoint shift. @@ -477,11 +472,21 @@ def interpolate_df(df, new_time, interpolation_method): the raw timestamps regardless of the chosen method, because they map simulation time to wall-clock time directly. + Note: + A dedicated zero-order-hold (ZOH) mode is intentionally not provided. + If you need step/piecewise-constant behaviour (e.g. LMP prices that + should be held constant across each reporting interval), pre-process + the input DataFrame to include an extra row at the end of each + interval carrying the same value, and then call this function with + ``"instantaneous_to_instantaneous"``. Linear interpolation between + each pair of identical endpoints reproduces the ZOH shape. See + ``hercules.grid.grid_utilities.generate_locational_marginal_price_dataframe_from_gridstatus`` + for an example of this endpoint-insertion pattern. + Args: df (pd.DataFrame): DataFrame with 'time' column and data columns. 
new_time (array-like): New time points for interpolation. - interpolation_method (str): One of ``"averaged_to_instantaneous"``, - ``"zoh_to_instantaneous"``, or + interpolation_method (str): One of ``"averaged_to_instantaneous"`` or ``"instantaneous_to_instantaneous"``. Returns: @@ -519,14 +524,7 @@ def interpolate_df(df, new_time, interpolation_method): for col in numeric_cols: col_values = df_pl[col].to_numpy() - if interpolation_method == "zoh_to_instantaneous": - indices = np.searchsorted(time_values, new_time, side="right") - 1 - indices = np.clip(indices, 0, len(col_values) - 1) - interpolated_values = col_values[indices].astype(hercules_float_type) - else: - interpolated_values = np.interp(new_time, x_coords, col_values).astype( - hercules_float_type - ) + interpolated_values = np.interp(new_time, x_coords, col_values).astype(hercules_float_type) result_pl = result_pl.with_columns(pl.lit(interpolated_values).alias(col)) # Process datetime columns diff --git a/tests/utilities_test.py b/tests/utilities_test.py index bb079af1..b25a72fd 100644 --- a/tests/utilities_test.py +++ b/tests/utilities_test.py @@ -82,19 +82,6 @@ def test_downsampling(): assert np.allclose(result["value"], expected_values) -def test_zoh_interpolation(): - """Test zero-order hold interpolation with interpolate_df. - - Each query time should receive the value at the last original - timestamp at or before it (piecewise-constant / step behaviour). - """ - df = pd.DataFrame({"time": [0, 5, 10], "value": [100, 200, 300]}) - new_time = np.array([0, 2, 5, 7, 10, 12]) - result = interpolate_df(df, new_time, interpolation_method="zoh_to_instantaneous") - expected = [100, 100, 200, 200, 300, 300] - assert np.allclose(result["value"], expected) - - def test_datetime_interpolation(): """ Test interpolation of datetime columns with interpolate_df function. From fcc046d69c00605590e47e636ec2f892ffc1dde3 Mon Sep 17 00:00:00 2001 From: paulf81 Date: Tue, 21 Apr 2026 15:26:31 -0600 Subject: [PATCH 16/19] fix docs --- docs/hercules_input.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/hercules_input.md b/docs/hercules_input.md index 5b5764d0..982066ed 100644 --- a/docs/hercules_input.md +++ b/docs/hercules_input.md @@ -131,7 +131,7 @@ The old format is still supported for backward compatibility but will show a dep ### External Data File Format The CSV file must contain: -- A `time_utc` column with UTC timestamps in ISO 8601 format. Each timestamp marks the **start of a reporting period**; values on that row are treated as period averages. See [Time Interpretation](timing.md#time-interpretation-inputs-vs-internal-values) for how Hercules converts these to instantaneous values. +- A `time_utc` column with UTC timestamps in ISO 8601 format. Unlike wind/solar/SCADA/playback inputs (which are treated as start-of-period period averages), external data values are treated as **instantaneous** samples at their timestamps and are upsampled to the simulation time grid via `"instantaneous_to_instantaneous"` (linear interpolation). If you need zero-order-hold (piecewise-constant) behaviour -- e.g. for LMP prices -- pre-process the file to include an extra row at the end of each interval carrying the previous value; see [Achieving zero-order-hold (ZOH) behaviour](timing.md#achieving-zero-order-hold-zoh-behaviour) and the [`generate_locational_marginal_price_dataframe_from_gridstatus`](../hercules/grid/grid_utilities.py) helper. - One or more data columns with external signals. 
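As a rough illustration of the endpoint-insertion pattern described above — not the Hercules implementation itself; the `lmp_rt` column, the timestamps, and the 300-second reporting interval are all assumed for the example — the following sketch duplicates each start-of-interval row at `dt - 1` seconds later, so that linear interpolation with `"instantaneous_to_instantaneous"` reproduces a piecewise-constant signal:

```python
import pandas as pd

# Hypothetical start-of-interval LMP data with a 5-minute reporting period.
df = pd.DataFrame(
    {
        "time_utc": pd.to_datetime(
            ["2024-01-01 12:00:00", "2024-01-01 12:05:00", "2024-01-01 12:10:00"],
            utc=True,
        ),
        "lmp_rt": [100.0, 200.0, 150.0],
    }
)

dt = 300  # assumed reporting interval in seconds

# Repeat each value just before the start of the next interval so that both
# endpoints of every interval carry the same value.
df_end = df.copy()
df_end["time_utc"] = df_end["time_utc"] + pd.Timedelta(seconds=dt - 1)

# Merge and sort; linear interpolation between the duplicated endpoints now
# holds each value constant across its interval (ZOH behaviour).
df_zoh = (
    pd.concat([df, df_end], ignore_index=True)
    .sort_values("time_utc")
    .reset_index(drop=True)
)
df_zoh.to_csv("lmp_data.csv", index=False)
```

A file prepared this way can be supplied as the external data file: querying midway through the 12:00 interval then returns 100 rather than a value ramping toward 200.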
Note that the names of the other columns are arbitrary; any column names will be carried forward and interpolated. However, the values must be floats. Additionally, some controllers and plotting utilities that work on external signals may require specific column names like `lmp_rt`, `lmp_da`, `wind_forecast`, etc. Example `lmp_data.csv`: From d872b4b3fdd3a63d1df3a2bfbe0d1d0b909fbb2f Mon Sep 17 00:00:00 2001 From: paulf81 Date: Tue, 21 Apr 2026 15:27:09 -0600 Subject: [PATCH 17/19] fix --- docs/timing.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/timing.md b/docs/timing.md index 4a9233ec..29e8f5f6 100644 --- a/docs/timing.md +++ b/docs/timing.md @@ -76,7 +76,7 @@ time, not period-averaged measurements. #### Achieving zero-order-hold (ZOH) behaviour `interpolate_df` does not provide a dedicated zero-order-hold mode. If you -need step/piecewise-constant semantics -- for example, LMP prices that +need step/piecewise-constant values -- for example, LMP prices that should be held constant across each reporting interval -- pre-process your input data to include an additional row at the end of each interval that carries the same value as the start-of-interval row, and then use From b62f96a50bba2b13742ca5f77b8d457c2611d236 Mon Sep 17 00:00:00 2001 From: paulf81 Date: Tue, 21 Apr 2026 15:33:36 -0600 Subject: [PATCH 18/19] fix sorting --- hercules/utilities.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/hercules/utilities.py b/hercules/utilities.py index dcda7426..d7e7cf96 100644 --- a/hercules/utilities.py +++ b/hercules/utilities.py @@ -512,7 +512,11 @@ def interpolate_df(df, new_time, interpolation_method): else: numeric_cols.append(col) - df_pl = pl.from_pandas(df) + # Sort by "time" once up front so that np.interp (which requires + # strictly-increasing x-coordinates) sees monotonic input for every + # column. Applying the sort in one place keeps numeric and datetime + # columns consistently ordered. + df_pl = pl.from_pandas(df).sort("time") result_pl = pl.DataFrame({"time": new_time}) time_values = df_pl["time"].to_numpy() @@ -527,14 +531,9 @@ def interpolate_df(df, new_time, interpolation_method): interpolated_values = np.interp(new_time, x_coords, col_values).astype(hercules_float_type) result_pl = result_pl.with_columns(pl.lit(interpolated_values).alias(col)) - # Process datetime columns + # Process datetime columns (use the same sorted frame as numeric cols) for col in datetime_cols: - # Extract datetime data using Polars - col_data = df_pl.select(["time", col]).sort("time") - time_values = col_data["time"].to_numpy() - - # Convert datetime to timestamps for interpolation - datetime_values = col_data[col].to_pandas().astype("int64").values / 10**9 + datetime_values = df_pl[col].to_pandas().astype("int64").values / 10**9 # Interpolate timestamps (datetime precision doesn't need float32 constraint) interpolated_timestamps = np.interp(new_time, time_values, datetime_values) From 34d0053efcc636b939bdea037db34d10d1997b8b Mon Sep 17 00:00:00 2001 From: paulf81 Date: Tue, 21 Apr 2026 15:35:04 -0600 Subject: [PATCH 19/19] fix assertion --- tests/utilities_test.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/utilities_test.py b/tests/utilities_test.py index b25a72fd..cbc5f9d1 100644 --- a/tests/utilities_test.py +++ b/tests/utilities_test.py @@ -51,7 +51,10 @@ def test_upsampling(): # ... 
# Time 10 is in between last and second last midpoint, so value should be 9 expected_values = [0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] - assert np.allclose(result["value"], expected_values), "Interpolated values should match y = x" + assert np.allclose(result["value"], expected_values), ( + "Interpolated values should match midpoint-corrected " + "averaged_to_instantaneous output with edge clamping" + ) def test_downsampling():
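To make the midpoint correction behind these expected values easier to verify by hand, here is a minimal numpy sketch — a standalone illustration under the assumptions stated in the docs above, not the Hercules code path itself — that reproduces the hourly-average example from `docs/timing.md`: start-of-period values are shifted to their interval midpoints before linear interpolation, with the last interval assumed as wide as the one before it.

```python
import numpy as np

# Hourly period averages, with timestamps (seconds since 12:00) marking the
# start of each period: 12:00-13:00 averages 100, 13:00-14:00 averages 200.
time_values = np.array([0.0, 3600.0])
col_values = np.array([100.0, 200.0])

# Shift each value to the midpoint of its interval; the width of the final
# interval is assumed equal to the preceding one.
midpoints = np.empty_like(time_values)
midpoints[:-1] = (time_values[:-1] + time_values[1:]) / 2.0
midpoints[-1] = time_values[-1] + (time_values[-1] - time_values[-2]) / 2.0
# midpoints -> [1800., 5400.]  (i.e. 12:30 and 13:30)

# Query at 12:30, 13:00, and 13:30; np.interp clamps outside the midpoint
# range, which is the "edge clamping" named in the assertion message above.
query = np.array([1800.0, 3600.0, 5400.0])
print(np.interp(query, midpoints, col_values))  # [100. 150. 200.]
```

Querying before the first midpoint (e.g. at 12:00) is clamped to 100, consistent with the repeated first value at the start of the expected array in `test_upsampling`.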