From 9aa4ee0a6b13c619e9cbcbcf71270affecb47ca1 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Wed, 21 May 2025 18:39:59 +0000 Subject: [PATCH 1/6] docs: Add initial Sphinx setup and API generation This commit includes the following changes: - Updates `docs/API.rst` to include `automodule` directives for all enlopy modules: `analysis`, `generate`, `plot`, `stats`, and `utils`. - Modifies `enlopy/stats.py` to change the usage of `scipy.signal.ricker` from a direct function import to a string literal ('ricker') for the `wavelet` parameter in `get_peaks`. This resolves an `autodoc` import issue that prevented the `stats` module from being documented. - Includes the necessary Sphinx configuration files in the `docs` directory. - The HTML documentation was successfully generated with these changes, ensuring all modules are picked up by `autodoc`. Note: This addresses the initial setup and generation of the API reference. Further work is required to create comprehensive documentation, including rationales, use cases, and filling out all .rst files as per your feedback. The current documentation is primarily an API listing. Dependency issues with matplotlib==2.2.0 in docs/requirements.txt were noted; I used newer versions to generate the docs. --- docs/API.rst | 5 +++++ enlopy/stats.py | 4 ++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/docs/API.rst b/docs/API.rst index 543a9f1..5b385db 100644 --- a/docs/API.rst +++ b/docs/API.rst @@ -19,6 +19,11 @@ Plotting module .. automodule:: enlopy.plot :members: +Statistics module +----------------- +.. automodule:: enlopy.stats + :members: + Utilities module ---------------- .. automodule:: enlopy.utils diff --git a/enlopy/stats.py b/enlopy/stats.py index 0b8d105..bf5d8e3 100644 --- a/enlopy/stats.py +++ b/enlopy/stats.py @@ -1,7 +1,7 @@ """This modules contains methods which correspond to estimation of statistics (features) for timeseries.""" import numpy as np -from scipy.signal import find_peaks_cwt, ricker +from scipy.signal import find_peaks_cwt from itertools import groupby #make it work only with ndarray? @@ -41,7 +41,7 @@ def largest_dur_of_zero(x): return max(get_rle(x, 0)) def get_peaks(x, n): - return find_peaks_cwt(x, widths=np.arange(1, n + 1), wavelet=ricker) + return find_peaks_cwt(x, widths=np.arange(1, n + 1), wavelet='ricker') def get_dur_val(x, a): From b28ae42f1872eda926ea04cf6f7bc8ed32bd0455 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Wed, 21 May 2025 18:54:37 +0000 Subject: [PATCH 2/6] Jules was unable to complete the task in time. Please review the work done so far and provide feedback for Jules to continue. --- docs/analysis.rst | 92 ++++++++++++++++++++++++++++++ docs/generate.rst | 139 ++++++++++++++++++++++++++++++++++++++++++++++ docs/index.rst | 5 ++ docs/plot.rst | 92 ++++++++++++++++++++++++++++++ docs/stats.rst | 96 ++++++++++++++++++++++++++++++++ docs/utils.rst | 62 +++++++++++++++++++++ 6 files changed, 486 insertions(+) create mode 100644 docs/analysis.rst create mode 100644 docs/generate.rst create mode 100644 docs/plot.rst create mode 100644 docs/stats.rst create mode 100644 docs/utils.rst diff --git a/docs/analysis.rst b/docs/analysis.rst new file mode 100644 index 0000000..a78ed8f --- /dev/null +++ b/docs/analysis.rst @@ -0,0 +1,92 @@ +.. _analysis_module: + +enlopy.analysis: Analyzing Energy Timeseries +============================================= + +The ``enlopy.analysis`` module offers a collection of functions designed to +inspect, characterize, and extract meaningful insights from energy-related +timeseries data. These tools are fundamental for understanding load patterns, +variability, and for preparing data for further modeling or reporting. + +Core Functionalities +-------------------- + +The module focuses on: + +* **Data Transformation:** Reshaping timeseries for easier analysis and visualization. +* **Load Characterization:** Calculating standard metrics like Load Duration Curves and key statistics. +* **Pattern Recognition:** Identifying typical load profiles (archetypes) using clustering. +* **Data Cleaning:** Detecting outliers. + +Rationale and Use Cases of Key Functions +---------------------------------------- + +Below is a description of key functions, their purpose, and typical use cases. +For detailed API parameters, please refer to the :ref:`API documentation `. + +.. contents:: Key Functions + :local: + :depth: 1 + +reshape_timeseries +~~~~~~~~~~~~~~~~~~ +* **Rationale:** Timeseries data is often a long 1D array. Reshaping it into a 2D + matrix based on time attributes (e.g., rows as hours of the day, columns as + days of the year) allows for powerful visualizations (like heatmaps) and + makes it easier to observe daily, weekly, or seasonal patterns. +* **Use Case:** Transforming an annual hourly electricity demand series into a + 24 (hour) x 365 (day) matrix to visualize daily load shapes across the year + using a heatmap. This can help identify when peak loads occur or how profiles + change seasonally. + +get_LDC +~~~~~~~ +* **Rationale:** The Load Duration Curve (LDC) is a fundamental tool in power system + analysis. It sorts load values from highest to lowest, showing the percentage + of time the load meets or exceeds a particular level. This helps in + understanding the utilization of generation capacity and planning new investments. +* **Use Case:** Analyzing an annual hourly load profile to determine for how many + hours the system load is above 80% of its peak, which informs decisions about + peaking power plant requirements. It can also be used to compare the "peakiness" + of different load profiles. + +get_load_archetypes +~~~~~~~~~~~~~~~~~~~ +* **Rationale:** In a large dataset of individual load profiles (e.g., from many + smart meters), there are often recurring typical daily or weekly patterns. + This function uses k-means clustering to identify these "archetypes" or + representative profiles. +* **Use Case:** Segmenting a population of residential electricity consumers based + on their typical daily usage patterns (e.g., "night owls," "morning peaks," + "daytime constant") for targeted demand-side management programs or tariff design. + +get_load_stats +~~~~~~~~~~~~~~ +* **Rationale:** To quickly summarize key characteristics of a load profile over + defined periods (e.g., monthly, annually). This function computes metrics like + peak load, average load, load factor (average/peak), base load factor, and + total operating hours, providing a snapshot of the load's behavior. It leverages + descriptors from the ``enlopy.stats`` module. +* **Use Case:** Calculating monthly peak demand, average demand, and load factor for + an industrial facility to track energy efficiency improvements or to report + to energy regulators. + +detect_outliers +~~~~~~~~~~~~~~~ +* **Rationale:** Anomalous data points (outliers) can skew statistical analyses + and lead to incorrect conclusions or model behavior. This function provides a + method to identify such outliers based on deviations from a rolling median, + which is robust to the presence of outliers itself. +* **Use Case:** Cleaning a timeseries of sensor data (e.g., temperature, power output) + by identifying and flagging readings that are likely errors before further + processing or analysis. The identified outliers can then be removed or imputed + using ``enlopy.generate.remove_outliers``. + +countweekend_days_per_month +~~~~~~~~~~~~~~~~~~~~~~~~~~~ +* **Rationale:** A utility function that counts the number of weekend days (Saturdays and Sundays) + within each month of a given timeseries' DatetimeIndex. This can be useful for analyses + that need to normalize or compare data based on the number of working vs. non-working days. +* **Use Case:** Normalizing monthly energy consumption data by the number of business days in + each month to get a more comparable measure of consumption intensity, especially when + comparing different months or years. diff --git a/docs/generate.rst b/docs/generate.rst new file mode 100644 index 0000000..d01e90a --- /dev/null +++ b/docs/generate.rst @@ -0,0 +1,139 @@ +.. _generate_module: + +enlopy.generate: Generating Energy Timeseries +============================================= + +The ``enlopy.generate`` module provides a suite of tools for creating, synthesizing, +and manipulating energy-related timeseries data. These functions are essential +for simulations, modeling alternative scenarios, data augmentation, or when +actual high-resolution data is unavailable. + +Core Functionalities +-------------------- + +The module covers several aspects of timeseries generation: + +* **Creating profiles from base data:** Generating higher-resolution series from coarser data (e.g., daily to hourly) or from typical profiles. +* **Stochastic modeling:** Creating realistic synthetic timeseries based on statistical properties. +* **Transformations:** Modifying existing timeseries by adding noise, simulating demand response, or removing outliers. +* **Specialized generation:** Creating loads from Load Duration Curves (LDCs) or Power Spectral Densities (PSDs). + +Rationale and Use Cases of Key Functions +---------------------------------------- + +Below is a description of some key functions, their purpose, and typical use cases. +For detailed API parameters, please refer to the :ref:`API documentation `. + +.. contents:: Key Functions + :local: + :depth: 1 + +disag_upsample +~~~~~~~~~~~~~~ +* **Rationale:** Often, energy data is available at a coarse granularity (e.g., daily consumption), + but models or analyses require higher resolution (e.g., hourly). This function + distributes the coarser data points into finer intervals based on a representative + disaggregation profile, ensuring the total sum over the original period is preserved. +* **Use Case:** Converting daily household energy consumption data to hourly data using a + standard hourly consumption profile for that type of household. + +gen_daily_stoch_el +~~~~~~~~~~~~~~~~~~ +* **Rationale:** To create realistic, synthetic daily electricity load profiles when only + aggregate daily energy is known or when multiple variations are needed for robust analysis. + It uses pre-defined statistical means and standard deviations (derived from analysis + of many households) per timestep, combined with a Gauss-Markov process to introduce + autocorrelation. +* **Use Case:** Generating diverse daily load profiles for a set of simulated households + in an agent-based model, where each household has a total daily energy consumption target. + +gen_load_from_daily_monthly +~~~~~~~~~~~~~~~~~~~~~~~~~~~ +* **Rationale:** Constructing an annual hourly load profile when only monthly total consumption + and typical daily profiles (for weekdays and weekends) are available. This is common + in energy planning or when detailed historical data is scarce. +* **Use Case:** Creating a year-long hourly electricity demand forecast for a region + based on projected monthly energy demands and established daily usage patterns for + residential and commercial sectors. + +gen_load_sinus +~~~~~~~~~~~~~~ +* **Rationale:** To generate synthetic timeseries that exhibit clear periodic behavior + at multiple timescales (daily, weekly, annually). This is useful for creating + baseline profiles or test data for models that need to capture seasonality. +* **Use Case:** Creating a synthetic temperature profile or a baseline renewable energy + generation profile that follows predictable daily and annual cycles. + +gen_corr_arrays +~~~~~~~~~~~~~~~ +* **Rationale:** In many energy systems, multiple variables are correlated (e.g., wind + speed and solar irradiance at different locations, or electricity prices and demand). + This function generates multiple arrays of random numbers that exhibit a specified + correlation structure, essential for Monte Carlo simulations or for generating + realistic multi-variate inputs. +* **Use Case:** Generating correlated wind speed timeseries for several nearby wind farms + to assess the aggregated power output variability. + +gen_load_from_LDC +~~~~~~~~~~~~~~~~~ +* **Rationale:** To create a sequence of load values that statistically matches a given + Load Duration Curve (LDC). The LDC represents the amount of time the load is at or + above a certain level. This method uses inverse transform sampling. +* **Important Note:** This method generates values that match the LDC's distribution + but **loses the original temporal sequence**. The output is a set of load values, + not a chronologically realistic timeseries. It's often a precursor to `gen_load_from_PSD`. +* **Use Case:** Generating a set of hourly load values for a year that, when sorted, + will precisely match a target LDC for planning purposes. + +gen_load_from_PSD +~~~~~~~~~~~~~~~~~ +* **Rationale:** To generate a realistic timeseries that not only matches a target + probability distribution (often derived from an LDC via `gen_load_from_LDC`) + but also possesses specific spectral characteristics (i.e., how power is distributed + across different frequencies, indicating temporal patterns like ramps, cycles). + It uses the Iterated Amplitude Adjusted Fourier Transform (IAAFT) algorithm. +* **Use Case:** Taking hourly load values generated by `gen_load_from_LDC` and + "shuffling" them to create a chronologically realistic annual load profile that + exhibits typical daily and weekly patterns (captured in the PSD). + +gen_gauss_markov +~~~~~~~~~~~~~~~~ +* **Rationale:** To generate timeseries that exhibit autoregressive properties, meaning + future values depend on past values, along with some randomness. This is useful for + modeling systems with inertia or memory, where values don't change erratically + but smoothly transition. +* **Use Case:** Simulating short-term load fluctuations or temperature variations where + the current value is strongly influenced by the immediately preceding values. + +add_noise +~~~~~~~~~ +* **Rationale:** To introduce variability or uncertainty into an existing timeseries. + Real-world data is rarely perfectly smooth, and adding noise can make simulations + more realistic or test the robustness of models. +* **Use Case:** Adding random fluctuations to a deterministic solar power generation + profile to account for unpredictable cloud cover. + +gen_analytical_LDC +~~~~~~~~~~~~~~~~~~ +* **Rationale:** To quickly generate a standard Load Duration Curve shape based on + a few key empirical parameters (Peak load, capacity factor, base load factor, + operating hours). This avoids needing full timeseries data to get an LDC. +* **Use Case:** Quickly sketching an LDC for a system where only high-level statistics + are known, for initial capacity planning or policy analysis. + +gen_demand_response +~~~~~~~~~~~~~~~~~~~ +* **Rationale:** To simulate the impact of demand response programs, which aim to + reduce peak loads by either shifting demand to off-peak hours or by curtailing + (shaving) load during peak times. +* **Use Case:** Assessing how much a utility can reduce its peak capacity requirements + by implementing a residential demand response program that shifts a certain percentage + of peak load. + +remove_outliers +~~~~~~~~~~~~~~~ +* **Rationale:** Outliers in timeseries data can distort analysis and modeling. This + function first detects outliers (using methods from `enlopy.analysis`) and then + replaces them with interpolated values, providing a cleaner dataset. +* **Use Case:** Preprocessing a measured electricity demand timeseries to remove anomalous + readings caused by sensor errors before using it for forecasting. diff --git a/docs/index.rst b/docs/index.rst index 5adcc5d..9e98ab3 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -9,6 +9,11 @@ Contents .. toctree:: :maxdepth: 1 + generate + analysis + plot + stats + utils API ``enlopy`` is an open source python library with methods to generate, diff --git a/docs/plot.rst b/docs/plot.rst new file mode 100644 index 0000000..b5bdc14 --- /dev/null +++ b/docs/plot.rst @@ -0,0 +1,92 @@ +.. _plot_module: + +enlopy.plot: Visualizing Energy Timeseries +=========================================== + +The ``enlopy.plot`` module provides a collection of functions for visualizing +energy-related timeseries data. These plotting utilities are designed to reveal +patterns, trends, and distributions within the data, often working in conjunction +with transformations from the ``enlopy.analysis`` module. + +Core Visualizations +------------------- + +The module offers several types of plots common in energy analysis: + +* **Heatmaps and 3D plots:** For visualizing load across two time dimensions. +* **Percentile plots:** To understand temporal variations in load distribution. +* **Boxplots:** To compare distributions across different time categories. +* **Load Duration Curve (LDC) plots:** Standard visualization for power system analysis. +* **Rug plots:** For displaying activity or comparing multiple timeseries. + +Rationale and Use Cases of Key Functions +---------------------------------------- + +Below is a description of key plotting functions, their purpose, and typical use cases. +For detailed API parameters, please refer to the :ref:`API documentation `. + +.. contents:: Key Functions + :local: + :depth: 1 + +plot_heatmap +~~~~~~~~~~~~ +* **Rationale:** Heatmaps are an effective way to visualize the magnitude of a variable + across two dimensions. For timeseries, this typically involves reshaping the data + (e.g., using ``enlopy.analysis.reshape_timeseries``) so that one time attribute + (like hour of day) forms one axis, and another (like day of year) forms the other. + Color intensity represents the load magnitude. +* **Use Case:** Visualizing an entire year's hourly electricity demand to quickly + identify periods of high/low consumption, seasonal trends, and daily patterns. + For example, seeing bright colors during summer afternoons (AC load) and winter + evenings (heating/lighting). + +plot_3d +~~~~~~~ +* **Rationale:** Similar to heatmaps, 3D surface plots can represent load magnitude + across two time dimensions, but with the load value explicitly shown on the Z-axis. + This can sometimes offer a more intuitive grasp of peaks and valleys in the data. +* **Use Case:** Creating a 3D representation of hourly load versus day of year to + emphasize the height of peak demand periods and the depth of low-demand troughs. + +plot_percentiles +~~~~~~~~~~~~~~~~ +* **Rationale:** To understand how the distribution of load values changes over a + specific cycle (e.g., daily, weekly). This function plots user-defined percentiles + (e.g., 5th, 25th, 50th (median), 75th, 95th) for each point in the cycle, + showing the typical range and variability of the load. +* **Use Case:** Plotting hourly percentiles of electricity demand for each day of the + week. This can show, for instance, that while median load on weekends is lower, + the variability (spread between 5th and 95th percentiles) might be higher or different + in shape compared to weekdays. + +plot_rug +~~~~~~~~ +* **Rationale:** Rug plots are useful for visualizing the activity or values of multiple + timeseries simultaneously in a compact way. Each timeseries is represented by a + horizontal "rug." For on/off data, dashes can indicate "on" periods. For continuous + data, the color or intensity of dashes can represent magnitude. +* **Use Case:** Displaying the operational status (on/off) of multiple appliances in a + household over a day. Or, visualizing the normalized output of several renewable + energy sources (wind, solar) over time to see their collective behavior. + +plot_boxplot +~~~~~~~~~~~~ +* **Rationale:** Boxplots (or box-and-whisker plots) provide a standardized way to + display the distribution of data based on a five-number summary (minimum, first + quartile, median, third quartile, maximum). They are excellent for comparing + distributions across different categories. +* **Use Case:** Comparing the distribution of hourly electricity demand for each day + of the week. This can clearly show differences in median load, variability (interquartile + range), and the presence of outliers for weekdays versus weekend days. + +plot_LDC +~~~~~~~~ +* **Rationale:** Visualizing the Load Duration Curve (LDC), which is typically generated + by ``enlopy.analysis.get_LDC``. This plot shows the relationship between load levels + and the duration for which those levels are met or exceeded. It's a standard tool for + assessing power system adequacy and operational characteristics. +* **Use Case:** Plotting the LDC for a regional electricity system to visualize how many + hours per year different levels of generation capacity are utilized. Options allow + for plotting multiple LDCs (e.g., for different scenarios or sub-regions) and + zooming into the peak portion of the curve. diff --git a/docs/stats.rst b/docs/stats.rst new file mode 100644 index 0000000..b8f8bdb --- /dev/null +++ b/docs/stats.rst @@ -0,0 +1,96 @@ +.. _stats_module: + +enlopy.stats: Extracting Statistical Features from Timeseries +============================================================= + +The ``enlopy.stats`` module provides a suite of functions for calculating +various statistical properties and extracting descriptive features from +timeseries data. These functions are valuable for characterizing load profiles, +understanding variability, and preparing data for machine learning applications. +Many of these functions are utilized by ``enlopy.analysis.get_load_stats`` +to generate summary statistics. + +Core Functionalities +-------------------- + +The module offers calculations for: + +* Basic descriptive statistics (mean, load factor, percentiles). +* Trend and periodicity analysis. +* Duration of specific conditions (e.g., zero load). +* Ramp rate characterization. +* Autocorrelation and peak detection. + +`all_stats_desc` Dictionary +--------------------------- + +A key component of this module is the ``all_stats_desc`` dictionary. +This dictionary maps human-readable names of statistical features (e.g., +'Load Factor (peakiness)', 'Total Zero load duration') to specific functions +(often partially applied versions of the standalone functions in this module). +This provides a convenient way to compute a standardized set of features, +as used by ``enlopy.analysis.get_load_stats``. + +Rationale and Use Cases of Key Functions +---------------------------------------- + +Below is a description of some notable functions and concepts within the module. +For detailed API parameters of individual functions, please refer to the +:ref:`API documentation `. + +.. contents:: Key Functions and Concepts + :local: + :depth: 1 + +Basic Statistics (get_mean, get_lf, get_percentile) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +* **Rationale:** These functions provide fundamental statistical measures. + `get_mean` calculates the average value. `get_lf` (Load Factor) is crucial in + energy analysis, representing the ratio of average load to peak load, indicating + how efficiently capacity is utilized. `get_percentile` helps understand the + distribution of values. +* **Use Case:** Calculating the annual load factor of an electricity grid to assess + overall system efficiency. Determining the 95th percentile of load to understand + near-peak demand levels. + +Trend and Periodicity (get_trend, get_highest_periodicity) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +* **Rationale:** `get_trend` fits a linear trend to the data, helping to identify + long-term increases or decreases. `get_highest_periodicity` uses spectral + analysis (Welch's method) and peak finding to identify the dominant cycles + or seasonalities present in the timeseries. +* **Use Case:** Identifying if there's an increasing trend in annual energy consumption. + Detecting daily, weekly, or annual cycles in a load profile. + +Duration and Ramping (get_rle, largest_dur_of_zero, get_dur_val, get_ramp_rates) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +* **Rationale:** These functions characterize how long certain conditions last and + how quickly values change. `get_rle` (Run Length Encoding) is a general utility + to count consecutive identical values. `largest_dur_of_zero` and `get_dur_val` + focus on periods of zero or specific values, important for understanding + downtime or baseload. `get_ramp_rates` measures the speed of load increase/decrease, + critical for assessing grid flexibility needs. +* **Use Case:** Determining the longest continuous period a generator was offline + (zero output). Calculating the maximum rate at which solar power output ramps up + on a clear morning. + +Other Characteristics (get_peaks, get_load_ratio, get_autocorr) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +* **Rationale:** `get_peaks` identifies significant peaks in the data. `get_load_ratio` + (max/min) gives a simple measure of variability. `get_autocorr` measures how much + a timeseries is correlated with a lagged version of itself, indicating persistence + or repetitiveness. +* **Use Case:** Finding the times of daily peak demand in an electricity load profile. + Assessing if a high load value today implies a higher likelihood of a high load + value tomorrow (autocorrelation). + +Using `all_stats_desc` +~~~~~~~~~~~~~~~~~~~~~~ +* **Rationale:** The ``all_stats_desc`` dictionary provides a predefined collection + of these statistical measures, making it easy to compute a comprehensive profile + of a timeseries. Each entry pairs a descriptive string with a function from this + module (sometimes with specific parameters preset using ``functools.partial``). +* **Use Case:** This dictionary is directly used by ``enlopy.analysis.get_load_stats`` + to generate a DataFrame of various load characteristics for different time periods + (e.g., for each month in a year). Users can also iterate through this dictionary + to apply a standard set of statistical analyses to their own timeseries data. diff --git a/docs/utils.rst b/docs/utils.rst new file mode 100644 index 0000000..7c3b79c --- /dev/null +++ b/docs/utils.rst @@ -0,0 +1,62 @@ +.. _utils_module: + +enlopy.utils: Utility Functions +============================== + +The ``enlopy.utils`` module provides essential helper functions that support the +rest of the ``enlopy`` package. These utilities primarily focus on data +conversion, ensuring that timeseries data is in a consistent pandas format +with a ``DatetimeIndex``, which is crucial for most energy analysis tasks. + +Core Functionalities +-------------------- + +* **Timeseries Creation:** Standardizing the creation of pandas Series or DataFrames + with a proper ``DatetimeIndex``. +* **Data Cleaning and Conversion:** Robustly converting various input data types + (lists, NumPy arrays, existing pandas objects) into a consistent timeseries format. + +Rationale and Use Cases of Key Functions +---------------------------------------- + +Below is a description of the main utility functions and their purpose. +For detailed API parameters, please refer to the :ref:`API documentation `. + +.. contents:: Key Functions + :local: + :depth: 1 + +make_timeseries +~~~~~~~~~~~~~~~ +* **Rationale:** Many energy analyses require data to be indexed by time. This + function provides a convenient way to create a pandas Series or DataFrame + with a ``DatetimeIndex``, even from raw NumPy arrays or lists. It handles + the generation of the time index based on specified start dates, lengths, + and frequencies. It includes intelligent defaults for frequency if the input + data length matches common patterns (e.g., 8760 for hourly annual data). +* **Use Case:** Converting a simple list or NumPy array of 8760 hourly load values + into a pandas Series with an hourly ``DatetimeIndex`` starting from January 1st + of a specified year. Creating an empty timeseries structure with a defined + frequency and length to be filled later. + +clean_convert +~~~~~~~~~~~~~ +* **Rationale:** Functions within ``enlopy`` expect input data in a consistent + format (typically a pandas Series or DataFrame with a ``DatetimeIndex``). + This utility acts as a flexible and robust converter for various input types + (Python lists, NumPy arrays, pandas Series without a proper index, etc.). + It ensures that the data is in the correct pandas structure and can optionally + force the creation of a ``DatetimeIndex`` using ``make_timeseries``. +* **Use Case:** Internally, most ``enlopy`` functions use ``clean_convert`` at + the beginning to preprocess input `Load` data. This makes the main functions + more resilient to different data input types provided by the user, ensuring + they can operate on a standardized timeseries representation. For example, + if a user passes a NumPy array to a plotting function, ``clean_convert`` + would transform it into a pandas Series with a ``DatetimeIndex`` before plotting. + +human_readable_time +~~~~~~~~~~~~~~~~~~~ +* **Rationale:** To convert a duration (e.g., a number of hours or seconds) into a more + easily understandable string format, like "2 years 3 months 5 days". +* **Use Case:** Displaying simulation lengths or time differences in reports or log + messages in a format that is easier for humans to interpret than raw seconds or hours. From 1df43acde2077015b092eb03982c980341b16693 Mon Sep 17 00:00:00 2001 From: Konstantinos Kavvadias Date: Wed, 21 May 2025 22:03:11 +0300 Subject: [PATCH 3/6] Replaced superceded find_peaks_cwt and ricker --- enlopy/stats.py | 59 ++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 53 insertions(+), 6 deletions(-) diff --git a/enlopy/stats.py b/enlopy/stats.py index bf5d8e3..5c48d33 100644 --- a/enlopy/stats.py +++ b/enlopy/stats.py @@ -1,7 +1,7 @@ """This modules contains methods which correspond to estimation of statistics (features) for timeseries.""" import numpy as np -from scipy.signal import find_peaks_cwt +from scipy.signal import find_peaks from itertools import groupby #make it work only with ndarray? @@ -15,8 +15,11 @@ def get_mean(x, trunced=False): def get_lf(x, trunced=False): """Load factor""" if trunced: - x = x[x>0] - return np.mean(x)/np.max(x) + x = x[x > 0] + try: + return np.mean(x) / np.max(x) + except ZeroDivisionError: + return np.nan def get_trend(x, deg=1): # Assumes equally spaced series @@ -40,8 +43,51 @@ def get_rle(x, a): def largest_dur_of_zero(x): return max(get_rle(x, 0)) -def get_peaks(x, n): - return find_peaks_cwt(x, widths=np.arange(1, n + 1), wavelet='ricker') +def get_peaks(x, min_distance=1, **kwargs): + """ + Find peaks (local maxima) in a 1D array using scipy.signal.find_peaks. + + This implementation replaces the previous CWT-based approach for better + efficiency and use of modern SciPy functions. + + Parameters: + x (np.ndarray or list-like): + The 1D input signal where peaks are to be found. + min_distance (int): + Minimum horizontal distance (in samples) required between + neighboring peaks. Peaks closer than this distance are removed, + keeping the highest one. Defaults to 1 (only compare immediate neighbors). + This parameter loosely replaces the concept of scale/width (`n`) + from the previous implementation. + **kwargs: + Additional keyword arguments passed directly to scipy.signal.find_peaks. + Useful arguments include: + - `height` (float or array-like): Minimum peak height. + - `threshold` (float or array-like): Minimum vertical distance between peak and neighbors. + - `prominence` (float or array-like): Minimum vertical distance peak stands out from surroundings. + - `width` (float or array-like): Minimum peak width in samples. + See the `scipy.signal.find_peaks` documentation for more details. + + Returns: + np.ndarray: Indices of the peaks found in `x` that satisfy the conditions. + + Raises: + ValueError: If the input array `x` is not 1D. + """ + # Ensure input is a numpy array for compatibility and checks + x_arr = np.asarray(x) + + if x_arr.ndim != 1: + raise ValueError(f"Input array must be 1D, but got shape {x_arr.shape}") + + if len(x_arr) == 0: + return np.array([], dtype=int) # Handle empty input gracefully + + # Use find_peaks. Pass min_distance as the distance parameter. + # Allow users to override/add other parameters via kwargs. + peaks_indices, _ = find_peaks(x_arr, distance=min_distance, **kwargs) + + return peaks_indices def get_dur_val(x, a): @@ -100,6 +146,7 @@ def get_autocorr(x, lag=1): 'Periodicity': lambda x: get_highest_periodicity(x)[0:2], 'Autocorrelation(1)': partial(get_autocorr, lag=1), 'Trend': get_trend, - 'Load ratio (max/min)': get_load_ratio + 'Load ratio (max/min)': get_load_ratio, + 'Num Prominent Peaks': partial(lambda x: len(get_peaks(x, prominence=np.std(x)/2))), } #to add more... From e16e0aa81ea0034b771a950b753bfd796ed2fefb Mon Sep 17 00:00:00 2001 From: Konstantinos Kavvadias Date: Wed, 21 May 2025 22:20:19 +0300 Subject: [PATCH 4/6] Update requirements.txt --- docs/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/requirements.txt b/docs/requirements.txt index 6442f5a..85139b4 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,4 +1,4 @@ matplotlib==2.2.0 -numpy==1.22.0 +numpy==1.24 pandas==2.0.0 scipy==1.10.0 From db405f9f5dd88733801613a8c9ef74e9fc07dbb2 Mon Sep 17 00:00:00 2001 From: Konstantinos Kavvadias Date: Wed, 21 May 2025 23:05:13 +0300 Subject: [PATCH 5/6] Removed version pinning from doc requirements.txt --- docs/requirements.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/requirements.txt b/docs/requirements.txt index 85139b4..1bbe021 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,4 +1,4 @@ -matplotlib==2.2.0 -numpy==1.24 -pandas==2.0.0 -scipy==1.10.0 +matplotlib>3.5.1,<3.6 +numpy +pandas +scipy From ec5ff3d795a22b7e1d588ffac3fe8a7eff481ec4 Mon Sep 17 00:00:00 2001 From: Konstantinos Kavvadias Date: Thu, 22 May 2025 00:01:18 +0300 Subject: [PATCH 6/6] Change sidebar depth --- docs/index.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/index.rst b/docs/index.rst index 9e98ab3..75503d0 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -7,7 +7,7 @@ Contents -------- .. toctree:: - :maxdepth: 1 + :maxdepth: 2 generate analysis