Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
11043f8
perf: accumulate moments with multithreading
Alvaro-Kothe Mar 14, 2026
2c9de5c
fix: omit warnings when OpenMP not used
Alvaro-Kothe Mar 14, 2026
69786e8
fix: try to fix msvc error
Alvaro-Kothe Mar 14, 2026
3464b24
chore: rename output and don't assign 0.0 on else
Alvaro-Kothe Mar 14, 2026
a86820e
test: use almost_equal instead of strict equality
Alvaro-Kothe Mar 14, 2026
3485af7
perf: use AVX2 instructions
Alvaro-Kothe Mar 17, 2026
f7c0d85
wip: neon SIMD
Alvaro-Kothe Mar 19, 2026
c508a10
refactor: use vector extensions
Alvaro-Kothe Mar 20, 2026
e35e337
fix: fix musl build
Alvaro-Kothe Mar 20, 2026
7c5b6e9
fix: remove unnecessary undef
Alvaro-Kothe Mar 20, 2026
39fcf5f
chore: remove unused include
Alvaro-Kothe Mar 20, 2026
0406565
fix: check for vector attribute
Alvaro-Kothe Mar 20, 2026
bc951a4
fix: improve typedef for different platforms
Alvaro-Kothe Mar 20, 2026
c2c832a
chore: clarify vectorized in gcc and undefine v_select
Alvaro-Kothe Mar 20, 2026
1d7e108
perf: vectorize mask handling
Alvaro-Kothe Mar 20, 2026
40fd153
chore: remove inlines in favor of compiler discretion
Alvaro-Kothe Mar 20, 2026
7d3f286
chore: comments
Alvaro-Kothe Mar 20, 2026
6f9224e
fix: fix loop bounds and avoid unnecessary round
Alvaro-Kothe Mar 21, 2026
f3ba6ab
chore: add comments after endif
Alvaro-Kothe Mar 21, 2026
18566d2
refactor: use `size_t` instead of signed integer for array size
Alvaro-Kothe Mar 22, 2026
9254c3c
fix: cast type to size_t to avoid conversion warnings
Alvaro-Kothe Mar 22, 2026
933db46
add note about performance
Alvaro-Kothe Mar 22, 2026
5acaa4c
feat: make OpenMP optional and disabled by default
Alvaro-Kothe Mar 22, 2026
d62ac23
refactor: use feature instead of boolean for openmp
Alvaro-Kothe Mar 22, 2026
bc43aaa
perf: use OpenMP automatically
Alvaro-Kothe Mar 23, 2026
baf66c4
fix: use meson.options to fix the build error
Alvaro-Kothe Mar 23, 2026
893796f
docs(moments): remove outdated comment about MSVC
Alvaro-Kothe Mar 24, 2026
57d3cb1
fix: don't force compiler for vector extension
Alvaro-Kothe Mar 24, 2026
10e14e5
chore: better variable names in aggregations.pyx
Alvaro-Kothe Mar 24, 2026
1f0953c
refactor: simplify diff
Alvaro-Kothe Apr 3, 2026
f6c2e2d
perf: remove OpenMP
Alvaro-Kothe Apr 3, 2026
58ba947
fix: use long instead of int64_t for apple clang
Alvaro-Kothe Apr 3, 2026
7c2e83e
fix: fix some -Wconversion warnings
Alvaro-Kothe Apr 3, 2026
6269bdc
fix: fix types for 32 bit
Alvaro-Kothe Apr 3, 2026
2521665
revert test changes
Alvaro-Kothe Apr 29, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 24 additions & 31 deletions pandas/_libs/algos.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ from libc.math cimport (
from numpy cimport (
float64_t,
int64_t,
uint8_t,
)

from pandas._libs.dtypes cimport (
Expand All @@ -17,37 +18,29 @@ from pandas._libs.dtypes cimport (
cdef numeric_t kth_smallest_c(numeric_t* arr, Py_ssize_t k, Py_ssize_t n) noexcept nogil


@cython.cdivision(True)
cdef inline void moments_add_value(
float64_t val,
int64_t* nobs,
float64_t* mean,
float64_t* m2,
float64_t* m3,
float64_t* m4,
int max_moment,
) noexcept nogil:
cdef:
float64_t n
float64_t delta = val - mean[0]
float64_t delta_n, term1

nobs[0] += 1
n = <float64_t>nobs[0]
delta_n = delta / n
term1 = delta * delta_n * (n - 1.0)

if max_moment >= 4:
m4[0] += delta_n * (
-4.0 * m3[0]
+ delta_n * (
6.0 * m2[0] + term1 * (n * n - 3.0 * n + 3.0)
)
)
if max_moment >= 3:
m3[0] += delta_n * (term1 * (n - 2.0) - 3.0 * m2[0])
m2[0] += term1
mean[0] += delta_n
cdef extern from "pandas/moments.h":
void moments_add_value(
float64_t val,
int64_t* nobs,
float64_t* mean,
float64_t* m2,
float64_t* m3,
float64_t* m4,
int max_moment,
) noexcept nogil

void accumulate_moments_scalar(
size_t n,
const float64_t* values,
bint skipna,
const uint8_t* mask,
int64_t* nobs,
float64_t* mean,
float64_t* m2,
float64_t* m3,
float64_t* m4,
int max_moment,
) noexcept nogil


@cython.cdivision(True)
Expand Down
53 changes: 20 additions & 33 deletions pandas/_libs/algos.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1600,33 +1600,6 @@ def diff_2d(
# ----------------------------------------------------------------------


@cython.boundscheck(False)
@cython.wraparound(False)
cdef void accumulate_moments_scalar(
const float64_t[:] values,
bint skipna,
const uint8_t[:] mask,
int64_t* nobs,
float64_t* mean,
float64_t* m2,
float64_t* m3,
float64_t* m4,
int max_moment,
) noexcept nogil:
cdef:
Py_ssize_t i, n = len(values)
bint uses_mask = mask is not None
float64_t val

for i in range(n):
val = values[i]
if uses_mask and mask[i]:
val = NaN
if skipna and isnan(val):
continue
moments_add_value(val, nobs, mean, m2, m3, m4, max_moment)


@cython.boundscheck(False)
@cython.wraparound(False)
cdef void accumulate_moments_axis(
Expand Down Expand Up @@ -1658,11 +1631,17 @@ cdef void accumulate_moments_axis(
nouter = nrows
ninner = ncols

# PERF: The outer dimension is embarrassingly parallel
for i in range(nouter):
if max_moment >= 3:
m3_ptr = &m3[i]
if max_moment >= 4:
m4_ptr = &m4[i]
# PERF: If it's possible to enforce contiguity in the last dimension,
# it's possible to use accumulate_moments_scalar with SIMD instructions.
# It may create more threads if OMP_NESTED is enabled and the outer loop
# is run in parallel.
# If it doesn't run in parallel, it will at least use SIMD instructions.
for j in range(ninner):
val = values[j, i] if axis == 0 else values[i, j]
if uses_mask and (mask[j, i] if axis == 0 else mask[i, j]):
Expand All @@ -1676,33 +1655,41 @@ cdef void accumulate_moments_axis(
@cython.boundscheck(False)
@cython.wraparound(False)
def scalar_skew(
const float64_t[:] values,
const float64_t[::1] values,
bint skipna,
const uint8_t[:] mask,
const uint8_t[::1] mask,
) -> float:
cdef:
int64_t nobs = 0
float64_t mean = 0.0, m2 = 0.0, m3 = 0.0
size_t n = <size_t>values.shape[0]
const float64_t* p_values = &values[0]
const uint8_t* p_mask = &mask[0] if mask is not None else NULL

with nogil:
accumulate_moments_scalar(values, skipna, mask, &nobs, &mean, &m2, &m3, NULL, 3)
accumulate_moments_scalar(n, p_values, skipna, p_mask,
&nobs, &mean, &m2, &m3, NULL, 3)

return calc_skew(nobs, m2, m3)


@cython.boundscheck(False)
@cython.wraparound(False)
def scalar_kurt(
const float64_t[:] values,
const float64_t[::1] values,
bint skipna,
const uint8_t[:] mask,
const uint8_t[::1] mask,
) -> float:
cdef:
int64_t nobs = 0
float64_t mean = 0.0, m2 = 0.0, m3 = 0.0, m4 = 0.0
size_t n = <size_t>values.shape[0]
const float64_t* p_values = &values[0]
const uint8_t* p_mask = &mask[0] if mask is not None else NULL

with nogil:
accumulate_moments_scalar(values, skipna, mask, &nobs, &mean, &m2, &m3, &m4, 4)
accumulate_moments_scalar(n, p_values, skipna, p_mask,
&nobs, &mean, &m2, &m3, &m4, 4)

return calc_kurt(nobs, m2, m4)

Expand Down
61 changes: 61 additions & 0 deletions pandas/_libs/include/pandas/moments.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
/*
Copyright (c) 2026, PyData Development Team
All rights reserved.

Distributed under the terms of the BSD Simplified License.

The full license is in the LICENSE file, distributed with this software.
*/

#pragma once

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#ifdef __cplusplus
// prevent name mangling
extern "C" {
#endif

// NOTE: It may be better to use the Moments struct
// instead of using multiple double references
// and a signed value to nobs.
typedef struct {
uint64_t n;
double mean;
double m2;
double m3;
double m4;
} Moments;

static inline void moments_add_value(double val, int64_t *nobs, double *mean,
double *m2, double *m3, double *m4,
int max_moment) {
double delta = val - *mean;
(*nobs)++;
double n = (double)(*nobs);
double delta_n = delta / n;
double term1 = delta * delta_n * (n - 1.0);

if (max_moment >= 4) {
*m4 += delta_n * (-4.0 * *m3 +
delta_n * (6.0 * *m2 + term1 * (n * n - 3.0 * n + 3.0)));
}
if (max_moment >= 3) {
*m3 += delta_n * (term1 * (n - 2.0) - 3.0 * *m2);
}
*m2 += term1;
*mean += delta_n;
}

Moments moments_merge(Moments a, Moments b, int max_moment);

/// Compute central moments until `max_moment` using `n` elements from `values`.
void accumulate_moments_scalar(size_t n, const double *values, bool skipna,
const uint8_t *mask, int64_t *nobs, double *mean,
double *m2, double *m3, double *m4,
int max_moment);
#ifdef __cplusplus
}
#endif
7 changes: 6 additions & 1 deletion pandas/_libs/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,12 @@ libs_sources = {
# Dict of extension name -> dict of {sources, include_dirs, and deps}
# numpy include dir is implicitly included
'algos': {
'sources': ['algos.pyx', _algos_common_helper, _algos_take_helper],
'sources': [
'algos.pyx',
'src/moments.c',
_algos_common_helper,
_algos_take_helper,
],
'deps': [_khash_primitive_helper_dep, m_dep],
},
'arrays': {'sources': ['arrays.pyx']},
Expand Down
Loading
Loading