diff --git a/LICENSES/XSIMD_LICENSE b/LICENSES/XSIMD_LICENSE new file mode 100644 index 0000000000000..eee7a54bc956b --- /dev/null +++ b/LICENSES/XSIMD_LICENSE @@ -0,0 +1,29 @@ +Copyright (c) 2016, Johan Mabille, Sylvain Corlay, Wolf Vollprecht and Martin Renou +Copyright (c) 2016, QuantStack +Copyright (c) 2018, Serge Guelton +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/meson.build b/meson.build index d2874e85cc3a7..33790ca88fb3f 100644 --- a/meson.build +++ b/meson.build @@ -37,6 +37,7 @@ add_project_arguments( ) cc = meson.get_compiler('c') +cxx = meson.get_compiler('cpp') if cc.get_id() == 'msvc' # Tracking issue: https://github.com/pandas-dev/pandas/issues/63701 # Ignore some MSVC specific warnings: @@ -44,8 +45,9 @@ if cc.get_id() == 'msvc' # C4267: conversion from `size_t` to smaller type. # C4551: occurs due to Cython generating code with (void)func. # https://github.com/cython/cython/issues/3579 + # C4146: unary minus operator applied to unsigned type. Occurs in xsimd. add_project_arguments( - ['/wd4244', '/wd4267', '/wd4551'], + ['/wd4244', '/wd4267', '/wd4551', '/wd4146'], language: ['c', 'cpp'], ) endif diff --git a/pandas/_libs/include/pandas/parser/simd_scan.h b/pandas/_libs/include/pandas/parser/simd_scan.h new file mode 100644 index 0000000000000..77ba06e4b8c23 --- /dev/null +++ b/pandas/_libs/include/pandas/parser/simd_scan.h @@ -0,0 +1,40 @@ +/* +Copyright (c) 2026, PyData Development Team +All rights reserved. + +Distributed under the terms of the BSD Simplified License. + +The full license is in the LICENSE file, distributed with this software. +*/ + +#pragma once + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +// Minimum bytes the scanner can process in one call. Callers should +// fall through to the scalar path when fewer bytes remain. +#define PD_SCAN_MIN_BYTES 16 + +typedef struct pd_scanner pd_scanner; + +// Build a scanner that halts on any of `n` special bytes. Supported +// values for `n` are 2 (quoted-field scan) and 6 (unquoted-field scan). +// Returns NULL on allocation failure or unsupported `n`. +pd_scanner *pd_scanner_create(const char *chars, int n); + +// Free a scanner. Accepts NULL. +void pd_scanner_destroy(pd_scanner *scanner); + +// Returns the byte offset of the first special char in data[0..len), or +// the number of bytes covered by full SIMD chunks (<= `len`) if none. 
The +// trailing =14.2') + +subdir('simd') subdir('tslibs') @@ -93,8 +96,9 @@ libs_sources = { 'lib.pyx', 'src/parser/tokenizer.c', 'src/parser/fast_float_strtod.cpp', + 'src/parser/simd_scan.cpp', ], - 'deps': [fast_float_dep], + 'deps': [fast_float_dep, xsimd_dep], }, 'missing': {'sources': ['missing.pyx']}, 'pandas_datetime': { @@ -109,19 +113,21 @@ libs_sources = { 'sources': [ 'src/parser/tokenizer.c', 'src/parser/fast_float_strtod.cpp', + 'src/parser/simd_scan.cpp', 'src/parser/io.c', 'src/parser/pd_parser.c', ], - 'deps': [fast_float_dep], + 'deps': [fast_float_dep, xsimd_dep], }, 'parsers': { 'sources': [ 'parsers.pyx', 'src/parser/tokenizer.c', 'src/parser/fast_float_strtod.cpp', + 'src/parser/simd_scan.cpp', 'src/parser/io.c', ], - 'deps': [fast_float_dep, _khash_primitive_helper_dep], + 'deps': [fast_float_dep, xsimd_dep, _khash_primitive_helper_dep], }, '_ujson': { 'sources': [ diff --git a/pandas/_libs/simd/meson.build b/pandas/_libs/simd/meson.build new file mode 100644 index 0000000000000..21c7f59624bda --- /dev/null +++ b/pandas/_libs/simd/meson.build @@ -0,0 +1,32 @@ +# All architectures we might support +# Key is the architecture name used in file suffixes and macros +is_msvc_syntax = cxx.get_argument_syntax() == 'msvc' +simd_x86_flags = { + 'sse2': is_msvc_syntax ? ['/arch:SSE2'] : ['-msse2'], + 'avx2': is_msvc_syntax ? ['/arch:AVX2'] : ['-mavx2'], + 'avx512cd': is_msvc_syntax ? ['/arch:AVX512'] : ['-mavx512cd'], +} + +simd_config = configuration_data() +supported_simd_archs = {} +if host_machine.cpu_family() == 'aarch64' + supported_simd_archs += {'neon': []} + simd_config.set('PANDAS_HAVE_NEON', 1) +elif host_machine.cpu_family() in ['x86', 'x86_64'] + foreach name, flags : simd_x86_flags + if cxx.has_multi_arguments(flags) + supported_simd_archs += {name: flags} + simd_config.set('PANDAS_HAVE_@0@'.format(name.to_upper()), 1) + endif + endforeach +endif + +# Ensure scalar version on all architectures for now... 
+simd_config.set('PANDAS_HAVE_SCALAR', 1) + +configure_file( + output: 'pandas_simd_config.h', + configuration: simd_config, +) + +simd_config_inc = include_directories('.') diff --git a/pandas/_libs/src/parser/simd_scan.cpp b/pandas/_libs/src/parser/simd_scan.cpp new file mode 100644 index 0000000000000..bf2f7a14dd7dd --- /dev/null +++ b/pandas/_libs/src/parser/simd_scan.cpp @@ -0,0 +1,92 @@ +/* +Copyright (c) 2026, PyData Development Team +All rights reserved. + +Distributed under the terms of the BSD Simplified License. + +The full license is in the LICENSE file, distributed with this software. +*/ + +#include "pandas/parser/simd_scan.h" + +#include + +#include +#include + +#if defined(_MSC_VER) +# include +#endif + +namespace { + +using batch_u8 = xsimd::batch; +constexpr std::size_t kStep = batch_u8::size; + +static_assert(kStep >= PD_SCAN_MIN_BYTES, + "xsimd batch must be at least 16 lanes wide"); + +static inline unsigned ctz64(std::uint64_t value) { +#if defined(_MSC_VER) + unsigned long index; + _BitScanForward64(&index, value); + return static_cast(index); +#else + return static_cast(__builtin_ctzll(value)); +#endif +} + +template +static inline std::size_t scan_impl(const batch_u8 *v, const char *data, + std::size_t len) { + const auto *p = reinterpret_cast(data); + std::size_t i = 0; + for (; i + kStep <= len; i += kStep) { + const auto chunk = batch_u8::load_unaligned(p + i); + auto mask = (chunk == v[0]); + for (int j = 1; j < N; ++j) { + mask = mask | (chunk == v[j]); + } + if (xsimd::any(mask)) { + return i + ctz64(mask.mask()); + } + } + return i; +} + +} // namespace + +struct pd_scanner { + batch_u8 v[6]; + int n; +}; + +extern "C" { + +pd_scanner *pd_scanner_create(const char *chars, int n) { + if (n != 2 && n != 6) + return nullptr; + auto *scanner = new (std::nothrow) pd_scanner; + if (!scanner) + return nullptr; + scanner->n = n; + for (int j = 0; j < n; ++j) { + scanner->v[j] = batch_u8::broadcast(static_cast(chars[j])); + } + return 
scanner; +} + +void pd_scanner_destroy(pd_scanner *scanner) { delete scanner; } + +size_t pd_scanner_scan(const pd_scanner *scanner, const char *data, + size_t len) { + switch (scanner->n) { + case 2: + return scan_impl<2>(scanner->v, data, len); + case 6: + return scan_impl<6>(scanner->v, data, len); + } + return len; +} + +} // extern "C" diff --git a/pandas/_libs/src/parser/tokenizer.c b/pandas/_libs/src/parser/tokenizer.c index 5a717936b9b13..02189556eebb7 100644 --- a/pandas/_libs/src/parser/tokenizer.c +++ b/pandas/_libs/src/parser/tokenizer.c @@ -25,6 +25,7 @@ GitHub. See Python Software Foundation License and BSD licenses for these. #include #include +#include "pandas/parser/simd_scan.h" #include "pandas/portable.h" #include "pandas/vendored/klib/khash.h" // for kh_int64_t, kh_destroy_int64 @@ -583,6 +584,8 @@ static int parser_buffer_bytes(parser_t *self, size_t nbytes, ((!delim_whitespace && c == delimiter) || (delim_whitespace && isblank(c))) #define _TOKEN_CLEANUP() \ + pd_scanner_destroy(unquoted_scanner); \ + pd_scanner_destroy(quoted_scanner); \ self->stream_len = slen; \ self->datapos = i; @@ -633,7 +636,27 @@ static int tokenize_bytes(parser_t *self, uint64_t line_limit, const bool has_skip = (self->skipfunc != NULL || self->skipset != NULL || self->skip_first_N_rows >= 0); + // Build SIMD scanners over the chars that halt a bulk scan. Disabled + // features alias to lineterminator so the scanners always see 6/2 chars + // and the call sites stay branch-free. + const char unquoted_chars[6] = { + delimiter, + lineterminator, + has_carriage ? carriage_symbol : lineterminator, + (self->quoting != QUOTE_NONE) ? self->quotechar : lineterminator, + has_escape ? escape_symbol : lineterminator, + has_comment ? comment_symbol : lineterminator, + }; + const char quoted_chars[2] = { + (self->quoting != QUOTE_NONE) ? self->quotechar : lineterminator, + has_escape ? 
escape_symbol : lineterminator, + }; + pd_scanner *unquoted_scanner = pd_scanner_create(unquoted_chars, 6); + pd_scanner *quoted_scanner = pd_scanner_create(quoted_chars, 2); + if (make_stream_space(self, self->datalen - self->datapos) < 0) { + pd_scanner_destroy(unquoted_scanner); + pd_scanner_destroy(quoted_scanner); const size_t bufsize = 100; self->error_msg = malloc(bufsize); snprintf(self->error_msg, bufsize, "out of memory"); @@ -922,8 +945,23 @@ static int tokenize_bytes(parser_t *self, uint64_t line_limit, // normal character - save in field PUSH_CHAR(c); - // Bulk scan: copy remaining ordinary characters directly, - // bypassing the per-char state machine overhead. + // SIMD bulk scan: process a full SIMD chunk at a time, copying + // normal characters directly without state-machine overhead. + if (unquoted_scanner && !self->delim_whitespace) { + size_t remaining = self->datalen - (i + 1); + if (remaining >= PD_SCAN_MIN_BYTES) { + size_t skip = pd_scanner_scan(unquoted_scanner, buf, remaining); + if (skip > 0) { + memcpy(stream, buf, skip); + stream += skip; + slen += skip; + buf += skip; + i += skip; + } + } + } + // Scalar bulk scan fallback: copy remaining ordinary characters + // directly, bypassing the per-char state machine overhead. while (i + 1 < self->datalen && !(breaks_field_scan[(uint8_t)*buf] & 0x1)) { *stream++ = *buf++; @@ -950,8 +988,23 @@ static int tokenize_bytes(parser_t *self, uint64_t line_limit, // normal character - save in field PUSH_CHAR(c); - // Bulk scan: copy remaining ordinary characters directly, - // bypassing the per-char state machine overhead. + // SIMD bulk scan for quoted fields: only quote and escape + // chars are special, so use a lighter scan. 
+ if (quoted_scanner) { + size_t remaining = self->datalen - (i + 1); + if (remaining >= PD_SCAN_MIN_BYTES) { + size_t skip = pd_scanner_scan(quoted_scanner, buf, remaining); + if (skip > 0) { + memcpy(stream, buf, skip); + stream += skip; + slen += skip; + buf += skip; + i += skip; + } + } + } + // Scalar bulk scan fallback: copy remaining ordinary characters + // directly, bypassing the per-char state machine overhead. while (i + 1 < self->datalen && !(breaks_field_scan[(uint8_t)*buf] & 0x2)) { *stream++ = *buf++; diff --git a/pandas/_libs/tslibs/meson.build b/pandas/_libs/tslibs/meson.build index ded333efd7e60..e60aa496941f7 100644 --- a/pandas/_libs/tslibs/meson.build +++ b/pandas/_libs/tslibs/meson.build @@ -14,8 +14,9 @@ tslibs_sources = { 'parsing.pyx', '../src/parser/tokenizer.c', '../src/parser/fast_float_strtod.cpp', + '../src/parser/simd_scan.cpp', ], - 'deps': [fast_float_dep], + 'deps': [fast_float_dep, xsimd_dep], }, 'period': {'sources': ['period.pyx']}, 'strptime': {'sources': ['strptime.pyx']}, diff --git a/pyproject.toml b/pyproject.toml index 9160526cf0f1a..40cfca5ac638b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,6 +38,7 @@ license-files = [ "LICENSES/PYUPGRADE_LICENSE", # MIT "LICENSES/SAS7BDAT_LICENSE", # MIT "LICENSES/ULTRAJSON_LICENSE", # BSD-3-Clause AND TCL + "LICENSES/XSIMD_LICENSE", # BSD-3-Clause "subprojects/fast_float-*/LICENSE-APACHE", # Apache-2.0 "subprojects/fast_float-*/LICENSE-BOOST", # BSL "subprojects/fast_float-*/LICENSE-MIT", # MIT diff --git a/subprojects/packagefiles/xsimd/meson.build b/subprojects/packagefiles/xsimd/meson.build new file mode 100644 index 0000000000000..595fb6122ef51 --- /dev/null +++ b/subprojects/packagefiles/xsimd/meson.build @@ -0,0 +1,12 @@ +project( + 'xsimd', + 'cpp', + meson_version: '>=0.58.0', + license: 'BSD-3-Clause', + version: '14.2.0', +) + +xsimd_inc = include_directories('include') + +xsimd_dep = declare_dependency(include_directories: xsimd_inc) 
+meson.override_dependency('xsimd', xsimd_dep) diff --git a/subprojects/xsimd.wrap b/subprojects/xsimd.wrap new file mode 100644 index 0000000000000..39706456be925 --- /dev/null +++ b/subprojects/xsimd.wrap @@ -0,0 +1,9 @@ +[wrap-file] +directory = xsimd-14.2.0 +source_url = https://github.com/xtensor-stack/xsimd/archive/refs/tags/14.2.0.tar.gz +source_filename = xsimd-14.2.0.tar.gz +source_hash = 21e841ab684b05331e81e7f782431753a029ef7b7d9d6d3ddab837e7782a40ee +patch_directory = xsimd + +[provide] +dependency_names = xsimd