From e0a6007be32dfafc644396ce4671b23dcd24d417 Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Tue, 9 Jun 2026 18:26:08 +0100 Subject: [PATCH 1/5] dict slice optimization Signed-off-by: Adam Gutglick --- vortex-array/src/arrays/dict/compute/slice.rs | 72 ++++++++++++++----- 1 file changed, 55 insertions(+), 17 deletions(-) diff --git a/vortex-array/src/arrays/dict/compute/slice.rs b/vortex-array/src/arrays/dict/compute/slice.rs index 509fa7535d8..f9a34b9c09f 100644 --- a/vortex-array/src/arrays/dict/compute/slice.rs +++ b/vortex-array/src/arrays/dict/compute/slice.rs @@ -3,6 +3,7 @@ use std::ops::Range; +use vortex_error::VortexExpect; use vortex_error::VortexResult; use crate::ArrayRef; @@ -12,31 +13,32 @@ use crate::arrays::Constant; use crate::arrays::ConstantArray; use crate::arrays::Dict; use crate::arrays::DictArray; +use crate::arrays::Primitive; use crate::arrays::dict::DictArraySlotsExt; use crate::arrays::slice::SliceReduce; +use crate::expr::stats::Precision; +use crate::expr::stats::Stat; use crate::scalar::Scalar; +use crate::scalar::ScalarValue; impl SliceReduce for Dict { fn slice(array: ArrayView<'_, Self>, range: Range) -> VortexResult> { - let sliced_code = array.codes().slice(range)?; + if let Some(code) = array.codes().as_opt::() { + return slice_constant_code(array, code.scalar(), range.len()); + } + + let sliced_code = if let Some(codes) = array.codes().as_typed::() { + let sliced_code = ::slice(codes, range)? + .vortex_expect("Primitive SliceReduce should always return Some"); + inherit_slice_stats(array.codes(), &sliced_code); + sliced_code + } else { + array.codes().slice(range)? + }; + // TODO(joe): if the range is size 1 replace with a constant array if let Some(code) = sliced_code.as_opt::() { - let code = code.scalar().as_primitive().as_::(); - return if let Some(code) = code { - let values = array.values().slice(code..code + 1)?; - Ok(Some( - DictArray::new( - ConstantArray::new(0u8, sliced_code.len()).into_array(), - values, - ) - .into_array(), - )) - } else { - Ok(Some( - ConstantArray::new(Scalar::null(array.dtype().clone()), sliced_code.len()) - .into_array(), - )) - }; + return slice_constant_code(array, code.scalar(), sliced_code.len()); } // SAFETY: slicing the codes preserves invariants. Ok(Some( @@ -45,6 +47,42 @@ impl SliceReduce for Dict { } } +fn inherit_slice_stats(source: &ArrayRef, sliced: &ArrayRef) { + source.statistics().with_iter(|iter| { + sliced + .statistics() + .inherit(iter.filter(|(stat, value)| is_inheritable_true_slice_stat(*stat, value))); + }); +} + +fn is_inheritable_true_slice_stat(stat: Stat, value: &Precision) -> bool { + matches!( + stat, + Stat::IsConstant | Stat::IsSorted | Stat::IsStrictSorted + ) && value + .as_ref() + .as_exact() + .is_some_and(|value| matches!(value, ScalarValue::Bool(true))) +} + +fn slice_constant_code( + array: ArrayView<'_, Dict>, + code: &Scalar, + len: usize, +) -> VortexResult> { + let code = code.as_primitive().as_::(); + if let Some(code) = code { + let values = array.values().slice(code..code + 1)?; + Ok(Some( + DictArray::new(ConstantArray::new(0u8, len).into_array(), values).into_array(), + )) + } else { + Ok(Some( + ConstantArray::new(Scalar::null(array.dtype().clone()), len).into_array(), + )) + } +} + #[cfg(test)] mod tests { use vortex_buffer::buffer; From 5afc71894d613bfe42c2ef786dc4c09994c875ad Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Tue, 9 Jun 2026 18:36:07 +0100 Subject: [PATCH 2/5] new benchmark Signed-off-by: Adam Gutglick --- vortex-array/benches/slice_dict_primitive.rs | 25 ++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/vortex-array/benches/slice_dict_primitive.rs b/vortex-array/benches/slice_dict_primitive.rs index 79240bc0a0f..20a371e670e 100644 --- a/vortex-array/benches/slice_dict_primitive.rs +++ b/vortex-array/benches/slice_dict_primitive.rs @@ -8,7 +8,9 @@ use divan::Bencher; use vortex_array::ArrayRef; use vortex_array::IntoArray; use vortex_array::arrays::DictArray; +use vortex_array::arrays::Primitive; use vortex_array::arrays::PrimitiveArray; +use vortex_array::arrays::slice::SliceReduce; fn main() { divan::main(); @@ -46,6 +48,29 @@ fn slice_primitive_tight_loop(bencher: Bencher, len: usize) { }); } +#[divan::bench(args = ARRAY_LENGTHS)] +fn slice_primitive_reduce_tight_loop(bencher: Bencher, len: usize) { + let arr = build_primitive(len); + let slice_len = 64; + + let num_slices = len / slice_len; + + bencher + .with_inputs(|| (&arr, Vec::::with_capacity(num_slices))) + .bench_refs(|(arr, out)| { + out.clear(); + let mut offset = 0; + while offset + slice_len <= len { + out.push( + ::slice(arr.as_view(), offset..offset + slice_len) + .unwrap() + .unwrap(), + ); + offset += slice_len; + } + }); +} + #[divan::bench(args = ARRAY_LENGTHS)] fn slice_dict_tight_loop(bencher: Bencher, len: usize) { let dict = build_dict(len).into_array(); From d57a42d70f44e6a145a86da5a6ac7d1800c1b21f Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Tue, 9 Jun 2026 18:41:16 +0100 Subject: [PATCH 3/5] Furthur optimizations Signed-off-by: Adam Gutglick --- vortex-array/src/array/erased.rs | 13 +++++------ .../src/arrays/primitive/compute/slice.rs | 22 +++++++++---------- 2 files changed, 16 insertions(+), 19 deletions(-) diff --git a/vortex-array/src/array/erased.rs b/vortex-array/src/array/erased.rs index 76031a1fee2..da167d8d9a2 100644 --- a/vortex-array/src/array/erased.rs +++ b/vortex-array/src/array/erased.rs @@ -47,13 +47,13 @@ use crate::arrays::VarBinView; use crate::buffer::BufferHandle; use crate::builders::ArrayBuilder; use crate::dtype::DType; -use crate::dtype::Nullability; use crate::expr::stats::Precision; use crate::expr::stats::Stat; use crate::expr::stats::StatsProviderExt; use crate::matcher::Matcher; use crate::optimizer::ArrayOptimizer; use crate::scalar::Scalar; +use crate::scalar::ScalarValue; use crate::stats::StatsSetRef; use crate::validity::Validity; @@ -239,13 +239,10 @@ impl ArrayRef { matches!( stat, Stat::IsConstant | Stat::IsSorted | Stat::IsStrictSorted - ) && value.as_ref().as_exact().is_some_and(|v| { - Scalar::try_new(DType::Bool(Nullability::NonNullable), Some(v.clone())) - .vortex_expect("A stat that was expected to be a boolean stat was not") - .as_bool() - .value() - .unwrap_or_default() - }) + ) && value + .as_ref() + .as_exact() + .is_some_and(|v| matches!(v, ScalarValue::Bool(true))) })); }); } diff --git a/vortex-array/src/arrays/primitive/compute/slice.rs b/vortex-array/src/arrays/primitive/compute/slice.rs index 1830ef3f8c6..d3a04e877e3 100644 --- a/vortex-array/src/arrays/primitive/compute/slice.rs +++ b/vortex-array/src/arrays/primitive/compute/slice.rs @@ -11,19 +11,19 @@ use crate::array::ArrayView; use crate::arrays::Primitive; use crate::arrays::PrimitiveArray; use crate::arrays::slice::SliceReduce; -use crate::dtype::NativePType; -use crate::match_each_native_ptype; impl SliceReduce for Primitive { fn slice(array: ArrayView<'_, Self>, range: Range) -> VortexResult> { - let result = match_each_native_ptype!(array.ptype(), |T| { - PrimitiveArray::from_buffer_handle( - array.buffer_handle().slice_typed::(range.clone()), - T::PTYPE, - array.validity()?.slice(range)?, - ) - .into_array() - }); - Ok(Some(result)) + let byte_width = array.ptype().byte_width(); + let byte_range = range.start * byte_width..range.end * byte_width; + let values = array.buffer_handle().slice(byte_range); + let validity = array.validity()?.slice(range)?; + + // SAFETY: slicing an existing PrimitiveArray on element boundaries preserves the buffer + // alignment, ptype, length, and validity invariants. + Ok(Some( + unsafe { PrimitiveArray::new_unchecked_from_handle(values, array.ptype(), validity) } + .into_array(), + )) } } From 33967286f5ce5f2e7de04cc4515c193efe4df716 Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Tue, 9 Jun 2026 19:08:36 +0100 Subject: [PATCH 4/5] unchecked slices Signed-off-by: Adam Gutglick --- vortex-array/src/arrays/bool/compute/rules.rs | 18 ++++++++----- vortex-array/src/arrays/bool/compute/slice.rs | 18 ++++++++----- .../src/arrays/varbinview/compute/slice.rs | 26 +++++++++++-------- 3 files changed, 37 insertions(+), 25 deletions(-) diff --git a/vortex-array/src/arrays/bool/compute/rules.rs b/vortex-array/src/arrays/bool/compute/rules.rs index ed2e7a4dc28..9e8e3151c94 100644 --- a/vortex-array/src/arrays/bool/compute/rules.rs +++ b/vortex-array/src/arrays/bool/compute/rules.rs @@ -47,12 +47,16 @@ impl ArrayParentReduceRule for BoolMaskedValidityRule { // Merge the parent's validity mask into the child's validity // TODO(joe): make this lazy - Ok(Some( - BoolArray::new( - array.to_bit_buffer(), - array.validity()?.and(parent.validity()?)?, - ) - .into_array(), - )) + // Safety: + // we know all elements are valid, the AND operation will fail if mismatched. + unsafe { + Ok(Some( + BoolArray::new_unchecked( + array.to_bit_buffer(), + array.validity()?.and(parent.validity()?)?, + ) + .into_array(), + )) + } } } diff --git a/vortex-array/src/arrays/bool/compute/slice.rs b/vortex-array/src/arrays/bool/compute/slice.rs index 0ef11bb3085..869c7fe85d9 100644 --- a/vortex-array/src/arrays/bool/compute/slice.rs +++ b/vortex-array/src/arrays/bool/compute/slice.rs @@ -15,12 +15,16 @@ use crate::arrays::slice::SliceReduce; impl SliceReduce for Bool { fn slice(array: ArrayView<'_, Bool>, range: Range) -> VortexResult> { - Ok(Some( - BoolArray::new( - array.to_bit_buffer().slice(range.clone()), - array.validity()?.slice(range)?, - ) - .into_array(), - )) + // Safety: + // range is verified in the callers and is the same for both bits and validity. + unsafe { + Ok(Some( + BoolArray::new_unchecked( + array.to_bit_buffer().slice(range.clone()), + array.validity()?.slice(range)?, + ) + .into_array(), + )) + } } } diff --git a/vortex-array/src/arrays/varbinview/compute/slice.rs b/vortex-array/src/arrays/varbinview/compute/slice.rs index 19f5a0d41b0..0eaf7b46724 100644 --- a/vortex-array/src/arrays/varbinview/compute/slice.rs +++ b/vortex-array/src/arrays/varbinview/compute/slice.rs @@ -16,16 +16,20 @@ use crate::arrays::varbinview::BinaryView; impl SliceReduce for VarBinView { fn slice(array: ArrayView<'_, Self>, range: Range) -> VortexResult> { - Ok(Some( - VarBinViewArray::new_handle( - array - .views_handle() - .slice_typed::(range.clone()), - Arc::clone(array.data_buffers()), - array.dtype().clone(), - array.validity()?.slice(range)?, - ) - .into_array(), - )) + // Safety: + // range is validated within bounds, and is shared between all children. + unsafe { + Ok(Some( + VarBinViewArray::new_handle_unchecked( + array + .views_handle() + .slice_typed::(range.clone()), + Arc::clone(array.data_buffers()), + array.dtype().clone(), + array.validity()?.slice(range)?, + ) + .into_array(), + )) + } } } From 915c696d83ede734cf6bde27653612f957f2e3ad Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Wed, 10 Jun 2026 11:44:43 +0100 Subject: [PATCH 5/5] Move things around Signed-off-by: Adam Gutglick --- vortex-array/src/arrays/bool/compute/rules.rs | 15 +++++------ vortex-array/src/arrays/bool/compute/slice.rs | 15 +++++------ vortex-array/src/arrays/dict/compute/slice.rs | 8 +++--- .../src/arrays/primitive/compute/slice.rs | 12 +++++---- .../src/arrays/varbinview/compute/slice.rs | 25 +++++++++---------- 5 files changed, 36 insertions(+), 39 deletions(-) diff --git a/vortex-array/src/arrays/bool/compute/rules.rs b/vortex-array/src/arrays/bool/compute/rules.rs index 9e8e3151c94..43f811785ea 100644 --- a/vortex-array/src/arrays/bool/compute/rules.rs +++ b/vortex-array/src/arrays/bool/compute/rules.rs @@ -45,18 +45,15 @@ impl ArrayParentReduceRule for BoolMaskedValidityRule { return Ok(None); } + let bit_buffer = array.to_bit_buffer(); // Merge the parent's validity mask into the child's validity // TODO(joe): make this lazy + let validity = array.validity()?.and(parent.validity()?)?; + // Safety: // we know all elements are valid, the AND operation will fail if mismatched. - unsafe { - Ok(Some( - BoolArray::new_unchecked( - array.to_bit_buffer(), - array.validity()?.and(parent.validity()?)?, - ) - .into_array(), - )) - } + let array = unsafe { BoolArray::new_unchecked(bit_buffer, validity).into_array() }; + + Ok(Some(array)) } } diff --git a/vortex-array/src/arrays/bool/compute/slice.rs b/vortex-array/src/arrays/bool/compute/slice.rs index 869c7fe85d9..0b38d2a6117 100644 --- a/vortex-array/src/arrays/bool/compute/slice.rs +++ b/vortex-array/src/arrays/bool/compute/slice.rs @@ -15,16 +15,13 @@ use crate::arrays::slice::SliceReduce; impl SliceReduce for Bool { fn slice(array: ArrayView<'_, Bool>, range: Range) -> VortexResult> { + let bit_buffer = array.to_bit_buffer().slice(range.clone()); + let validity = array.validity()?.slice(range)?; + // Safety: // range is verified in the callers and is the same for both bits and validity. - unsafe { - Ok(Some( - BoolArray::new_unchecked( - array.to_bit_buffer().slice(range.clone()), - array.validity()?.slice(range)?, - ) - .into_array(), - )) - } + let array = unsafe { BoolArray::new_unchecked(bit_buffer, validity).into_array() }; + + Ok(Some(array)) } } diff --git a/vortex-array/src/arrays/dict/compute/slice.rs b/vortex-array/src/arrays/dict/compute/slice.rs index f9a34b9c09f..1552c9c9fea 100644 --- a/vortex-array/src/arrays/dict/compute/slice.rs +++ b/vortex-array/src/arrays/dict/compute/slice.rs @@ -30,6 +30,7 @@ impl SliceReduce for Dict { let sliced_code = if let Some(codes) = array.codes().as_typed::() { let sliced_code = ::slice(codes, range)? .vortex_expect("Primitive SliceReduce should always return Some"); + // Because we specialize the primitive branch here, we have to make sure to handle the stat inheritance inherit_slice_stats(array.codes(), &sliced_code); sliced_code } else { @@ -41,9 +42,10 @@ impl SliceReduce for Dict { return slice_constant_code(array, code.scalar(), sliced_code.len()); } // SAFETY: slicing the codes preserves invariants. - Ok(Some( - unsafe { DictArray::new_unchecked(sliced_code, array.values().clone()) }.into_array(), - )) + let array = + unsafe { DictArray::new_unchecked(sliced_code, array.values().clone()).into_array() }; + + Ok(Some(array)) } } diff --git a/vortex-array/src/arrays/primitive/compute/slice.rs b/vortex-array/src/arrays/primitive/compute/slice.rs index d3a04e877e3..4775adba749 100644 --- a/vortex-array/src/arrays/primitive/compute/slice.rs +++ b/vortex-array/src/arrays/primitive/compute/slice.rs @@ -19,11 +19,13 @@ impl SliceReduce for Primitive { let values = array.buffer_handle().slice(byte_range); let validity = array.validity()?.slice(range)?; - // SAFETY: slicing an existing PrimitiveArray on element boundaries preserves the buffer + // SAFETY: + //slicing an existing PrimitiveArray on element boundaries preserves the buffer // alignment, ptype, length, and validity invariants. - Ok(Some( - unsafe { PrimitiveArray::new_unchecked_from_handle(values, array.ptype(), validity) } - .into_array(), - )) + let array = unsafe { + PrimitiveArray::new_unchecked_from_handle(values, array.ptype(), validity).into_array() + }; + + Ok(Some(array)) } } diff --git a/vortex-array/src/arrays/varbinview/compute/slice.rs b/vortex-array/src/arrays/varbinview/compute/slice.rs index 0eaf7b46724..e53da7a6eb3 100644 --- a/vortex-array/src/arrays/varbinview/compute/slice.rs +++ b/vortex-array/src/arrays/varbinview/compute/slice.rs @@ -16,20 +16,19 @@ use crate::arrays::varbinview::BinaryView; impl SliceReduce for VarBinView { fn slice(array: ArrayView<'_, Self>, range: Range) -> VortexResult> { + let views = array + .views_handle() + .slice_typed::(range.clone()); + let data_buffers = Arc::clone(array.data_buffers()); + let dtype = array.dtype().clone(); + let validity = array.validity()?.slice(range)?; + // Safety: // range is validated within bounds, and is shared between all children. - unsafe { - Ok(Some( - VarBinViewArray::new_handle_unchecked( - array - .views_handle() - .slice_typed::(range.clone()), - Arc::clone(array.data_buffers()), - array.dtype().clone(), - array.validity()?.slice(range)?, - ) - .into_array(), - )) - } + let array = unsafe { + VarBinViewArray::new_handle_unchecked(views, data_buffers, dtype, validity).into_array() + }; + + Ok(Some(array)) } }