-
Notifications
You must be signed in to change notification settings - Fork 169
Optimize slice for dict and minor changes in other arrays #8321
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3,6 +3,7 @@ | |
|
|
||
| use std::ops::Range; | ||
|
|
||
| use vortex_error::VortexExpect; | ||
| use vortex_error::VortexResult; | ||
|
|
||
| use crate::ArrayRef; | ||
|
|
@@ -12,35 +13,74 @@ use crate::arrays::Constant; | |
| use crate::arrays::ConstantArray; | ||
| use crate::arrays::Dict; | ||
| use crate::arrays::DictArray; | ||
| use crate::arrays::Primitive; | ||
| use crate::arrays::dict::DictArraySlotsExt; | ||
| use crate::arrays::slice::SliceReduce; | ||
| use crate::expr::stats::Precision; | ||
| use crate::expr::stats::Stat; | ||
| use crate::scalar::Scalar; | ||
| use crate::scalar::ScalarValue; | ||
|
|
||
| impl SliceReduce for Dict { | ||
| fn slice(array: ArrayView<'_, Self>, range: Range<usize>) -> VortexResult<Option<ArrayRef>> { | ||
| let sliced_code = array.codes().slice(range)?; | ||
| if let Some(code) = array.codes().as_opt::<Constant>() { | ||
| return slice_constant_code(array, code.scalar(), range.len()); | ||
| } | ||
|
|
||
| let sliced_code = if let Some(codes) = array.codes().as_typed::<Primitive>() { | ||
| let sliced_code = <Primitive as SliceReduce>::slice(codes, range)? | ||
| .vortex_expect("Primitive SliceReduce should always return Some"); | ||
| // Because we specialize the primitive branch here, we have to make sure to handle the stat inheritance | ||
| inherit_slice_stats(array.codes(), &sliced_code); | ||
| sliced_code | ||
| } else { | ||
| array.codes().slice(range)? | ||
| }; | ||
|
|
||
| // TODO(joe): if the range is size 1 replace with a constant array | ||
| if let Some(code) = sliced_code.as_opt::<Constant>() { | ||
| let code = code.scalar().as_primitive().as_::<usize>(); | ||
| return if let Some(code) = code { | ||
| let values = array.values().slice(code..code + 1)?; | ||
| Ok(Some( | ||
| DictArray::new( | ||
| ConstantArray::new(0u8, sliced_code.len()).into_array(), | ||
| values, | ||
| ) | ||
| .into_array(), | ||
| )) | ||
| } else { | ||
| Ok(Some( | ||
| ConstantArray::new(Scalar::null(array.dtype().clone()), sliced_code.len()) | ||
| .into_array(), | ||
| )) | ||
| }; | ||
| return slice_constant_code(array, code.scalar(), sliced_code.len()); | ||
|
Comment on lines
41
to
+42
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. question (not review): do we have the optimization where if we slice something it can BECOME a constant array? The only cases I can think of this happening would be in runend where we happen to slice into a run
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think so, but maybe I was just looking in the wrong place. |
||
| } | ||
| // SAFETY: slicing the codes preserves invariants. | ||
| let array = | ||
| unsafe { DictArray::new_unchecked(sliced_code, array.values().clone()).into_array() }; | ||
|
|
||
| Ok(Some(array)) | ||
| } | ||
| } | ||
|
|
||
| fn inherit_slice_stats(source: &ArrayRef, sliced: &ArrayRef) { | ||
| source.statistics().with_iter(|iter| { | ||
| sliced | ||
| .statistics() | ||
| .inherit(iter.filter(|(stat, value)| is_inheritable_true_slice_stat(*stat, value))); | ||
| }); | ||
| } | ||
|
|
||
| fn is_inheritable_true_slice_stat(stat: Stat, value: &Precision<ScalarValue>) -> bool { | ||
| matches!( | ||
| stat, | ||
| Stat::IsConstant | Stat::IsSorted | Stat::IsStrictSorted | ||
| ) && value | ||
| .as_ref() | ||
| .as_exact() | ||
| .is_some_and(|value| matches!(value, ScalarValue::Bool(true))) | ||
| } | ||
|
AdamGS marked this conversation as resolved.
|
||
|
|
||
| fn slice_constant_code( | ||
| array: ArrayView<'_, Dict>, | ||
| code: &Scalar, | ||
| len: usize, | ||
| ) -> VortexResult<Option<ArrayRef>> { | ||
| let code = code.as_primitive().as_::<usize>(); | ||
| if let Some(code) = code { | ||
| let values = array.values().slice(code..code + 1)?; | ||
| Ok(Some( | ||
| DictArray::new(ConstantArray::new(0u8, len).into_array(), values).into_array(), | ||
| )) | ||
| } else { | ||
|
AdamGS marked this conversation as resolved.
|
||
| Ok(Some( | ||
| unsafe { DictArray::new_unchecked(sliced_code, array.values().clone()) }.into_array(), | ||
| ConstantArray::new(Scalar::null(array.dtype().clone()), len).into_array(), | ||
| )) | ||
| } | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nice