diff --git a/vortex-array/src/arrays/listview/rebuild.rs b/vortex-array/src/arrays/listview/rebuild.rs index 63ed33c6498..7ef752e4dcb 100644 --- a/vortex-array/src/arrays/listview/rebuild.rs +++ b/vortex-array/src/arrays/listview/rebuild.rs @@ -210,10 +210,14 @@ impl ListViewArray { let mut new_sizes = BufferMut::::with_capacity(len); let mut take_indices = BufferMut::::with_capacity(self.elements().len()); - let validity = self.validity()?; + // Resolve validity to a mask once instead of probing it per row: `execute_is_valid` + // executes a scalar on every call for array-backed validity, which is O(len) work repeated + // `len` times. + let validity = self.validity()?.execute_mask(len, ctx)?; + let mut n_elements = NewOffset::zero(); for index in 0..len { - if !validity.execute_is_valid(index, ctx)? { + if !validity.value(index) { new_offsets.push(n_elements); new_sizes.push(S::zero()); continue; @@ -292,10 +296,12 @@ impl ListViewArray { let mut new_elements_builder = builder_with_capacity(element_dtype.as_ref(), self.elements().len()); - let validity = self.validity()?; + // Resolve validity to a mask once instead of probing it per row (see `rebuild_with_take`). + let validity = self.validity()?.execute_mask(len, ctx)?; + let mut n_elements = NewOffset::zero(); for index in 0..len { - if !validity.execute_is_valid(index, ctx)? { + if !validity.value(index) { // For NULL lists, place them after the previous item's data to maintain the // no-overlap invariant for zero-copy to `ListArray` arrays. new_offsets.push(n_elements); diff --git a/vortex-array/src/arrays/varbin/compute/filter.rs b/vortex-array/src/arrays/varbin/compute/filter.rs index 69e0125037c..a0c78a64363 100644 --- a/vortex-array/src/arrays/varbin/compute/filter.rs +++ b/vortex-array/src/arrays/varbin/compute/filter.rs @@ -24,7 +24,6 @@ use crate::arrays::varbin::builder::VarBinBuilder; use crate::dtype::DType; use crate::dtype::IntegerPType; use crate::match_each_integer_ptype; -use crate::validity::Validity; impl FilterKernel for VarBin { fn filter( @@ -170,9 +169,11 @@ fn filter_select_var_bin_by_index( offsets.as_slice::(), values.bytes().as_slice(), mask_indices, - values.validity()?, + values + .varbin_validity() + .execute_mask(values.as_ref().len(), ctx) + .vortex_expect("Failed to compute validity mask"), selection_count, - ctx, ) }) } @@ -182,25 +183,37 @@ fn filter_select_var_bin_by_index_primitive_offset( offsets: &[O], data: &[u8], mask_indices: &[usize], - // TODO(ngates): pass LogicalValidity instead - validity: Validity, + mask: Mask, selection_count: usize, - ctx: &mut ExecutionCtx, ) -> VortexResult { + let value_at = |idx: usize| -> VortexResult<&[u8]> { + let start = offsets[idx] + .to_usize() + .ok_or_else(|| vortex_err!("Failed to convert offset to usize: {}", offsets[idx]))?; + let end = offsets[idx + 1].to_usize().ok_or_else(|| { + vortex_err!("Failed to convert offset to usize: {}", offsets[idx + 1]) + })?; + Ok(&data[start..end]) + }; + let mut builder = VarBinBuilder::::with_capacity(selection_count); - for idx in mask_indices.iter().copied() { - if validity.execute_is_valid(idx, ctx)? { - let (start, end) = ( - offsets[idx].to_usize().ok_or_else(|| { - vortex_err!("Failed to convert offset to usize: {}", offsets[idx]) - })?, - offsets[idx + 1].to_usize().ok_or_else(|| { - vortex_err!("Failed to convert offset to usize: {}", offsets[idx + 1]) - })?, - ); - builder.append_value(&data[start..end]) - } else { - builder.append_null() + match mask.bit_buffer() { + AllOr::All => { + for idx in mask_indices.iter().copied() { + builder.append_value(value_at(idx)?) + } + } + AllOr::None => { + builder.append_n_nulls(selection_count); + } + AllOr::Some(validity) => { + for idx in mask_indices.iter().copied() { + if validity.value(idx) { + builder.append_value(value_at(idx)?) + } else { + builder.append_null() + } + } } } Ok(builder.finish(dtype)) diff --git a/vortex-array/src/display/mod.rs b/vortex-array/src/display/mod.rs index 3d121cf80b9..0134a28558c 100644 --- a/vortex-array/src/display/mod.rs +++ b/vortex-array/src/display/mod.rs @@ -579,19 +579,22 @@ impl ArrayRef { } #[cfg(feature = "table-display")] DisplayOptions::TableDisplay => { + use vortex_mask::Mask; + + use crate::arrays::StructArray; use crate::arrays::struct_::StructArrayExt; - #[expect(deprecated)] - use crate::canonical::ToCanonical as _; use crate::dtype::DType; let mut builder = tabled::builder::Builder::default(); + // Reuse a single execution context across all per-row accesses below. + let mut ctx = LEGACY_SESSION.create_execution_ctx(); // Special logic for struct arrays. let DType::Struct(sf, _) = self.dtype() else { // For non-struct arrays, simply display a single column table without header. for row_idx in 0..self.len() { let value = self - .execute_scalar(row_idx, &mut LEGACY_SESSION.create_execution_ctx()) + .execute_scalar(row_idx, &mut ctx) .map_or_else(|e| format!(""), |s| s.to_string()); builder.push_record([value]); } @@ -602,22 +605,27 @@ impl ArrayRef { return write!(f, "{table}"); }; - #[expect(deprecated)] - let struct_ = self.to_struct(); + let struct_ = match self.clone().execute::(&mut ctx) { + Ok(struct_) => struct_, + Err(e) => return write!(f, ""), + }; builder.push_record(sf.names().iter().map(|name| name.to_string())); + // Resolve validity to a mask once instead of probing it per row. + let validity = self + .validity() + .and_then(|v| v.execute_mask(self.len(), &mut ctx)) + .unwrap_or_else(|_| Mask::new_false(self.len())); + for row_idx in 0..self.len() { - if !self - .is_valid(row_idx, &mut LEGACY_SESSION.create_execution_ctx()) - .unwrap_or(false) - { + if !validity.value(row_idx) { let null_row = vec!["null".to_string(); sf.names().len()]; builder.push_record(null_row); } else { - let mut row = Vec::new(); + let mut row = Vec::with_capacity(struct_.struct_fields().nfields()); for field_array in StructArrayExt::iter_unmasked_fields(&struct_) { let value = field_array - .execute_scalar(row_idx, &mut LEGACY_SESSION.create_execution_ctx()) + .execute_scalar(row_idx, &mut ctx) .map_or_else(|e| format!(""), |s| s.to_string()); row.push(value); } @@ -634,10 +642,7 @@ impl ArrayRef { } for row_idx in 0..self.len() { - if !self - .is_valid(row_idx, &mut LEGACY_SESSION.create_execution_ctx()) - .unwrap_or(false) - { + if !validity.value(row_idx) { table.modify( (1 + row_idx, 0), tabled::settings::Span::column(sf.names().len() as isize), diff --git a/vortex-tensor/src/scalar_fns/l2_denorm.rs b/vortex-tensor/src/scalar_fns/l2_denorm.rs index 94e397a276d..a814af33f33 100644 --- a/vortex-tensor/src/scalar_fns/l2_denorm.rs +++ b/vortex-tensor/src/scalar_fns/l2_denorm.rs @@ -441,6 +441,10 @@ pub fn normalize_as_l2_denorm( let normalized_dtype = input.dtype().as_nonnullable(); let flat = extract_flat_elements(input.storage_array(), tensor_flat_size, ctx)?; + // Resolve validity to a mask once rather than probing it per row (each `Validity::is_valid` + // executes a scalar for array-backed validity). + let norms_valid = norms_validity.execute_mask(row_count, ctx)?; + // Normalize all of the vectors. let normalized = match_each_float_ptype!(flat.ptype(), |T| { let norm_values = primitive_norms.as_slice::(); @@ -448,7 +452,7 @@ pub fn normalize_as_l2_denorm( let total_elements = row_count * tensor_flat_size; let mut elements = BufferMut::::with_capacity(total_elements); for i in 0..row_count { - let is_valid = norms_validity.execute_is_valid(i, ctx)?; + let is_valid = norms_valid.value(i); let norm = norm_values[i]; // SAFETY: We allocated `row_count * tensor_flat_size` capacity and push exactly @@ -637,12 +641,14 @@ pub fn validate_l2_normalized_rows_against_norms( Some(norms) => normalized_validity.and(norms.validity()?)?, None => normalized_validity, }; + // Resolve validity to a mask once rather than probing it per row. + let combined_valid = combined_validity.execute_mask(row_count, ctx)?; match_each_float_ptype!(element_ptype, |T| { let stored_norms = norms.as_ref().map(|norms| norms.as_slice::()); for i in 0..row_count { - if !combined_validity.execute_is_valid(i, ctx)? { + if !combined_valid.value(i) { continue; } diff --git a/vortex-tui/src/browse/ui/layouts.rs b/vortex-tui/src/browse/ui/layouts.rs index c3245446197..14c9531645c 100644 --- a/vortex-tui/src/browse/ui/layouts.rs +++ b/vortex-tui/src/browse/ui/layouts.rs @@ -27,6 +27,8 @@ use ratatui::widgets::Table; use ratatui::widgets::Widget; use ratatui::widgets::Wrap; use vortex::array::ArrayRef; +use vortex::array::Canonical; +use vortex::array::IntoArray; use vortex::array::VortexSessionExecute; use vortex::array::arrays::StructArray; use vortex::array::arrays::struct_::StructArrayExt; @@ -161,7 +163,19 @@ fn render_array(app: &AppState, area: Rect, buf: &mut Buffer, is_stats_table: bo assert_eq!(app.cursor.dtype(), array.dtype()); - let field_arrays: Vec = struct_array.unmasked_fields().to_vec(); + // Canonicalize each field once so the per-row `execute_scalar` calls below do not + // re-decode the field encodings for every row. + let field_arrays: Vec = struct_array + .unmasked_fields() + .iter() + .map(|field| { + field + .clone() + .execute::(&mut ctx) + .vortex_expect("failed to canonicalize field array") + .into_array() + }) + .collect(); // TODO: trim the number of displayed rows and allow paging through column stats. let mut rows = Vec::with_capacity(array.len());