Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 10 additions & 4 deletions vortex-array/src/arrays/listview/rebuild.rs
Original file line number Diff line number Diff line change
Expand Up @@ -210,10 +210,14 @@ impl ListViewArray {
let mut new_sizes = BufferMut::<S>::with_capacity(len);
let mut take_indices = BufferMut::<u64>::with_capacity(self.elements().len());

let validity = self.validity()?;
// Resolve validity to a mask once instead of probing it per row: `execute_is_valid`
// executes a scalar on every call for array-backed validity, which is O(len) work repeated
// `len` times.
let validity = self.validity()?.execute_mask(len, ctx)?;

let mut n_elements = NewOffset::zero();
for index in 0..len {
if !validity.execute_is_valid(index, ctx)? {
if !validity.value(index) {
new_offsets.push(n_elements);
new_sizes.push(S::zero());
continue;
Expand Down Expand Up @@ -292,10 +296,12 @@ impl ListViewArray {
let mut new_elements_builder =
builder_with_capacity(element_dtype.as_ref(), self.elements().len());

let validity = self.validity()?;
// Resolve validity to a mask once instead of probing it per row (see `rebuild_with_take`).
let validity = self.validity()?.execute_mask(len, ctx)?;

let mut n_elements = NewOffset::zero();
for index in 0..len {
if !validity.execute_is_valid(index, ctx)? {
if !validity.value(index) {
// For NULL lists, place them after the previous item's data to maintain the
// no-overlap invariant for zero-copy to `ListArray` arrays.
new_offsets.push(n_elements);
Expand Down
51 changes: 32 additions & 19 deletions vortex-array/src/arrays/varbin/compute/filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ use crate::arrays::varbin::builder::VarBinBuilder;
use crate::dtype::DType;
use crate::dtype::IntegerPType;
use crate::match_each_integer_ptype;
use crate::validity::Validity;

impl FilterKernel for VarBin {
fn filter(
Expand Down Expand Up @@ -170,9 +169,11 @@ fn filter_select_var_bin_by_index(
offsets.as_slice::<O>(),
values.bytes().as_slice(),
mask_indices,
values.validity()?,
values
.varbin_validity()
.execute_mask(values.as_ref().len(), ctx)
.vortex_expect("Failed to compute validity mask"),
selection_count,
ctx,
)
})
}
Expand All @@ -182,25 +183,37 @@ fn filter_select_var_bin_by_index_primitive_offset<O: IntegerPType>(
offsets: &[O],
data: &[u8],
mask_indices: &[usize],
// TODO(ngates): pass LogicalValidity instead
validity: Validity,
mask: Mask,
selection_count: usize,
ctx: &mut ExecutionCtx,
) -> VortexResult<VarBinArray> {
let value_at = |idx: usize| -> VortexResult<&[u8]> {
let start = offsets[idx]
.to_usize()
.ok_or_else(|| vortex_err!("Failed to convert offset to usize: {}", offsets[idx]))?;
let end = offsets[idx + 1].to_usize().ok_or_else(|| {
vortex_err!("Failed to convert offset to usize: {}", offsets[idx + 1])
})?;
Ok(&data[start..end])
};

let mut builder = VarBinBuilder::<O>::with_capacity(selection_count);
for idx in mask_indices.iter().copied() {
if validity.execute_is_valid(idx, ctx)? {
let (start, end) = (
offsets[idx].to_usize().ok_or_else(|| {
vortex_err!("Failed to convert offset to usize: {}", offsets[idx])
})?,
offsets[idx + 1].to_usize().ok_or_else(|| {
vortex_err!("Failed to convert offset to usize: {}", offsets[idx + 1])
})?,
);
builder.append_value(&data[start..end])
} else {
builder.append_null()
match mask.bit_buffer() {
AllOr::All => {
for idx in mask_indices.iter().copied() {
builder.append_value(value_at(idx)?)
}
}
AllOr::None => {
builder.append_n_nulls(selection_count);
}
AllOr::Some(validity) => {
for idx in mask_indices.iter().copied() {
if validity.value(idx) {
builder.append_value(value_at(idx)?)
} else {
builder.append_null()
}
}
}
}
Ok(builder.finish(dtype))
Expand Down
35 changes: 20 additions & 15 deletions vortex-array/src/display/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -579,19 +579,22 @@ impl ArrayRef {
}
#[cfg(feature = "table-display")]
DisplayOptions::TableDisplay => {
use vortex_mask::Mask;

use crate::arrays::StructArray;
use crate::arrays::struct_::StructArrayExt;
#[expect(deprecated)]
use crate::canonical::ToCanonical as _;
use crate::dtype::DType;

let mut builder = tabled::builder::Builder::default();
// Reuse a single execution context across all per-row accesses below.
let mut ctx = LEGACY_SESSION.create_execution_ctx();

// Special logic for struct arrays.
let DType::Struct(sf, _) = self.dtype() else {
// For non-struct arrays, simply display a single column table without header.
for row_idx in 0..self.len() {
let value = self
.execute_scalar(row_idx, &mut LEGACY_SESSION.create_execution_ctx())
.execute_scalar(row_idx, &mut ctx)
.map_or_else(|e| format!("<error: {e}>"), |s| s.to_string());
builder.push_record([value]);
}
Expand All @@ -602,22 +605,27 @@ impl ArrayRef {
return write!(f, "{table}");
};

#[expect(deprecated)]
let struct_ = self.to_struct();
let struct_ = match self.clone().execute::<StructArray>(&mut ctx) {
Ok(struct_) => struct_,
Err(e) => return write!(f, "<error: {e}>"),
};
builder.push_record(sf.names().iter().map(|name| name.to_string()));

// Resolve validity to a mask once instead of probing it per row.
let validity = self
.validity()
.and_then(|v| v.execute_mask(self.len(), &mut ctx))
.unwrap_or_else(|_| Mask::new_false(self.len()));

for row_idx in 0..self.len() {
if !self
.is_valid(row_idx, &mut LEGACY_SESSION.create_execution_ctx())
.unwrap_or(false)
{
if !validity.value(row_idx) {
let null_row = vec!["null".to_string(); sf.names().len()];
builder.push_record(null_row);
} else {
let mut row = Vec::new();
let mut row = Vec::with_capacity(struct_.struct_fields().nfields());
for field_array in StructArrayExt::iter_unmasked_fields(&struct_) {
let value = field_array
.execute_scalar(row_idx, &mut LEGACY_SESSION.create_execution_ctx())
.execute_scalar(row_idx, &mut ctx)
.map_or_else(|e| format!("<error: {e}>"), |s| s.to_string());
row.push(value);
}
Expand All @@ -634,10 +642,7 @@ impl ArrayRef {
}

for row_idx in 0..self.len() {
if !self
.is_valid(row_idx, &mut LEGACY_SESSION.create_execution_ctx())
.unwrap_or(false)
{
if !validity.value(row_idx) {
table.modify(
(1 + row_idx, 0),
tabled::settings::Span::column(sf.names().len() as isize),
Expand Down
10 changes: 8 additions & 2 deletions vortex-tensor/src/scalar_fns/l2_denorm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -441,14 +441,18 @@ pub fn normalize_as_l2_denorm(
let normalized_dtype = input.dtype().as_nonnullable();
let flat = extract_flat_elements(input.storage_array(), tensor_flat_size, ctx)?;

// Resolve validity to a mask once rather than probing it per row (each `Validity::is_valid`
// executes a scalar for array-backed validity).
let norms_valid = norms_validity.execute_mask(row_count, ctx)?;

// Normalize all of the vectors.
let normalized = match_each_float_ptype!(flat.ptype(), |T| {
let norm_values = primitive_norms.as_slice::<T>();

let total_elements = row_count * tensor_flat_size;
let mut elements = BufferMut::<T>::with_capacity(total_elements);
for i in 0..row_count {
let is_valid = norms_validity.execute_is_valid(i, ctx)?;
let is_valid = norms_valid.value(i);
let norm = norm_values[i];

// SAFETY: We allocated `row_count * tensor_flat_size` capacity and push exactly
Expand Down Expand Up @@ -637,12 +641,14 @@ pub fn validate_l2_normalized_rows_against_norms(
Some(norms) => normalized_validity.and(norms.validity()?)?,
None => normalized_validity,
};
// Resolve validity to a mask once rather than probing it per row.
let combined_valid = combined_validity.execute_mask(row_count, ctx)?;

match_each_float_ptype!(element_ptype, |T| {
let stored_norms = norms.as_ref().map(|norms| norms.as_slice::<T>());

for i in 0..row_count {
if !combined_validity.execute_is_valid(i, ctx)? {
if !combined_valid.value(i) {
continue;
}

Expand Down
16 changes: 15 additions & 1 deletion vortex-tui/src/browse/ui/layouts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ use ratatui::widgets::Table;
use ratatui::widgets::Widget;
use ratatui::widgets::Wrap;
use vortex::array::ArrayRef;
use vortex::array::Canonical;
use vortex::array::IntoArray;
use vortex::array::VortexSessionExecute;
use vortex::array::arrays::StructArray;
use vortex::array::arrays::struct_::StructArrayExt;
Expand Down Expand Up @@ -161,7 +163,19 @@ fn render_array(app: &AppState, area: Rect, buf: &mut Buffer, is_stats_table: bo

assert_eq!(app.cursor.dtype(), array.dtype());

let field_arrays: Vec<ArrayRef> = struct_array.unmasked_fields().to_vec();
// Canonicalize each field once so the per-row `execute_scalar` calls below do not
// re-decode the field encodings for every row.
let field_arrays: Vec<ArrayRef> = struct_array
.unmasked_fields()
.iter()
.map(|field| {
field
.clone()
.execute::<Canonical>(&mut ctx)
.vortex_expect("failed to canonicalize field array")
.into_array()
})
.collect();

// TODO: trim the number of displayed rows and allow paging through column stats.
let mut rows = Vec::with_capacity(array.len());
Expand Down
Loading