From 4906359e09410f5ecfb9ad293df906fc33d12b91 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Wed, 10 Jun 2026 12:41:26 +0000 Subject: [PATCH] Rename Validity::no_nulls to definitely_no_nulls and add execute_no_nulls The boolean returned by no_nulls() only proves the absence of nulls for the NonNullable and AllValid variants; a Validity::Array may still resolve to all-valid once executed. As validity arrays become lazy this distinction matters, so the cheap check is renamed to definitely_no_nulls() and documented as conservative. For call sites that need a definitive answer, add execute_no_nulls(), which executes the validity into a Mask via execute_mask(). The CUDA ListView export ensure-check is switched to the exact variant since a false answer there is a hard error rather than a slow path. https://claude.ai/code/session_01VPQ7dfZtijfrsjAipwXvEj Signed-off-by: Joe Isaacs --- encodings/parquet-variant/src/array.rs | 4 ++-- encodings/zstd/src/compute/cast.rs | 2 +- .../fns/all_non_distinct/filter.rs | 4 ++-- vortex-array/src/arrays/dict/compute/cast.rs | 2 +- .../arrays/filter/execute/take/fixed_width.rs | 4 ++-- vortex-array/src/validity.rs | 21 +++++++++++++++++-- vortex-cuda/src/arrow/list_view.rs | 2 +- 7 files changed, 28 insertions(+), 11 deletions(-) diff --git a/encodings/parquet-variant/src/array.rs b/encodings/parquet-variant/src/array.rs index d7004af4ef8..37247430c37 100644 --- a/encodings/parquet-variant/src/array.rs +++ b/encodings/parquet-variant/src/array.rs @@ -241,7 +241,7 @@ fn logical_shredded_from_parquet_field( let validity = field_struct.validity()?; // `unmasked_field_by_name_opt` intentionally ignores the parent struct validity. // Reapply it here so null wrapper rows become null typed/raw rows downstream. - let typed_value = if validity.no_nulls() { + let typed_value = if validity.definitely_no_nulls() { typed_value.clone() } else { typed_value @@ -251,7 +251,7 @@ fn logical_shredded_from_parquet_field( let value = field_struct .unmasked_field_by_name_opt("value") .map(|value| { - if validity.no_nulls() { + if validity.definitely_no_nulls() { Ok(value.clone()) } else { value.clone().mask(validity.to_array(value.len())) diff --git a/encodings/zstd/src/compute/cast.rs b/encodings/zstd/src/compute/cast.rs index e295e556566..6cbe4f37795 100644 --- a/encodings/zstd/src/compute/cast.rs +++ b/encodings/zstd/src/compute/cast.rs @@ -41,7 +41,7 @@ impl CastReduce for Zstd { child_to_validity(array.slots()[0].as_ref(), array.dtype().nullability()); let has_nulls = !unsliced_validity .slice(array.slice_start()..array.slice_stop())? - .no_nulls(); + .definitely_no_nulls(); // We don't attempt to handle casting when there are nulls. if has_nulls { diff --git a/vortex-array/src/aggregate_fn/fns/all_non_distinct/filter.rs b/vortex-array/src/aggregate_fn/fns/all_non_distinct/filter.rs index 9ad4ea2cc30..b9aacd56163 100644 --- a/vortex-array/src/aggregate_fn/fns/all_non_distinct/filter.rs +++ b/vortex-array/src/aggregate_fn/fns/all_non_distinct/filter.rs @@ -14,7 +14,7 @@ pub(super) fn shared_validity_mask( ) -> VortexResult> { let lhs_validity = lhs.validity()?; let rhs_validity = rhs.validity()?; - if lhs_validity.no_nulls() && rhs_validity.no_nulls() { + if lhs_validity.definitely_no_nulls() && rhs_validity.definitely_no_nulls() { return Ok(Some(Mask::new_true(lhs.len()))); } @@ -33,7 +33,7 @@ pub(super) fn filter_valid_rows_if_needed( ctx: &mut ExecutionCtx, ) -> VortexResult> { let validity = lhs.validity()?; - if validity.no_nulls() { + if validity.definitely_no_nulls() { return Ok(None); } diff --git a/vortex-array/src/arrays/dict/compute/cast.rs b/vortex-array/src/arrays/dict/compute/cast.rs index c269b214e19..490f1f31549 100644 --- a/vortex-array/src/arrays/dict/compute/cast.rs +++ b/vortex-array/src/arrays/dict/compute/cast.rs @@ -18,7 +18,7 @@ impl CastReduce for Dict { fn cast(array: ArrayView<'_, Dict>, dtype: &DType) -> VortexResult> { // Can have un-reference null values making the cast of values fail without a possible mask. // TODO(joe): optimize this, could look at accessible values and fill_null not those? - if !dtype.is_nullable() && !array.values().validity()?.no_nulls() { + if !dtype.is_nullable() && !array.values().validity()?.definitely_no_nulls() { return Ok(None); } // Cast the dictionary values to the target type diff --git a/vortex-array/src/arrays/filter/execute/take/fixed_width.rs b/vortex-array/src/arrays/filter/execute/take/fixed_width.rs index edafbeb9abd..3e508abd2c5 100644 --- a/vortex-array/src/arrays/filter/execute/take/fixed_width.rs +++ b/vortex-array/src/arrays/filter/execute/take/fixed_width.rs @@ -105,7 +105,7 @@ where take_filtered_values::(&child, filter, ranks, None)? }; - let output_validity = if child_validity.no_nulls() { + let output_validity = if child_validity.definitely_no_nulls() { ranks_validity.union_nullability(child_validity.nullability()) } else { let translated_indices = @@ -120,7 +120,7 @@ where AllOr::Some(buf) => { let taken = take_filtered_values(child.as_slice(), filter, ranks, Some(buf))?; - let output_validity = if child_validity.no_nulls() { + let output_validity = if child_validity.definitely_no_nulls() { ranks_validity.union_nullability(child_validity.nullability()) } else { let translated_indices = diff --git a/vortex-array/src/validity.rs b/vortex-array/src/validity.rs index 31fe82cf357..f3a77b4759e 100644 --- a/vortex-array/src/validity.rs +++ b/vortex-array/src/validity.rs @@ -112,13 +112,30 @@ impl Validity { } } - /// Returns `true` if this validity guarantees no null values, i.e. it is either + /// Returns `true` if this validity *definitely* contains no null values, i.e. it is either /// [`Validity::NonNullable`] or [`Validity::AllValid`]. + /// + /// Returning `false` does not prove the presence of nulls: a [`Validity::Array`] may still + /// resolve to all-valid once executed. Callers must treat `false` as "unknown without + /// compute" and either fall back to a null-handling path or execute the validity. #[inline] - pub fn no_nulls(&self) -> bool { + pub fn definitely_no_nulls(&self) -> bool { matches!(self, Self::NonNullable | Self::AllValid) } + /// Returns whether this validity contains no null values, executing the validity array if + /// necessary. + /// + /// This is the exact counterpart to [`Self::definitely_no_nulls`]: use it when the caller + /// needs a definitive answer rather than a cheap, conservative one. + pub fn execute_no_nulls(&self, length: usize, ctx: &mut ExecutionCtx) -> VortexResult { + match self { + Self::NonNullable | Self::AllValid => Ok(true), + Self::AllInvalid => Ok(length == 0), + Self::Array(_) => Ok(self.execute_mask(length, ctx)?.all_true()), + } + } + /// The union nullability and validity. #[inline] pub fn union_nullability(self, nullability: Nullability) -> Self { diff --git a/vortex-cuda/src/arrow/list_view.rs b/vortex-cuda/src/arrow/list_view.rs index b14859dfbd5..b18aaf8c015 100644 --- a/vortex-cuda/src/arrow/list_view.rs +++ b/vortex-cuda/src/arrow/list_view.rs @@ -457,7 +457,7 @@ async fn rebuild_primitive_list_view_child( } = elements.into_data_parts(); vortex_ensure!( - validity.no_nulls(), + validity.execute_no_nulls(elements_len, ctx.execution_ctx())?, "cannot export non-contiguous device-resident ListViewArray with nullable {child_name}: GPU child validity rebuild is not implemented" );