diff --git a/vortex-array/src/arrays/scalar_fn/vtable/validity.rs b/vortex-array/src/arrays/scalar_fn/vtable/validity.rs
index 2ac376155e3..fd5bad28054 100644
--- a/vortex-array/src/arrays/scalar_fn/vtable/validity.rs
+++ b/vortex-array/src/arrays/scalar_fn/vtable/validity.rs
@@ -2,17 +2,22 @@
 // SPDX-FileCopyrightText: Copyright the Vortex contributors
 
 use vortex_error::VortexResult;
+use vortex_error::vortex_bail;
 
 use crate::ArrayRef;
 use crate::IntoArray;
 use crate::LEGACY_SESSION;
 use crate::VortexSessionExecute;
+use crate::array::Array;
 use crate::array::ArrayView;
 use crate::array::ValidityVTable;
+use crate::array::child_to_validity;
+use crate::arrays::ConstantArray;
 use crate::arrays::scalar_fn::ScalarFnArrayExt;
 use crate::arrays::scalar_fn::vtable::ArrayExpr;
 use crate::arrays::scalar_fn::vtable::FakeEq;
 use crate::arrays::scalar_fn::vtable::ScalarFn;
+use crate::dtype::Nullability;
 use crate::expr::Expression;
 use crate::expr::lit;
 use crate::scalar_fn::TypedScalarFnInstance;
@@ -21,6 +26,36 @@ use crate::scalar_fn::fns::literal::Literal;
 use crate::scalar_fn::fns::root::Root;
 use crate::validity::Validity;
 
+/// Convert an expression tree into a lazy array DAG without executing it.
+///
+/// This assumes all leaf expressions are either ArrayExpr (wrapping actual arrays) or Literals.
+fn expr_to_lazy_array(expr: &Expression, row_count: usize) -> VortexResult<ArrayRef> {
+    // Handle Root expression - this should not happen in validity expressions
+    if expr.is::<Root>() {
+        vortex_bail!("Root expression cannot be converted in validity context");
+    }
+
+    // Handle Literal expression - create a constant array
+    if expr.is::<Literal>() {
+        let scalar = expr.as_::<Literal>();
+        return Ok(ConstantArray::new(scalar.clone(), row_count).into_array());
+    }
+
+    // Handle ArrayExpr leaves - unwrap the array they hold
+    if expr.is::<ArrayExpr>() {
+        return Ok(expr.as_::<ArrayExpr>().0.clone());
+    }
+
+    // Recursively convert child expressions into lazy input arrays
+    let children: Vec<ArrayRef> = expr
+        .children()
+        .iter()
+        .map(|child| expr_to_lazy_array(child, row_count))
+        .collect::<VortexResult<_>>()?;
+
+    Ok(Array::<ScalarFn>::try_new(expr.scalar_fn().clone(), children, row_count)?.into_array())
+}
+
 /// Execute an expression tree recursively.
 ///
 /// This assumes all leaf expressions are either ArrayExpr (wrapping actual arrays) or Literals.
@@ -29,13 +64,13 @@ fn execute_expr(expr: &Expression, row_count: usize) -> VortexResult<ArrayRef> {
 
     // Handle Root expression - this should not happen in validity expressions
     if expr.is::<Root>() {
-        vortex_error::vortex_bail!("Root expression cannot be executed in validity context");
+        vortex_bail!("Root expression cannot be executed in validity context");
     }
 
     // Handle Literal expression - create a constant array
     if expr.is::<Literal>() {
         let scalar = expr.as_::<Literal>();
-        return Ok(crate::arrays::ConstantArray::new(scalar.clone(), row_count).into_array());
+        return Ok(ConstantArray::new(scalar.clone(), row_count).into_array());
     }
 
     // Recursively execute child expressions to get input arrays
@@ -66,9 +101,26 @@ impl ValidityVTable for ScalarFn {
             .collect::<VortexResult<_>>()?;
 
         let expr = Expression::try_new(array.scalar_fn().clone(), inputs)?;
-        let validity_expr = array.scalar_fn().validity(&expr)?;
-        // Execute the validity expression. All leaves are ArrayExpr nodes.
-        Ok(Validity::Array(execute_expr(&validity_expr, array.len())?))
+        match array.scalar_fn().validity_opt(&expr)? {
+            Some(validity_expr) => {
+                // The function defines its validity as an expression over its inputs, so we can
+                // represent it as a lazy array DAG without executing anything. If the expression
+                // is already a constant it is folded back into AllValid/AllInvalid.
+                let validity_array = expr_to_lazy_array(&validity_expr, array.len())?;
+                Ok(child_to_validity(
+                    Some(&validity_array),
+                    Nullability::Nullable,
+                ))
+            }
+            None => {
+                // The function's validity can only be determined by executing the function
+                // itself (e.g. Kleene logic and/or). Representing that lazily would create a
+                // self-referential array (is_not_null over this very expression), so execute it
+                // eagerly instead.
+                let validity_expr = array.scalar_fn().validity(&expr)?;
+                Ok(Validity::Array(execute_expr(&validity_expr, array.len())?))
+            }
+        }
     }
 }
 
diff --git a/vortex-array/src/scalar_fn/erased.rs b/vortex-array/src/scalar_fn/erased.rs
index 10e82d25455..5c95a4fa225 100644
--- a/vortex-array/src/scalar_fn/erased.rs
+++ b/vortex-array/src/scalar_fn/erased.rs
@@ -134,12 +134,22 @@ impl ScalarFnRef {
 
     /// Transforms the expression into one representing the validity of this expression.
     pub fn validity(&self, expr: &Expression) -> VortexResult<Expression> {
-        Ok(self.0.validity(expr)?.unwrap_or_else(|| {
+        Ok(self.validity_opt(expr)?.unwrap_or_else(|| {
             // TODO(ngates): make validity a mandatory method on VTable to avoid this fallback.
             IsNotNull.new_expr(EmptyOptions, [expr.clone()])
         }))
     }
 
+    /// Transforms the expression into one representing the validity of this expression,
+    /// returning `None` if the function does not define a validity expression.
+    ///
+    /// When `None` is returned, the validity can only be determined by executing the
+    /// expression itself (e.g. Kleene logic `and`/`or`), and [`Self::validity`] falls back to
+    /// `is_not_null` over the expression.
+    pub fn validity_opt(&self, expr: &Expression) -> VortexResult<Option<Expression>> {
+        self.0.validity(expr)
+    }
+
     /// Execute the expression given the input arguments.
     pub fn execute(
         &self,