Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 57 additions & 5 deletions vortex-array/src/arrays/scalar_fn/vtable/validity.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,22 @@
// SPDX-FileCopyrightText: Copyright the Vortex contributors

use vortex_error::VortexResult;
use vortex_error::vortex_bail;

use crate::ArrayRef;
use crate::IntoArray;
use crate::LEGACY_SESSION;
use crate::VortexSessionExecute;
use crate::array::Array;
use crate::array::ArrayView;
use crate::array::ValidityVTable;
use crate::array::child_to_validity;
use crate::arrays::ConstantArray;
use crate::arrays::scalar_fn::ScalarFnArrayExt;
use crate::arrays::scalar_fn::vtable::ArrayExpr;
use crate::arrays::scalar_fn::vtable::FakeEq;
use crate::arrays::scalar_fn::vtable::ScalarFn;
use crate::dtype::Nullability;
use crate::expr::Expression;
use crate::expr::lit;
use crate::scalar_fn::TypedScalarFnInstance;
Expand All @@ -21,6 +26,36 @@ use crate::scalar_fn::fns::literal::Literal;
use crate::scalar_fn::fns::root::Root;
use crate::validity::Validity;

/// Build a lazy array DAG that mirrors `expr`, without executing any of it.
///
/// Leaves are expected to be either `ArrayExpr` nodes (which already hold an array) or
/// `Literal` nodes. Every other node becomes a `ScalarFn` array over its recursively
/// converted children.
///
/// # Errors
///
/// Bails if `expr` (or any descendant) is a `Root` expression, and propagates any error
/// raised while converting a child or constructing the `ScalarFn` array.
fn expr_to_lazy_array(expr: &Expression, row_count: usize) -> VortexResult<ArrayRef> {
    // A Root node is not expected inside a validity expression, so reject it up front.
    if expr.is::<Root>() {
        vortex_bail!("Root expression cannot be converted in validity context");
    }

    let lazy = if expr.is::<Literal>() {
        // Literal leaf: a constant array of `row_count` rows.
        let value = expr.as_::<Literal>();
        ConstantArray::new(value.clone(), row_count).into_array()
    } else if expr.is::<ArrayExpr>() {
        // ArrayExpr leaf: hand back the array it wraps.
        expr.as_::<ArrayExpr>().0.clone()
    } else {
        // Interior node: convert each child in order, stopping at the first failure,
        // then wrap the results in a ScalarFn array for this node's function.
        let mut inputs: Vec<ArrayRef> = Vec::new();
        for child in expr.children().iter() {
            inputs.push(expr_to_lazy_array(child, row_count)?);
        }
        Array::<ScalarFn>::try_new(expr.scalar_fn().clone(), inputs, row_count)?.into_array()
    };

    Ok(lazy)
}

/// Execute an expression tree recursively.
///
/// This assumes all leaf expressions are either ArrayExpr (wrapping actual arrays) or Literals.
Expand All @@ -29,13 +64,13 @@ fn execute_expr(expr: &Expression, row_count: usize) -> VortexResult<ArrayRef> {

// Handle Root expression - this should not happen in validity expressions
if expr.is::<Root>() {
vortex_error::vortex_bail!("Root expression cannot be executed in validity context");
vortex_bail!("Root expression cannot be executed in validity context");
}

// Handle Literal expression - create a constant array
if expr.is::<Literal>() {
let scalar = expr.as_::<Literal>();
return Ok(crate::arrays::ConstantArray::new(scalar.clone(), row_count).into_array());
return Ok(ConstantArray::new(scalar.clone(), row_count).into_array());
}

// Recursively execute child expressions to get input arrays
Expand Down Expand Up @@ -66,9 +101,26 @@ impl ValidityVTable<ScalarFn> for ScalarFn {
.collect::<VortexResult<_>>()?;

let expr = Expression::try_new(array.scalar_fn().clone(), inputs)?;
let validity_expr = array.scalar_fn().validity(&expr)?;

// Execute the validity expression. All leaves are ArrayExpr nodes.
Ok(Validity::Array(execute_expr(&validity_expr, array.len())?))
match array.scalar_fn().validity_opt(&expr)? {
Some(validity_expr) => {
// The function defines its validity as an expression over its inputs, so we can
// represent it as a lazy array DAG without executing anything. If the expression
// is already a constant it is folded back into AllValid/AllInvalid.
let validity_array = expr_to_lazy_array(&validity_expr, array.len())?;
Ok(child_to_validity(
Some(&validity_array),
Nullability::Nullable,
))
}
None => {
// The function's validity can only be determined by executing the function
// itself (e.g. Kleene logic and/or). Representing that lazily would create a
// self-referential array (is_not_null over this very expression), so execute it
// eagerly instead.
let validity_expr = array.scalar_fn().validity(&expr)?;
Ok(Validity::Array(execute_expr(&validity_expr, array.len())?))
}
}
}
}
12 changes: 11 additions & 1 deletion vortex-array/src/scalar_fn/erased.rs
Original file line number Diff line number Diff line change
Expand Up @@ -134,12 +134,22 @@ impl ScalarFnRef {

/// Transforms the expression into one representing the validity of this expression.
pub fn validity(&self, expr: &Expression) -> VortexResult<Expression> {
Ok(self.0.validity(expr)?.unwrap_or_else(|| {
Ok(self.validity_opt(expr)?.unwrap_or_else(|| {

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do you want to remove the TODO?

// TODO(ngates): make validity a mandatory method on VTable to avoid this fallback.
IsNotNull.new_expr(EmptyOptions, [expr.clone()])
}))
}

/// Returns the validity expression for `expr`, if this function defines one.
///
/// This forwards directly to the wrapped function's `validity` and passes its result
/// through unchanged, so `Ok(None)` means the function does not define a validity
/// expression.
///
/// When `None` is returned, the validity can only be determined by executing the
/// expression itself (e.g. Kleene logic `and`/`or`), and [`Self::validity`] falls back to
/// `is_not_null` over the expression.
///
/// # Errors
///
/// Returns whatever error the wrapped function's `validity` call reports.
pub fn validity_opt(&self, expr: &Expression) -> VortexResult<Option<Expression>> {
self.0.validity(expr)
}

/// Execute the expression given the input arguments.
pub fn execute(
&self,
Expand Down
Loading