From 7532b8b3aa564c6e8f8e102cc43a9087fab9b92c Mon Sep 17 00:00:00 2001 From: Nemo Yu Date: Tue, 9 Jun 2026 16:10:01 -0400 Subject: [PATCH 1/5] feat(vortex-geo): native Point type with planar ST_Distance Adds a GeoArrow-style `Point` extension type (Struct, dimension-ready) and the planar `GeoDistance` scalar function between two point columns. Signed-off-by: Nemo Yu --- vortex-geo/src/extension/coordinate.rs | 188 +++++++++++++++++++++++++ vortex-geo/src/extension/mod.rs | 5 + vortex-geo/src/extension/point.rs | 154 ++++++++++++++++++++ vortex-geo/src/lib.rs | 10 +- vortex-geo/src/scalar_fn/distance.rs | 167 ++++++++++++++++++++++ vortex-geo/src/scalar_fn/mod.rs | 9 ++ 6 files changed, 532 insertions(+), 1 deletion(-) create mode 100644 vortex-geo/src/extension/coordinate.rs create mode 100644 vortex-geo/src/extension/point.rs create mode 100644 vortex-geo/src/scalar_fn/distance.rs create mode 100644 vortex-geo/src/scalar_fn/mod.rs diff --git a/vortex-geo/src/extension/coordinate.rs b/vortex-geo/src/extension/coordinate.rs new file mode 100644 index 00000000000..3599c003e71 --- /dev/null +++ b/vortex-geo/src/extension/coordinate.rs @@ -0,0 +1,188 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! The coordinate building block shared by geometry extension types: the `Struct` +//! storage, its [`Dimension`], the decoded [`Coordinate`] value, and the readers that decode it. +//! `z`/`m` are optional, so all four GeoArrow dimensions share one value type — no third-party deps. 
+ +use std::fmt::Display; +use std::fmt::Formatter; + +use vortex_array::ArrayRef; +use vortex_array::Canonical; +use vortex_array::ExecutionCtx; +use vortex_array::arrays::PrimitiveArray; +use vortex_array::arrays::extension::ExtensionArrayExt; +use vortex_array::arrays::struct_::StructArrayExt; +use vortex_array::dtype::DType; +use vortex_array::dtype::FieldNames; +use vortex_array::dtype::Nullability; +use vortex_array::dtype::PType; +use vortex_array::dtype::StructFields; +use vortex_array::scalar::Scalar; +use vortex_error::VortexResult; +use vortex_error::vortex_bail; +use vortex_error::vortex_err; + +/// Coordinate dimensions, matching GeoArrow. Field order is fixed: x, y, then z before m. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Dimension { + /// 2D: `x`, `y`. + Xy, + /// 3D with elevation: `x`, `y`, `z`. + Xyz, + /// 3D with a measure: `x`, `y`, `m`. + Xym, + /// 4D: `x`, `y`, `z`, `m`. + Xyzm, +} + +impl Dimension { + /// The coordinate struct field names for this dimension, in GeoArrow order. + pub fn field_names(self) -> &'static [&'static str] { + match self { + Dimension::Xy => &["x", "y"], + Dimension::Xyz => &["x", "y", "z"], + Dimension::Xym => &["x", "y", "m"], + Dimension::Xyzm => &["x", "y", "z", "m"], + } + } + + /// Recover the dimension from a coordinate's field names, in GeoArrow order. + pub fn from_field_names(names: &[&str]) -> VortexResult { + Ok(match names { + ["x", "y"] => Dimension::Xy, + ["x", "y", "z"] => Dimension::Xyz, + ["x", "y", "m"] => Dimension::Xym, + ["x", "y", "z", "m"] => Dimension::Xyzm, + _ => vortex_bail!("not a valid GeoArrow coordinate dimension: {names:?}"), + }) + } +} + +/// A decoded coordinate. `z`/`m` are `Some` iff the storage dimension includes them. +#[derive(Debug, Clone, Copy, PartialEq)] +pub struct Coordinate { + /// The x (longitude/easting) ordinate. + pub x: f64, + /// The y (latitude/northing) ordinate. + pub y: f64, + /// The optional z (elevation) ordinate. 
+ pub z: Option, + /// The optional m (measure) ordinate. + pub m: Option, +} + +impl Coordinate { + /// A 2D coordinate (no `z`/`m`). + pub fn xy(x: f64, y: f64) -> Self { + Coordinate { + x, + y, + z: None, + m: None, + } + } +} + +impl Display for Coordinate { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "POINT({} {})", self.x, self.y) + } +} + +/// The coordinate storage dtype for a dimension: `Struct` of non-nullable f64. +pub fn coordinate_dtype(dim: Dimension, nullability: Nullability) -> DType { + let names = dim.field_names(); + let fields = std::iter::repeat_n( + DType::Primitive(PType::F64, Nullability::NonNullable), + names.len(), + ) + .collect::>(); + DType::Struct( + StructFields::new(FieldNames::from(names), fields), + nullability, + ) +} + +/// Validate that `dtype` is a coordinate struct of non-nullable `f64` fields, returning its +/// [`Dimension`]. Any of the four GeoArrow dimensions validates. +pub fn coordinate_dimension(dtype: &DType) -> VortexResult { + let DType::Struct(fields, _) = dtype else { + vortex_bail!("coordinate storage must be a Struct, was {dtype}"); + }; + let names: Vec<&str> = fields.names().iter().map(|n| n.as_ref()).collect(); + for (i, field) in fields.fields().enumerate() { + if !matches!( + field, + DType::Primitive(PType::F64, Nullability::NonNullable) + ) { + vortex_bail!( + "coordinate field {} must be non-nullable f64, was {field}", + names[i] + ); + } + } + Dimension::from_field_names(&names) +} + +/// Decode a [`Coordinate`] from a coordinate `Struct` scalar (`z`/`m` read iff +/// present, so the same decoder serves every dimension). 
+pub(crate) fn coordinate_from_struct(scalar: &Scalar) -> VortexResult { + let fields = scalar.as_struct(); + let required = |name: &str| -> VortexResult { + f64::try_from( + &fields + .field(name) + .ok_or_else(|| vortex_err!("coordinate missing {name}"))?, + ) + }; + let optional = |name: &str| -> VortexResult> { + fields + .field(name) + .map(|value| f64::try_from(&value)) + .transpose() + }; + Ok(Coordinate { + x: required("x")?, + y: required("y")?, + z: optional("z")?, + m: optional("m")?, + }) +} + +/// Decode a [`Coordinate`] from an extension-typed point scalar (unwrapped to its coordinate +/// storage) or a bare coordinate `Struct` scalar. The per-row decode used by the distance fns. +pub fn coordinate_from_scalar(scalar: &Scalar) -> VortexResult { + match scalar.dtype().as_extension_opt() { + Some(_) => coordinate_from_struct(&scalar.as_extension().to_storage_scalar()), + None => coordinate_from_struct(scalar), + } +} + +/// Canonicalize a point column once and return its flat `x`/`y` `f64` columns. The bulk counterpart +/// to [`coordinate_from_scalar`]; distance is planar, so `z`/`m` are ignored. +pub(crate) fn xy_columns( + points: &ArrayRef, + ctx: &mut ExecutionCtx, +) -> VortexResult<(PrimitiveArray, PrimitiveArray)> { + let storage = points + .clone() + .execute::(ctx)? + .into_extension() + .storage_array() + .clone() + .execute::(ctx)? + .into_struct(); + let xs = storage + .unmasked_field_by_name("x")? + .clone() + .execute::(ctx)? + .into_primitive(); + let ys = storage + .unmasked_field_by_name("y")? + .clone() + .execute::(ctx)? 
+ .into_primitive(); + Ok((xs, ys)) +} diff --git a/vortex-geo/src/extension/mod.rs b/vortex-geo/src/extension/mod.rs index f08b76ae83d..e4373eb4011 100644 --- a/vortex-geo/src/extension/mod.rs +++ b/vortex-geo/src/extension/mod.rs @@ -1,10 +1,15 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +mod coordinate; +mod point; mod wkb; use std::fmt::Display; +pub(crate) use coordinate::xy_columns; +pub use coordinate::*; +pub use point::*; pub use wkb::*; /// Extension metadata that is common to all the geospatial extension types. diff --git a/vortex-geo/src/extension/point.rs b/vortex-geo/src/extension/point.rs new file mode 100644 index 00000000000..49867adfebd --- /dev/null +++ b/vortex-geo/src/extension/point.rs @@ -0,0 +1,154 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! The [`Point`] geometry extension type (`vortex.geo.point`): a location stored columnarly as +//! `Struct` of `f64`, tagged with [`GeoMetadata`] (CRS). + +use prost::Message; +use vortex_array::dtype::extension::ExtDType; +use vortex_array::dtype::extension::ExtId; +use vortex_array::dtype::extension::ExtVTable; +use vortex_array::scalar::Scalar; +use vortex_array::scalar::ScalarValue; +use vortex_error::VortexResult; + +use super::GeoMetadata; +use super::coordinate::Coordinate; +use super::coordinate::coordinate_dimension; +use super::coordinate::coordinate_from_struct; + +/// A single location: `geoarrow.point`, stored as `Struct` of `f64`. 
+#[derive(Debug, Clone, Default, PartialEq, Eq, Hash)] +pub struct Point; + +impl ExtVTable for Point { + type Metadata = GeoMetadata; + type NativeValue<'a> = Coordinate; + + fn id(&self) -> ExtId { + ExtId::new_static("vortex.geo.point") + } + + fn serialize_metadata(&self, metadata: &Self::Metadata) -> VortexResult> { + Ok(metadata.encode_to_vec()) + } + + fn deserialize_metadata(&self, metadata: &[u8]) -> VortexResult { + Ok(GeoMetadata::decode(metadata)?) + } + + fn validate_dtype(ext_dtype: &ExtDType) -> VortexResult<()> { + coordinate_dimension(ext_dtype.storage_dtype()).map(|_| ()) + } + + fn unpack_native<'a>( + ext_dtype: &'a ExtDType, + storage_value: &'a ScalarValue, + ) -> VortexResult { + let storage = Scalar::try_new( + ext_dtype.storage_dtype().clone(), + Some(storage_value.clone()), + )?; + coordinate_from_struct(&storage) + } +} + +#[cfg(test)] +mod tests { + use vortex_array::IntoArray; + use vortex_array::VortexSessionExecute; + use vortex_array::arrays::ExtensionArray; + use vortex_array::arrays::PrimitiveArray; + use vortex_array::arrays::StructArray; + use vortex_array::dtype::DType; + use vortex_array::dtype::Nullability; + use vortex_array::dtype::PType; + use vortex_array::dtype::extension::ExtDType; + use vortex_array::session::ArraySession; + use vortex_error::VortexResult; + use vortex_session::VortexSession; + + use super::Point; + use crate::extension::Coordinate; + use crate::extension::Dimension; + use crate::extension::GeoMetadata; + use crate::extension::coordinate_dimension; + use crate::extension::coordinate_dtype; + use crate::extension::coordinate_from_scalar; + + fn geo_meta() -> GeoMetadata { + GeoMetadata { + crs: Some("EPSG:4326".to_string()), + } + } + + /// `Point` accepts every GeoArrow dimension; the storage carries the canonical field names and + /// the dimension round-trips, so a z/m swap or a mislabel would be caught. 
+ #[test] + fn point_validates_every_dimension() -> VortexResult<()> { + let cases = [ + (Dimension::Xy, ["x", "y"].as_slice()), + (Dimension::Xyz, ["x", "y", "z"].as_slice()), + (Dimension::Xym, ["x", "y", "m"].as_slice()), + (Dimension::Xyzm, ["x", "y", "z", "m"].as_slice()), + ]; + for (dim, expected_fields) in cases { + let storage = coordinate_dtype(dim, Nullability::NonNullable); + let DType::Struct(fields, _) = &storage else { + unreachable!("coordinate_dtype builds a struct"); + }; + let names: Vec<&str> = fields.names().iter().map(|n| n.as_ref()).collect(); + assert_eq!(names.as_slice(), expected_fields); + assert_eq!(coordinate_dimension(&storage)?, dim); + ExtDType::::try_new(geo_meta(), storage)?; + } + Ok(()) + } + + /// Invalid storage is rejected at dtype construction: both non-struct storage and a struct whose + /// fields are not GeoArrow coordinates. + #[test] + fn point_rejects_invalid_storage() -> VortexResult<()> { + let primitive = DType::Primitive(PType::F64, Nullability::NonNullable); + assert!(ExtDType::::try_new(geo_meta(), primitive).is_err()); + + let wrong_fields = StructArray::from_fields(&[ + ("a", PrimitiveArray::from_iter(vec![0.0f64]).into_array()), + ("b", PrimitiveArray::from_iter(vec![0.0f64]).into_array()), + ])? + .into_array(); + assert!(ExtDType::::try_new(geo_meta(), wrong_fields.dtype().clone()).is_err()); + Ok(()) + } + + /// A `Point` column round-trips through scalar execution back to the original coordinates. + #[test] + fn point_unpacks_coordinates() -> VortexResult<()> { + let session = VortexSession::empty().with::(); + let mut ctx = session.create_execution_ctx(); + + let storage = StructArray::from_fields(&[ + ( + "x", + PrimitiveArray::from_iter(vec![1.0f64, -111.7610]).into_array(), + ), + ( + "y", + PrimitiveArray::from_iter(vec![2.0f64, 34.8697]).into_array(), + ), + ])? 
+ .into_array(); + let dtype = ExtDType::::try_new(geo_meta(), storage.dtype().clone())?; + let points = ExtensionArray::new(dtype.erased(), storage).into_array(); + + assert_eq!( + coordinate_from_scalar(&points.execute_scalar(0, &mut ctx)?)?, + Coordinate::xy(1.0, 2.0) + ); + assert_eq!( + coordinate_from_scalar(&points.execute_scalar(1, &mut ctx)?)?, + Coordinate::xy(-111.7610, 34.8697) + ); + Ok(()) + } +} diff --git a/vortex-geo/src/lib.rs b/vortex-geo/src/lib.rs index 513caf85d92..90f93dfa2f8 100644 --- a/vortex-geo/src/lib.rs +++ b/vortex-geo/src/lib.rs @@ -5,17 +5,25 @@ use std::sync::Arc; use vortex_array::arrow::ArrowSessionExt; use vortex_array::dtype::session::DTypeSessionExt; +use vortex_array::scalar_fn::session::ScalarFnSessionExt; use vortex_session::VortexSession; +use crate::extension::Point; use crate::extension::WellKnownBinary; +use crate::scalar_fn::GeoDistance; pub mod extension; +pub mod scalar_fn; /// Set up a session with support for geospatial extension types, encodings and layouts. pub fn initialize(session: &VortexSession) { - // register geospatial extension types + // Register the geospatial extension types. session.dtypes().register(WellKnownBinary); session.arrow().register_exporter(Arc::new(WellKnownBinary)); session.arrow().register_importer(Arc::new(WellKnownBinary)); + session.dtypes().register(Point); + + // Register the geometry scalar functions. + session.scalar_fns().register(GeoDistance); } #[cfg(test)] diff --git a/vortex-geo/src/scalar_fn/distance.rs b/vortex-geo/src/scalar_fn/distance.rs new file mode 100644 index 00000000000..9dcb0036fec --- /dev/null +++ b/vortex-geo/src/scalar_fn/distance.rs @@ -0,0 +1,167 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Planar distance between the paired points of two columns. 
+ +use vortex_array::ArrayRef; +use vortex_array::ExecutionCtx; +use vortex_array::IntoArray; +use vortex_array::arrays::PrimitiveArray; +use vortex_array::arrays::ScalarFnArray; +use vortex_array::dtype::DType; +use vortex_array::dtype::Nullability; +use vortex_array::dtype::PType; +use vortex_array::scalar_fn::Arity; +use vortex_array::scalar_fn::ChildName; +use vortex_array::scalar_fn::EmptyOptions; +use vortex_array::scalar_fn::ExecutionArgs; +use vortex_array::scalar_fn::ScalarFnId; +use vortex_array::scalar_fn::ScalarFnVTable; +use vortex_array::scalar_fn::TypedScalarFnInstance; +use vortex_error::VortexResult; +use vortex_session::VortexSession; + +use crate::extension::xy_columns; + +/// Planar Euclidean distance between `(ax, ay)` and `(bx, by)`. +fn euclidean_distance(ax: f64, ay: f64, bx: f64, by: f64) -> f64 { + let dx = ax - bx; + let dy = ay - by; + (dx * dx + dy * dy).sqrt() +} + +/// Expression computing the planar distance between the paired points of two columns. A constant +/// query point is just a [`ConstantArray`](vortex_array::arrays::ConstantArray) operand. +#[derive(Debug, Clone, Default, PartialEq, Eq, Hash)] +pub struct GeoDistance; + +impl GeoDistance { + /// A lazy `ScalarFnArray` computing the distance between each row of `a` and `b`. 
+ pub fn try_new_array(a: ArrayRef, b: ArrayRef, len: usize) -> VortexResult { + ScalarFnArray::try_new( + TypedScalarFnInstance::new(GeoDistance, EmptyOptions).erased(), + vec![a, b], + len, + ) + } +} + +impl ScalarFnVTable for GeoDistance { + type Options = EmptyOptions; + + fn id(&self) -> ScalarFnId { + ScalarFnId::new("vortex.geo.distance") + } + + fn serialize(&self, _: &Self::Options) -> VortexResult>> { + Ok(Some(vec![])) + } + + fn deserialize(&self, _: &[u8], _: &VortexSession) -> VortexResult { + Ok(EmptyOptions) + } + + fn arity(&self, _: &Self::Options) -> Arity { + Arity::Exact(2) + } + + fn child_name(&self, _: &Self::Options, child_idx: usize) -> ChildName { + match child_idx { + 0 => ChildName::from("a"), + 1 => ChildName::from("b"), + _ => unreachable!("distance has exactly two children"), + } + } + + fn return_dtype(&self, _: &Self::Options, _: &[DType]) -> VortexResult { + Ok(DType::Primitive(PType::F64, Nullability::NonNullable)) + } + + fn execute( + &self, + _: &Self::Options, + args: &dyn ExecutionArgs, + ctx: &mut ExecutionCtx, + ) -> VortexResult { + // Bulk path: one tight loop over the flat x/y slices, straight into the output buffer. 
+ let (ax, ay) = xy_columns(&args.get(0)?, ctx)?; + let (bx, by) = xy_columns(&args.get(1)?, ctx)?; + let a = ax.as_slice::().iter().zip(ay.as_slice::()); + let b = bx.as_slice::().iter().zip(by.as_slice::()); + let distances = a + .zip(b) + .map(|((&ax, &ay), (&bx, &by))| euclidean_distance(ax, ay, bx, by)); + Ok(PrimitiveArray::from_iter(distances).into_array()) + } +} + +#[cfg(test)] +mod tests { + use vortex_array::ArrayRef; + use vortex_array::ExecutionCtx; + use vortex_array::IntoArray; + use vortex_array::VortexSessionExecute; + use vortex_array::arrays::ConstantArray; + use vortex_array::arrays::ExtensionArray; + use vortex_array::arrays::PrimitiveArray; + use vortex_array::arrays::StructArray; + use vortex_array::dtype::extension::ExtDType; + use vortex_array::session::ArraySession; + use vortex_error::VortexResult; + use vortex_session::VortexSession; + + use super::GeoDistance; + use super::euclidean_distance; + use crate::extension::GeoMetadata; + use crate::extension::Point; + + /// A `Point` column (CRS `EPSG:4326`) over the given x/y coordinates. + fn point_column(xs: Vec, ys: Vec) -> VortexResult { + let storage = StructArray::from_fields(&[ + ("x", PrimitiveArray::from_iter(xs).into_array()), + ("y", PrimitiveArray::from_iter(ys).into_array()), + ])? + .into_array(); + let metadata = GeoMetadata { + crs: Some("EPSG:4326".to_string()), + }; + let dtype = ExtDType::::try_new(metadata, storage.dtype().clone())?; + Ok(ExtensionArray::new(dtype.erased(), storage).into_array()) + } + + /// A constant `Point` column of length `len`, every row at `(x, y)`. + fn point_constant( + x: f64, + y: f64, + len: usize, + ctx: &mut ExecutionCtx, + ) -> VortexResult { + let single = point_column(vec![x], vec![y])?.execute_scalar(0, ctx)?; + Ok(ConstantArray::new(single, len).into_array()) + } + + /// The kernel computes planar Euclidean distance (the 3–4–5 triangle). 
+ #[test] + fn euclidean_distance_is_planar() { + assert_eq!(euclidean_distance(0.0, 0.0, 3.0, 4.0), 5.0); + assert_eq!(euclidean_distance(1.5, -1.5, 1.5, -1.5), 0.0); + } + + /// `GeoDistance` returns the per-row distance between two point columns (here the second is a + /// constant query point). + #[test] + fn distance_over_points() -> VortexResult<()> { + let session = VortexSession::empty().with::(); + let mut ctx = session.create_execution_ctx(); + + let a = point_column(vec![0.0, 3.0, 0.0, 3.0], vec![0.0, 0.0, 4.0, 4.0])?; + let b = point_constant(0.0, 0.0, 4, &mut ctx)?; + let distance = GeoDistance::try_new_array(a, b, 4)?.into_array(); + + let got: Vec = (0..4) + .map(|idx| f64::try_from(&distance.execute_scalar(idx, &mut ctx)?)) + .collect::>()?; + assert_eq!(got, vec![0.0, 3.0, 4.0, 5.0]); + Ok(()) + } +} diff --git a/vortex-geo/src/scalar_fn/mod.rs b/vortex-geo/src/scalar_fn/mod.rs new file mode 100644 index 00000000000..ee9d849204d --- /dev/null +++ b/vortex-geo/src/scalar_fn/mod.rs @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Geometry scalar functions over the [`Point`](crate::extension::Point) type. Currently +//! [`GeoDistance`], the planar distance between two point columns. + +pub mod distance; + +pub use distance::GeoDistance; From 88cb5a2e41610d596ed2c8b8c4a994b5cf84987d Mon Sep 17 00:00:00 2001 From: Nemo Yu Date: Tue, 9 Jun 2026 17:51:27 -0400 Subject: [PATCH 2/5] feat(vortex-geo): GeoDistance from a point column to a constant query point GeoDistance computes the planar distance from each point in a column to a single constant query point (e.g. `ST_Distance(column, point)`). The second operand must be a constant: it is decoded once and broadcast over the column rather than materialized to one identical row per output element. Column-to-column distance is unsupported and errors.
`try_new_array` now infers the output length from the point column instead of taking it as an explicit parameter. Signed-off-by: Nemo Yu --- vortex-geo/src/scalar_fn/distance.rs | 81 +++++++++++++++++++++------- 1 file changed, 63 insertions(+), 18 deletions(-) diff --git a/vortex-geo/src/scalar_fn/distance.rs b/vortex-geo/src/scalar_fn/distance.rs index 9dcb0036fec..64e533e3af8 100644 --- a/vortex-geo/src/scalar_fn/distance.rs +++ b/vortex-geo/src/scalar_fn/distance.rs @@ -1,7 +1,7 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -//! Planar distance between the paired points of two columns. +//! Planar distance from a point column to a constant query point. use vortex_array::ArrayRef; use vortex_array::ExecutionCtx; @@ -19,8 +19,10 @@ use vortex_array::scalar_fn::ScalarFnId; use vortex_array::scalar_fn::ScalarFnVTable; use vortex_array::scalar_fn::TypedScalarFnInstance; use vortex_error::VortexResult; +use vortex_error::vortex_bail; use vortex_session::VortexSession; +use crate::extension::coordinate_from_scalar; use crate::extension::xy_columns; /// Planar Euclidean distance between `(ax, ay)` and `(bx, by)`. @@ -30,14 +32,17 @@ fn euclidean_distance(ax: f64, ay: f64, bx: f64, by: f64) -> f64 { (dx * dx + dy * dy).sqrt() } -/// Expression computing the planar distance between the paired points of two columns. A constant -/// query point is just a [`ConstantArray`](vortex_array::arrays::ConstantArray) operand. +/// Planar distance from each point in a point column to a single constant query point. The first +/// operand is the point column, the second the constant query point; column-to-column distance is +/// not supported. #[derive(Debug, Clone, Default, PartialEq, Eq, Hash)] pub struct GeoDistance; impl GeoDistance { - /// A lazy `ScalarFnArray` computing the distance between each row of `a` and `b`. 
- pub fn try_new_array(a: ArrayRef, b: ArrayRef, len: usize) -> VortexResult { + /// A lazy `ScalarFnArray` computing the distance from each row of the point column `a` to the + /// constant query point `b`. The output length is taken from `a`. + pub fn try_new_array(a: ArrayRef, b: ArrayRef) -> VortexResult { + let len = a.len(); ScalarFnArray::try_new( TypedScalarFnInstance::new(GeoDistance, EmptyOptions).erased(), vec![a, b], @@ -83,14 +88,18 @@ impl ScalarFnVTable for GeoDistance { args: &dyn ExecutionArgs, ctx: &mut ExecutionCtx, ) -> VortexResult { - // Bulk path: one tight loop over the flat x/y slices, straight into the output buffer. - let (ax, ay) = xy_columns(&args.get(0)?, ctx)?; - let (bx, by) = xy_columns(&args.get(1)?, ctx)?; - let a = ax.as_slice::().iter().zip(ay.as_slice::()); - let b = bx.as_slice::().iter().zip(by.as_slice::()); - let distances = a - .zip(b) - .map(|((&ax, &ay), (&bx, &by))| euclidean_distance(ax, ay, bx, by)); + // `a` is the point column; `b` is the constant query point, decoded once and broadcast. + let points = args.get(0)?; + let Some(query) = args.get(1)?.as_constant() else { + vortex_bail!("GeoDistance requires a constant query point as its second operand"); + }; + let query = coordinate_from_scalar(&query)?; + let (xs, ys) = xy_columns(&points, ctx)?; + let distances = xs + .as_slice::() + .iter() + .zip(ys.as_slice::()) + .map(|(&x, &y)| euclidean_distance(x, y, query.x, query.y)); Ok(PrimitiveArray::from_iter(distances).into_array()) } } @@ -98,6 +107,7 @@ impl ScalarFnVTable for GeoDistance { #[cfg(test)] mod tests { use vortex_array::ArrayRef; + use vortex_array::Canonical; use vortex_array::ExecutionCtx; use vortex_array::IntoArray; use vortex_array::VortexSessionExecute; @@ -140,6 +150,15 @@ mod tests { Ok(ConstantArray::new(single, len).into_array()) } + /// Execute a `GeoDistance` array and read back its per-row `f64` distances. 
+ fn distances(distance: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult> { + Ok(distance + .execute::(ctx)? + .into_primitive() + .as_slice::() + .to_vec()) + } + /// The kernel computes planar Euclidean distance (the 3–4–5 triangle). #[test] fn euclidean_distance_is_planar() { @@ -156,12 +175,38 @@ mod tests { let a = point_column(vec![0.0, 3.0, 0.0, 3.0], vec![0.0, 0.0, 4.0, 4.0])?; let b = point_constant(0.0, 0.0, 4, &mut ctx)?; - let distance = GeoDistance::try_new_array(a, b, 4)?.into_array(); + let distance = GeoDistance::try_new_array(a, b)?.into_array(); + + assert_eq!(distances(distance, &mut ctx)?, vec![0.0, 3.0, 4.0, 5.0]); + Ok(()) + } + + /// Without a constant query point on either side, column-to-column distance is unsupported and + /// the kernel errors rather than computing it. + #[test] + fn distance_requires_constant_query_point() -> VortexResult<()> { + let session = VortexSession::empty().with::(); + let mut ctx = session.create_execution_ctx(); + + let a = point_column(vec![0.0, 1.0], vec![0.0, 1.0])?; + let b = point_column(vec![3.0, 1.0], vec![4.0, 1.0])?; + let distance = GeoDistance::try_new_array(a, b)?.into_array(); + + assert!(distance.execute::(&mut ctx).is_err()); + Ok(()) + } + + /// Two constant operands: every row has the same distance. 
+ #[test] + fn distance_between_two_constants() -> VortexResult<()> { + let session = VortexSession::empty().with::(); + let mut ctx = session.create_execution_ctx(); + + let a = point_constant(0.0, 0.0, 3, &mut ctx)?; + let b = point_constant(3.0, 4.0, 3, &mut ctx)?; + let distance = GeoDistance::try_new_array(a, b)?.into_array(); - let got: Vec = (0..4) - .map(|idx| f64::try_from(&distance.execute_scalar(idx, &mut ctx)?)) - .collect::>()?; - assert_eq!(got, vec![0.0, 3.0, 4.0, 5.0]); + assert_eq!(distances(distance, &mut ctx)?, vec![5.0, 5.0, 5.0]); Ok(()) } } From 87894300ec5e80580ffa958c0662afabbe98bded Mon Sep 17 00:00:00 2001 From: Nemo Yu Date: Wed, 10 Jun 2026 14:00:15 -0400 Subject: [PATCH 3/5] fix: review comments && column to column distance --- vortex-geo/src/extension/coordinate.rs | 87 ++++++++++---------- vortex-geo/src/extension/mod.rs | 4 +- vortex-geo/src/extension/point.rs | 37 +++++---- vortex-geo/src/lib.rs | 2 +- vortex-geo/src/scalar_fn/distance.rs | 105 ++++++++++++++++++------- vortex-geo/src/scalar_fn/mod.rs | 5 +- 6 files changed, 142 insertions(+), 98 deletions(-) diff --git a/vortex-geo/src/extension/coordinate.rs b/vortex-geo/src/extension/coordinate.rs index 3599c003e71..9d073ad4f99 100644 --- a/vortex-geo/src/extension/coordinate.rs +++ b/vortex-geo/src/extension/coordinate.rs @@ -1,24 +1,22 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -//! The coordinate building block shared by geometry extension types: the `Struct` -//! storage, its [`Dimension`], the decoded [`Coordinate`] value, and the readers that decode it. -//! `z`/`m` are optional, so all four GeoArrow dimensions share one value type — no third-party deps. +//! Coordinate building blocks for geometry extension types: the `Struct` storage, +//! its [`Dimension`], and the decoded [`Coordinate`] value. 
use std::fmt::Display; use std::fmt::Formatter; use vortex_array::ArrayRef; -use vortex_array::Canonical; use vortex_array::ExecutionCtx; +use vortex_array::arrays::ExtensionArray; use vortex_array::arrays::PrimitiveArray; +use vortex_array::arrays::StructArray; use vortex_array::arrays::extension::ExtensionArrayExt; use vortex_array::arrays::struct_::StructArrayExt; use vortex_array::dtype::DType; -use vortex_array::dtype::FieldNames; use vortex_array::dtype::Nullability; use vortex_array::dtype::PType; -use vortex_array::dtype::StructFields; use vortex_array::scalar::Scalar; use vortex_error::VortexResult; use vortex_error::vortex_bail; @@ -26,7 +24,7 @@ use vortex_error::vortex_err; /// Coordinate dimensions, matching GeoArrow. Field order is fixed: x, y, then z before m. #[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum Dimension { +pub(crate) enum Dimension { /// 2D: `x`, `y`. Xy, /// 3D with elevation: `x`, `y`, `z`. @@ -38,18 +36,8 @@ pub enum Dimension { } impl Dimension { - /// The coordinate struct field names for this dimension, in GeoArrow order. - pub fn field_names(self) -> &'static [&'static str] { - match self { - Dimension::Xy => &["x", "y"], - Dimension::Xyz => &["x", "y", "z"], - Dimension::Xym => &["x", "y", "m"], - Dimension::Xyzm => &["x", "y", "z", "m"], - } - } - /// Recover the dimension from a coordinate's field names, in GeoArrow order. - pub fn from_field_names(names: &[&str]) -> VortexResult { + pub(crate) fn from_field_names(names: &[&str]) -> VortexResult { Ok(match names { ["x", "y"] => Dimension::Xy, ["x", "y", "z"] => Dimension::Xyz, @@ -61,16 +49,19 @@ impl Dimension { } /// A decoded coordinate. `z`/`m` are `Some` iff the storage dimension includes them. +/// +/// This is the native value produced when unpacking a [`Point`](crate::extension::Point) scalar; +/// the rest of the coordinate machinery is crate-internal. #[derive(Debug, Clone, Copy, PartialEq)] pub struct Coordinate { /// The x (longitude/easting) ordinate. 
- pub x: f64, + x: f64, /// The y (latitude/northing) ordinate. - pub y: f64, + y: f64, /// The optional z (elevation) ordinate. - pub z: Option, + z: Option, /// The optional m (measure) ordinate. - pub m: Option, + m: Option, } impl Coordinate { @@ -83,6 +74,26 @@ impl Coordinate { m: None, } } + + /// The x (longitude/easting) ordinate. + pub fn x(&self) -> f64 { + self.x + } + + /// The y (latitude/northing) ordinate. + pub fn y(&self) -> f64 { + self.y + } + + /// The z (elevation) ordinate, if the dimension includes one. + pub fn z(&self) -> Option { + self.z + } + + /// The m (measure) ordinate, if the dimension includes one. + pub fn m(&self) -> Option { + self.m + } } impl Display for Coordinate { @@ -91,23 +102,9 @@ impl Display for Coordinate { } } -/// The coordinate storage dtype for a dimension: `Struct` of non-nullable f64. -pub fn coordinate_dtype(dim: Dimension, nullability: Nullability) -> DType { - let names = dim.field_names(); - let fields = std::iter::repeat_n( - DType::Primitive(PType::F64, Nullability::NonNullable), - names.len(), - ) - .collect::>(); - DType::Struct( - StructFields::new(FieldNames::from(names), fields), - nullability, - ) -} - /// Validate that `dtype` is a coordinate struct of non-nullable `f64` fields, returning its /// [`Dimension`]. Any of the four GeoArrow dimensions validates. 
-pub fn coordinate_dimension(dtype: &DType) -> VortexResult { +pub(crate) fn coordinate_dimension(dtype: &DType) -> VortexResult { let DType::Struct(fields, _) = dtype else { vortex_bail!("coordinate storage must be a Struct, was {dtype}"); }; @@ -153,7 +150,7 @@ pub(crate) fn coordinate_from_struct(scalar: &Scalar) -> VortexResult VortexResult { +pub(crate) fn coordinate_from_scalar(scalar: &Scalar) -> VortexResult { match scalar.dtype().as_extension_opt() { Some(_) => coordinate_from_struct(&scalar.as_extension().to_storage_scalar()), None => coordinate_from_struct(scalar), @@ -161,28 +158,24 @@ pub fn coordinate_from_scalar(scalar: &Scalar) -> VortexResult { } /// Canonicalize a point column once and return its flat `x`/`y` `f64` columns. The bulk counterpart -/// to [`coordinate_from_scalar`]; distance is planar, so `z`/`m` are ignored. +/// to [`coordinate_from_scalar`]; distances use only `x`/`y`, so `z`/`m` are ignored. pub(crate) fn xy_columns( points: &ArrayRef, ctx: &mut ExecutionCtx, ) -> VortexResult<(PrimitiveArray, PrimitiveArray)> { let storage = points .clone() - .execute::(ctx)? - .into_extension() + .execute::(ctx)? .storage_array() .clone() - .execute::(ctx)? - .into_struct(); + .execute::(ctx)?; let xs = storage .unmasked_field_by_name("x")? .clone() - .execute::(ctx)? - .into_primitive(); + .execute::(ctx)?; let ys = storage .unmasked_field_by_name("y")? .clone() - .execute::(ctx)? 
- .into_primitive(); + .execute::(ctx)?; Ok((xs, ys)) } diff --git a/vortex-geo/src/extension/mod.rs b/vortex-geo/src/extension/mod.rs index e4373eb4011..d69dd239d14 100644 --- a/vortex-geo/src/extension/mod.rs +++ b/vortex-geo/src/extension/mod.rs @@ -1,14 +1,12 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -mod coordinate; +pub(crate) mod coordinate; mod point; mod wkb; use std::fmt::Display; -pub(crate) use coordinate::xy_columns; -pub use coordinate::*; pub use point::*; pub use wkb::*; diff --git a/vortex-geo/src/extension/point.rs b/vortex-geo/src/extension/point.rs index 49867adfebd..b24d99e86a6 100644 --- a/vortex-geo/src/extension/point.rs +++ b/vortex-geo/src/extension/point.rs @@ -61,20 +61,21 @@ mod tests { use vortex_array::arrays::PrimitiveArray; use vortex_array::arrays::StructArray; use vortex_array::dtype::DType; + use vortex_array::dtype::FieldNames; use vortex_array::dtype::Nullability; use vortex_array::dtype::PType; + use vortex_array::dtype::StructFields; use vortex_array::dtype::extension::ExtDType; use vortex_array::session::ArraySession; use vortex_error::VortexResult; use vortex_session::VortexSession; use super::Point; - use crate::extension::Coordinate; - use crate::extension::Dimension; use crate::extension::GeoMetadata; - use crate::extension::coordinate_dimension; - use crate::extension::coordinate_dtype; - use crate::extension::coordinate_from_scalar; + use crate::extension::coordinate::Coordinate; + use crate::extension::coordinate::Dimension; + use crate::extension::coordinate::coordinate_dimension; + use crate::extension::coordinate::coordinate_from_scalar; fn geo_meta() -> GeoMetadata { GeoMetadata { @@ -82,8 +83,21 @@ mod tests { } } - /// `Point` accepts every GeoArrow dimension; the storage carries the canonical field names and - /// the dimension round-trips, so a z/m swap or a mislabel would be caught. 
+ /// A coordinate storage dtype with the given field names, non-nullable `f64` per field. + fn coordinate_dtype(names: &[&'static str]) -> DType { + let fields = std::iter::repeat_n( + DType::Primitive(PType::F64, Nullability::NonNullable), + names.len(), + ) + .collect::>(); + DType::Struct( + StructFields::new(FieldNames::from(names), fields), + Nullability::NonNullable, + ) + } + + /// `Point` accepts every GeoArrow dimension; the canonical field names round-trip to their + /// dimension, so a z/m swap or a mislabel would be caught. #[test] fn point_validates_every_dimension() -> VortexResult<()> { let cases = [ @@ -92,13 +106,8 @@ mod tests { (Dimension::Xym, ["x", "y", "m"].as_slice()), (Dimension::Xyzm, ["x", "y", "z", "m"].as_slice()), ]; - for (dim, expected_fields) in cases { - let storage = coordinate_dtype(dim, Nullability::NonNullable); - let DType::Struct(fields, _) = &storage else { - unreachable!("coordinate_dtype builds a struct"); - }; - let names: Vec<&str> = fields.names().iter().map(|n| n.as_ref()).collect(); - assert_eq!(names.as_slice(), expected_fields); + for (dim, names) in cases { + let storage = coordinate_dtype(names); assert_eq!(coordinate_dimension(&storage)?, dim); ExtDType::::try_new(geo_meta(), storage)?; } diff --git a/vortex-geo/src/lib.rs b/vortex-geo/src/lib.rs index 90f93dfa2f8..9d0cde26f9e 100644 --- a/vortex-geo/src/lib.rs +++ b/vortex-geo/src/lib.rs @@ -10,7 +10,7 @@ use vortex_session::VortexSession; use crate::extension::Point; use crate::extension::WellKnownBinary; -use crate::scalar_fn::GeoDistance; +use crate::scalar_fn::distance::GeoDistance; pub mod extension; pub mod scalar_fn; diff --git a/vortex-geo/src/scalar_fn/distance.rs b/vortex-geo/src/scalar_fn/distance.rs index 64e533e3af8..90b95e1ca4a 100644 --- a/vortex-geo/src/scalar_fn/distance.rs +++ b/vortex-geo/src/scalar_fn/distance.rs @@ -1,16 +1,18 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -//! 
Planar distance from a point column to a constant query point. +//! Straight-line (Euclidean) distance between points; "planar" distance in GIS terms. use vortex_array::ArrayRef; use vortex_array::ExecutionCtx; use vortex_array::IntoArray; +use vortex_array::arrays::ConstantArray; use vortex_array::arrays::PrimitiveArray; use vortex_array::arrays::ScalarFnArray; use vortex_array::dtype::DType; use vortex_array::dtype::Nullability; use vortex_array::dtype::PType; +use vortex_array::scalar::Scalar; use vortex_array::scalar_fn::Arity; use vortex_array::scalar_fn::ChildName; use vortex_array::scalar_fn::EmptyOptions; @@ -19,28 +21,29 @@ use vortex_array::scalar_fn::ScalarFnId; use vortex_array::scalar_fn::ScalarFnVTable; use vortex_array::scalar_fn::TypedScalarFnInstance; use vortex_error::VortexResult; -use vortex_error::vortex_bail; use vortex_session::VortexSession; -use crate::extension::coordinate_from_scalar; -use crate::extension::xy_columns; +use crate::extension::coordinate::coordinate_from_scalar; +use crate::extension::coordinate::xy_columns; -/// Planar Euclidean distance between `(ax, ay)` and `(bx, by)`. +/// Straight-line (L2) distance between `(ax, ay)` and `(bx, by)`. fn euclidean_distance(ax: f64, ay: f64, bx: f64, by: f64) -> f64 { let dx = ax - bx; let dy = ay - by; (dx * dx + dy * dy).sqrt() } -/// Planar distance from each point in a point column to a single constant query point. The first -/// operand is the point column, the second the constant query point; column-to-column distance is -/// not supported. +/// Straight-line (Euclidean) distance between two point operands — "planar" distance in GIS terms +/// (e.g. PostGIS `ST_Distance`). No geodesic correction, and `z`/`m` are ignored. +/// +/// The operands are two point columns of equal length; either (or both) may be constant, in which +/// case the constant query point is decoded once and broadcast. 
#[derive(Debug, Clone, Default, PartialEq, Eq, Hash)] pub struct GeoDistance; impl GeoDistance { - /// A lazy `ScalarFnArray` computing the distance from each row of the point column `a` to the - /// constant query point `b`. The output length is taken from `a`. + /// A lazy `ScalarFnArray` computing the per-row distance between the point columns `a` and + /// `b`; either may be constant. The output length is taken from `a`. pub fn try_new_array(a: ArrayRef, b: ArrayRef) -> VortexResult { let len = a.len(); ScalarFnArray::try_new( @@ -88,22 +91,53 @@ impl ScalarFnVTable for GeoDistance { args: &dyn ExecutionArgs, ctx: &mut ExecutionCtx, ) -> VortexResult { - // `a` is the point column; `b` is the constant query point, decoded once and broadcast. - let points = args.get(0)?; - let Some(query) = args.get(1)?.as_constant() else { - vortex_bail!("GeoDistance requires a constant query point as its second operand"); - }; - let query = coordinate_from_scalar(&query)?; - let (xs, ys) = xy_columns(&points, ctx)?; - let distances = xs - .as_slice::() - .iter() - .zip(ys.as_slice::()) - .map(|(&x, &y)| euclidean_distance(x, y, query.x, query.y)); - Ok(PrimitiveArray::from_iter(distances).into_array()) + let a = args.get(0)?; + let b = args.get(1)?; + match (a.as_constant(), b.as_constant()) { + (Some(qa), Some(qb)) => { + let qa = coordinate_from_scalar(&qa)?; + let qb = coordinate_from_scalar(&qb)?; + let distance = euclidean_distance(qa.x(), qa.y(), qb.x(), qb.y()); + Ok(ConstantArray::new( + Scalar::primitive(distance, Nullability::NonNullable), + a.len(), + ) + .into_array()) + } + (Some(query), None) => distances_to_constant(&b, &query, ctx), + (None, Some(query)) => distances_to_constant(&a, &query, ctx), + (None, None) => { + let (axs, ays) = xy_columns(&a, ctx)?; + let (bxs, bys) = xy_columns(&b, ctx)?; + let distances = axs + .as_slice::() + .iter() + .zip(ays.as_slice::()) + .zip(bxs.as_slice::().iter().zip(bys.as_slice::())) + .map(|((&ax, &ay), (&bx, &by))| 
euclidean_distance(ax, ay, bx, by)); + Ok(PrimitiveArray::from_iter(distances).into_array()) + } + } } } +/// Distance from each row of `points` to a constant `query` point, decoded once and broadcast. +/// Distance is symmetric, so this serves a constant on either side. +fn distances_to_constant( + points: &ArrayRef, + query: &Scalar, + ctx: &mut ExecutionCtx, +) -> VortexResult { + let query = coordinate_from_scalar(query)?; + let (xs, ys) = xy_columns(points, ctx)?; + let distances = xs + .as_slice::() + .iter() + .zip(ys.as_slice::()) + .map(|(&x, &y)| euclidean_distance(x, y, query.x(), query.y())); + Ok(PrimitiveArray::from_iter(distances).into_array()) +} + #[cfg(test)] mod tests { use vortex_array::ArrayRef; @@ -159,9 +193,9 @@ mod tests { .to_vec()) } - /// The kernel computes planar Euclidean distance (the 3–4–5 triangle). + /// The kernel computes straight-line distance (the 3–4–5 triangle). #[test] - fn euclidean_distance_is_planar() { + fn euclidean_distance_is_straight_line() { assert_eq!(euclidean_distance(0.0, 0.0, 3.0, 4.0), 5.0); assert_eq!(euclidean_distance(1.5, -1.5, 1.5, -1.5), 0.0); } @@ -181,10 +215,9 @@ mod tests { Ok(()) } - /// Without a constant query point on either side, column-to-column distance is unsupported and - /// the kernel errors rather than computing it. + /// Column-to-column distance pairs corresponding rows of the two columns. #[test] - fn distance_requires_constant_query_point() -> VortexResult<()> { + fn distance_between_columns() -> VortexResult<()> { let session = VortexSession::empty().with::(); let mut ctx = session.create_execution_ctx(); @@ -192,7 +225,21 @@ mod tests { let b = point_column(vec![3.0, 1.0], vec![4.0, 1.0])?; let distance = GeoDistance::try_new_array(a, b)?.into_array(); - assert!(distance.execute::(&mut ctx).is_err()); + assert_eq!(distances(distance, &mut ctx)?, vec![5.0, 0.0]); + Ok(()) + } + + /// The constant query point may be either operand; distance is symmetric. 
+ #[test] + fn distance_with_constant_first_operand() -> VortexResult<()> { + let session = VortexSession::empty().with::(); + let mut ctx = session.create_execution_ctx(); + + let a = point_constant(0.0, 0.0, 4, &mut ctx)?; + let b = point_column(vec![0.0, 3.0, 0.0, 3.0], vec![0.0, 0.0, 4.0, 4.0])?; + let distance = GeoDistance::try_new_array(a, b)?.into_array(); + + assert_eq!(distances(distance, &mut ctx)?, vec![0.0, 3.0, 4.0, 5.0]); Ok(()) } diff --git a/vortex-geo/src/scalar_fn/mod.rs b/vortex-geo/src/scalar_fn/mod.rs index ee9d849204d..385208f1991 100644 --- a/vortex-geo/src/scalar_fn/mod.rs +++ b/vortex-geo/src/scalar_fn/mod.rs @@ -1,9 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -//! Geometry scalar functions over the [`Point`](crate::extension::Point) type. Currently -//! [`GeoDistance`], the planar distance between two point columns. +//! Geometry scalar functions over the [`Point`](crate::extension::Point) type. pub mod distance; - -pub use distance::GeoDistance; From 549f5c4d85f17696c6cced0c0d7537c840b896a2 Mon Sep 17 00:00:00 2001 From: Nemo Yu Date: Wed, 10 Jun 2026 14:18:22 -0400 Subject: [PATCH 4/5] fix: better explanation on dimension z and m --- vortex-geo/src/extension/coordinate.rs | 26 ++++++++++++++++---------- vortex-geo/src/extension/point.rs | 8 +++++--- vortex-geo/src/scalar_fn/distance.rs | 2 +- 3 files changed, 22 insertions(+), 14 deletions(-) diff --git a/vortex-geo/src/extension/coordinate.rs b/vortex-geo/src/extension/coordinate.rs index 9d073ad4f99..ffc0e7ca887 100644 --- a/vortex-geo/src/extension/coordinate.rs +++ b/vortex-geo/src/extension/coordinate.rs @@ -1,8 +1,14 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -//! Coordinate building blocks for geometry extension types: the `Struct` storage, +//! Coordinate building blocks for geometry extension types: the `Struct` storage, //! 
its [`Dimension`], and the decoded [`Coordinate`] value. +//! +//! The coordinate fields, where `?` marks an optional field, are: +//! - `x` — longitude or easting +//! - `y` — latitude or northing +//! - `z?` — elevation +//! - `m?` — measure: an arbitrary per-point value such as distance along a route or a timestamp use std::fmt::Display; use std::fmt::Formatter; @@ -22,7 +28,7 @@ use vortex_error::VortexResult; use vortex_error::vortex_bail; use vortex_error::vortex_err; -/// Coordinate dimensions, matching GeoArrow. Field order is fixed: x, y, then z before m. +/// Coordinate dimensions, matching GeoArrow. Field order is fixed: `x`, `y`, then `z` before `m`. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub(crate) enum Dimension { /// 2D: `x`, `y`. @@ -48,7 +54,7 @@ impl Dimension { } } -/// A decoded coordinate. `z`/`m` are `Some` iff the storage dimension includes them. +/// A decoded coordinate. `z?`/`m?` are `Some` iff the storage dimension includes them. /// /// This is the native value produced when unpacking a [`Point`](crate::extension::Point) scalar; /// the rest of the coordinate machinery is crate-internal. @@ -58,14 +64,14 @@ pub struct Coordinate { x: f64, /// The y (latitude/northing) ordinate. y: f64, - /// The optional z (elevation) ordinate. + /// The optional `z?` (elevation) ordinate. z: Option, - /// The optional m (measure) ordinate. + /// The optional `m?` (measure) ordinate. m: Option, } impl Coordinate { - /// A 2D coordinate (no `z`/`m`). + /// A 2D coordinate (`z?`/`m?` unset). pub fn xy(x: f64, y: f64) -> Self { Coordinate { x, @@ -85,12 +91,12 @@ impl Coordinate { self.y } - /// The z (elevation) ordinate, if the dimension includes one. + /// The `z?` (elevation) ordinate, if the dimension includes one. pub fn z(&self) -> Option { self.z } - /// The m (measure) ordinate, if the dimension includes one. + /// The `m?` (measure) ordinate, if the dimension includes one. 
pub fn m(&self) -> Option { self.m } @@ -123,7 +129,7 @@ pub(crate) fn coordinate_dimension(dtype: &DType) -> VortexResult { Dimension::from_field_names(&names) } -/// Decode a [`Coordinate`] from a coordinate `Struct` scalar (`z`/`m` read iff +/// Decode a [`Coordinate`] from a coordinate `Struct` scalar (`z?`/`m?` read iff /// present, so the same decoder serves every dimension). pub(crate) fn coordinate_from_struct(scalar: &Scalar) -> VortexResult { let fields = scalar.as_struct(); @@ -158,7 +164,7 @@ pub(crate) fn coordinate_from_scalar(scalar: &Scalar) -> VortexResult` of `f64`, tagged with [`GeoMetadata`] (CRS). +//! `Struct` of `f64`, tagged with [`GeoMetadata`] (CRS). `z?` is an optional +//! elevation and `m?` an optional measure — an arbitrary per-point value such as distance along a +//! route or a timestamp. use prost::Message; use vortex_array::dtype::extension::ExtDType; @@ -17,7 +19,7 @@ use super::coordinate::Coordinate; use super::coordinate::coordinate_dimension; use super::coordinate::coordinate_from_struct; -/// A single location: `geoarrow.point`, stored as `Struct` of `f64`. +/// A single location: `geoarrow.point`, stored as `Struct` of `f64`. #[derive(Debug, Clone, Default, PartialEq, Eq, Hash)] pub struct Point; @@ -97,7 +99,7 @@ mod tests { } /// `Point` accepts every GeoArrow dimension; the canonical field names round-trip to their - /// dimension, so a z/m swap or a mislabel would be caught. + /// dimension, so a `z?`/`m?` swap or a mislabel would be caught. 
#[test] fn point_validates_every_dimension() -> VortexResult<()> { let cases = [ diff --git a/vortex-geo/src/scalar_fn/distance.rs b/vortex-geo/src/scalar_fn/distance.rs index 90b95e1ca4a..3efff131341 100644 --- a/vortex-geo/src/scalar_fn/distance.rs +++ b/vortex-geo/src/scalar_fn/distance.rs @@ -34,7 +34,7 @@ fn euclidean_distance(ax: f64, ay: f64, bx: f64, by: f64) -> f64 { } /// Straight-line (Euclidean) distance between two point operands — "planar" distance in GIS terms -/// (e.g. PostGIS `ST_Distance`). No geodesic correction, and `z`/`m` are ignored. +/// (e.g. PostGIS `ST_Distance`). No geodesic correction, and `z?`/`m?` are ignored. /// /// The operands are two point columns of equal length; either (or both) may be constant, in which /// case the constant query point is decoded once and broadcast. From ec9587574076b5ea0e7dc6bf1ce44a4ec7b2ce5d Mon Sep 17 00:00:00 2001 From: Nemo Yu Date: Wed, 10 Jun 2026 14:51:49 -0400 Subject: [PATCH 5/5] fix: changes requested, remove internal clone --- vortex-geo/src/extension/coordinate.rs | 82 ++++++++++++++------------ vortex-geo/src/scalar_fn/distance.rs | 15 ++--- 2 files changed, 53 insertions(+), 44 deletions(-) diff --git a/vortex-geo/src/extension/coordinate.rs b/vortex-geo/src/extension/coordinate.rs index ffc0e7ca887..45f9aefc45d 100644 --- a/vortex-geo/src/extension/coordinate.rs +++ b/vortex-geo/src/extension/coordinate.rs @@ -26,6 +26,7 @@ use vortex_array::dtype::PType; use vortex_array::scalar::Scalar; use vortex_error::VortexResult; use vortex_error::vortex_bail; +use vortex_error::vortex_ensure; use vortex_error::vortex_err; /// Coordinate dimensions, matching GeoArrow. Field order is fixed: `x`, `y`, then `z` before `m`. @@ -61,13 +62,13 @@ impl Dimension { #[derive(Debug, Clone, Copy, PartialEq)] pub struct Coordinate { /// The x (longitude/easting) ordinate. - x: f64, + pub x: f64, /// The y (latitude/northing) ordinate. - y: f64, + pub y: f64, /// The optional `z?` (elevation) ordinate. 
- z: Option, + pub z: Option, /// The optional `m?` (measure) ordinate. - m: Option, + pub m: Option, } impl Coordinate { @@ -80,31 +81,16 @@ impl Coordinate { m: None, } } - - /// The x (longitude/easting) ordinate. - pub fn x(&self) -> f64 { - self.x - } - - /// The y (latitude/northing) ordinate. - pub fn y(&self) -> f64 { - self.y - } - - /// The `z?` (elevation) ordinate, if the dimension includes one. - pub fn z(&self) -> Option { - self.z - } - - /// The `m?` (measure) ordinate, if the dimension includes one. - pub fn m(&self) -> Option { - self.m - } } impl Display for Coordinate { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - write!(f, "POINT({} {})", self.x, self.y) + fn fmt(&self, fmt: &mut Formatter<'_>) -> std::fmt::Result { + match (self.z, self.m) { + (None, None) => write!(fmt, "POINT({} {})", self.x, self.y), + (Some(z), None) => write!(fmt, "POINT Z ({} {} {})", self.x, self.y, z), + (None, Some(m)) => write!(fmt, "POINT M ({} {} {})", self.x, self.y, m), + (Some(z), Some(m)) => write!(fmt, "POINT ZM ({} {} {} {})", self.x, self.y, z, m), + } } } @@ -116,15 +102,14 @@ pub(crate) fn coordinate_dimension(dtype: &DType) -> VortexResult { }; let names: Vec<&str> = fields.names().iter().map(|n| n.as_ref()).collect(); for (i, field) in fields.fields().enumerate() { - if !matches!( - field, - DType::Primitive(PType::F64, Nullability::NonNullable) - ) { - vortex_bail!( - "coordinate field {} must be non-nullable f64, was {field}", - names[i] - ); - } + vortex_ensure!( + matches!( + field, + DType::Primitive(PType::F64, Nullability::NonNullable) + ), + "coordinate field {} must be non-nullable f64, was {field}", + names[i] + ); } Dimension::from_field_names(&names) } @@ -157,8 +142,8 @@ pub(crate) fn coordinate_from_struct(scalar: &Scalar) -> VortexResult VortexResult { - match scalar.dtype().as_extension_opt() { - Some(_) => coordinate_from_struct(&scalar.as_extension().to_storage_scalar()), + match scalar.as_extension_opt() { + 
Some(ext_scalar) => coordinate_from_struct(&ext_scalar.to_storage_scalar()), None => coordinate_from_struct(scalar), } } @@ -185,3 +170,26 @@ pub(crate) fn xy_columns( .execute::(ctx)?; Ok((xs, ys)) } + +#[cfg(test)] +mod tests { + use super::Coordinate; + + /// Display emits WKT, including `z?`/`m?` when present. + #[test] + fn display_is_wkt() { + let coordinate = |z, m| Coordinate { + x: 1.0, + y: 2.0, + z, + m, + }; + assert_eq!(coordinate(None, None).to_string(), "POINT(1 2)"); + assert_eq!(coordinate(Some(3.0), None).to_string(), "POINT Z (1 2 3)"); + assert_eq!(coordinate(None, Some(4.0)).to_string(), "POINT M (1 2 4)"); + assert_eq!( + coordinate(Some(3.0), Some(4.0)).to_string(), + "POINT ZM (1 2 3 4)" + ); + } +} diff --git a/vortex-geo/src/scalar_fn/distance.rs b/vortex-geo/src/scalar_fn/distance.rs index 3efff131341..0fda4fea6b2 100644 --- a/vortex-geo/src/scalar_fn/distance.rs +++ b/vortex-geo/src/scalar_fn/distance.rs @@ -6,6 +6,7 @@ use vortex_array::ArrayRef; use vortex_array::ExecutionCtx; use vortex_array::IntoArray; +use vortex_array::arrays::Constant; use vortex_array::arrays::ConstantArray; use vortex_array::arrays::PrimitiveArray; use vortex_array::arrays::ScalarFnArray; @@ -93,19 +94,19 @@ impl ScalarFnVTable for GeoDistance { ) -> VortexResult { let a = args.get(0)?; let b = args.get(1)?; - match (a.as_constant(), b.as_constant()) { + match (a.as_opt::(), b.as_opt::()) { (Some(qa), Some(qb)) => { - let qa = coordinate_from_scalar(&qa)?; - let qb = coordinate_from_scalar(&qb)?; - let distance = euclidean_distance(qa.x(), qa.y(), qb.x(), qb.y()); + let qa = coordinate_from_scalar(qa.scalar())?; + let qb = coordinate_from_scalar(qb.scalar())?; + let distance = euclidean_distance(qa.x, qa.y, qb.x, qb.y); Ok(ConstantArray::new( Scalar::primitive(distance, Nullability::NonNullable), a.len(), ) .into_array()) } - (Some(query), None) => distances_to_constant(&b, &query, ctx), - (None, Some(query)) => distances_to_constant(&a, &query, ctx), + 
(Some(query), None) => distances_to_constant(&b, query.scalar(), ctx), + (None, Some(query)) => distances_to_constant(&a, query.scalar(), ctx), (None, None) => { let (axs, ays) = xy_columns(&a, ctx)?; let (bxs, bys) = xy_columns(&b, ctx)?; @@ -134,7 +135,7 @@ fn distances_to_constant( .as_slice::() .iter() .zip(ys.as_slice::()) - .map(|(&x, &y)| euclidean_distance(x, y, query.x(), query.y())); + .map(|(&x, &y)| euclidean_distance(x, y, query.x, query.y)); Ok(PrimitiveArray::from_iter(distances).into_array()) }