From c1672801e91ac814d81833f9dc9b72d76a1086f4 Mon Sep 17 00:00:00 2001 From: Pranav Toggi Date: Sun, 28 Dec 2025 10:18:02 -0800 Subject: [PATCH 01/11] feat: implement ST_AsGeoJSON --- Cargo.toml | 1 + rust/sedona-functions/Cargo.toml | 2 + rust/sedona-functions/src/lib.rs | 1 + rust/sedona-functions/src/register.rs | 1 + rust/sedona-functions/src/st_asgeojson.rs | 465 ++++++++++++++++++++++ 5 files changed, 470 insertions(+) create mode 100644 rust/sedona-functions/src/st_asgeojson.rs diff --git a/Cargo.toml b/Cargo.toml index 9780fbe16..a84e149cb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -105,6 +105,7 @@ geos = { git="https://github.com/georust/geos.git", rev="47afbad2483e489911ddb45 geo-types = "0.7.17" geo-traits = "0.3.0" geo = "0.31.0" +geojson = "0.24.2" geo-index = { version = "0.3.2", features = ["use-geo_0_31"] } diff --git a/rust/sedona-functions/Cargo.toml b/rust/sedona-functions/Cargo.toml index 9fb7f35f4..72972c9e7 100644 --- a/rust/sedona-functions/Cargo.toml +++ b/rust/sedona-functions/Cargo.toml @@ -45,6 +45,8 @@ arrow-buffer = { workspace = true } datafusion-common = { workspace = true } datafusion-expr = { workspace = true } geo-traits = { workspace = true } +geo-types = { workspace = true } +geojson = { workspace = true } sedona-common = { workspace = true } sedona-expr = { workspace = true } sedona-geometry = { workspace = true } diff --git a/rust/sedona-functions/src/lib.rs b/rust/sedona-functions/src/lib.rs index 44c8ad027..fde750736 100644 --- a/rust/sedona-functions/src/lib.rs +++ b/rust/sedona-functions/src/lib.rs @@ -27,6 +27,7 @@ pub mod st_analyze_agg; mod st_area; mod st_asbinary; mod st_astext; +mod st_asgeojson; mod st_azimuth; mod st_buffer; mod st_centroid; diff --git a/rust/sedona-functions/src/register.rs b/rust/sedona-functions/src/register.rs index ff4395787..9fce9f5f6 100644 --- a/rust/sedona-functions/src/register.rs +++ b/rust/sedona-functions/src/register.rs @@ -64,6 +64,7 @@ pub fn default_function_set() -> FunctionSet { crate::sd_order::sd_order_udf, crate::st_area::st_area_udf, crate::st_asbinary::st_asbinary_udf, + crate::st_asgeojson::st_asgeojson_udf, crate::st_astext::st_astext_udf, crate::st_azimuth::st_azimuth_udf, crate::st_buffer::st_buffer_udf, diff --git a/rust/sedona-functions/src/st_asgeojson.rs b/rust/sedona-functions/src/st_asgeojson.rs new file mode 100644 index 000000000..6c36fa98a --- /dev/null +++ b/rust/sedona-functions/src/st_asgeojson.rs @@ -0,0 +1,465 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +use std::sync::Arc; + +use crate::executor::WkbExecutor; +use arrow_array::builder::StringBuilder; +use arrow_schema::DataType; +use datafusion_common::error::{DataFusionError, Result}; +use datafusion_expr::{ + scalar_doc_sections::DOC_SECTION_OTHER, ColumnarValue, Documentation, Volatility, +}; +use geo_traits::to_geo::{ + ToGeoLineString, ToGeoMultiLineString, ToGeoMultiPoint, ToGeoMultiPolygon, ToGeoPoint, + ToGeoPolygon, +}; +use geo_traits::{GeometryCollectionTrait, GeometryTrait, GeometryType}; +use geo_types::Geometry; +use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF}; +use sedona_schema::{datatypes::SedonaType, matchers::ArgMatcher}; + +/// Output format type for GeoJSON +#[derive(Debug, Clone, Copy, PartialEq)] +enum GeoJsonType { + Simple, + Feature, + FeatureCollection, +} + +impl GeoJsonType { + fn from_str(s: &str) -> Result { + match s.to_lowercase().as_str() { + "simple" => Ok(GeoJsonType::Simple), + "feature" => Ok(GeoJsonType::Feature), + "featurecollection" => Ok(GeoJsonType::FeatureCollection), + _ => Err(DataFusionError::Execution(format!( + "Invalid GeoJSON type '{}'. Valid options are: 'Simple', 'Feature', 'FeatureCollection'", + s + ))), + } + } +} + +/// ST_AsGeoJSON() scalar UDF implementation +/// +/// An implementation of GeoJSON writing using the geojson crate. +pub fn st_asgeojson_udf() -> SedonaScalarUDF { + let udf = SedonaScalarUDF::new( + "st_asgeojson", + vec![ + Arc::new(STAsGeoJSON {}), + Arc::new(STAsGeoJSONWithType {}), + ], + Volatility::Immutable, + Some(st_asgeojson_doc()), + ); + udf +} + +fn st_asgeojson_doc() -> Documentation { + Documentation::builder( + DOC_SECTION_OTHER, + "Return the GeoJSON representation of a geometry or geography", + "ST_AsGeoJSON (A: Geometry [, type: String])", + ) + .with_argument("geom", "geometry: Input geometry or geography") + .with_argument("type", "string (optional): Output type - 'Simple' (default), 'Feature', or 'FeatureCollection'") + .with_sql_example("SELECT ST_AsGeoJSON(ST_Point(1.0, 2.0))") + .with_sql_example("SELECT ST_AsGeoJSON(ST_Point(1.0, 2.0), 'Feature')") + .with_sql_example("SELECT ST_AsGeoJSON(ST_Point(1.0, 2.0), 'FeatureCollection')") + .with_related_udf("ST_GeomFromGeoJSON") + .build() +} + +#[derive(Debug)] +struct STAsGeoJSON {} + +impl SedonaScalarKernel for STAsGeoJSON { + fn return_type(&self, args: &[SedonaType]) -> Result> { + let matcher = ArgMatcher::new( + vec![ArgMatcher::is_geometry_or_geography()], + SedonaType::Arrow(DataType::Utf8), + ); + + matcher.match_args(args) + } + + fn invoke_batch( + &self, + arg_types: &[SedonaType], + args: &[ColumnarValue], + ) -> Result { + convert_to_geojson(arg_types, args, GeoJsonType::Simple) + } +} + +#[derive(Debug)] +struct STAsGeoJSONWithType {} + +impl SedonaScalarKernel for STAsGeoJSONWithType { + fn return_type(&self, args: &[SedonaType]) -> Result> { + let matcher = ArgMatcher::new( + vec![ + ArgMatcher::is_geometry_or_geography(), + ArgMatcher::is_string(), + ], + SedonaType::Arrow(DataType::Utf8), + ); + + matcher.match_args(args) + } + + fn invoke_batch( + &self, + arg_types: &[SedonaType], + args: &[ColumnarValue], + ) -> Result { + // Extract the type parameter + let geojson_type = match &args[1] { + ColumnarValue::Scalar(datafusion_common::ScalarValue::Utf8(Some(type_str))) => { + GeoJsonType::from_str(type_str.as_str())? + } + ColumnarValue::Scalar(datafusion_common::ScalarValue::Utf8(None)) => { + GeoJsonType::Simple // Default to Simple if NULL + } + _ => { + return Err(DataFusionError::Execution( + "Second argument to ST_AsGeoJSON must be a string literal".to_string(), + )); + } + }; + + convert_to_geojson(&arg_types[..1], &args[..1], geojson_type) + } +} + +fn convert_to_geojson( + arg_types: &[SedonaType], + args: &[ColumnarValue], + geojson_type: GeoJsonType, +) -> Result { + let executor = WkbExecutor::new(arg_types, args); + + // Estimate the minimum probable memory requirement of the output. + // GeoJSON is typically longer than WKT due to JSON formatting. + // Feature and FeatureCollection add extra wrapping + let base_size = match geojson_type { + GeoJsonType::Simple => 50, + GeoJsonType::Feature => 100, + GeoJsonType::FeatureCollection => 150, + }; + let min_probable_geojson_size = executor.num_iterations() * base_size; + + // Initialize an output builder of the appropriate type + let mut builder = + StringBuilder::with_capacity(executor.num_iterations(), min_probable_geojson_size); + + executor.execute_wkb_void(|maybe_item| { + match maybe_item { + Some(item) => { + // Convert WKB geometry to geo_types::Geometry using geo-traits + let geo_geometry = wkb_to_geometry(item)?; + + match geo_geometry { + Some(geom) => { + // Convert geo_types::Geometry to geojson::Geometry + let geojson_geom: geojson::Geometry = (&geom).try_into() + .map_err(|e| DataFusionError::Execution(format!("Failed to convert to GeoJSON: {:?}", e)))?; + + // Wrap the geometry based on the type parameter and serialize + let geojson_output = match geojson_type { + GeoJsonType::Simple => { + geojson_geom + } + GeoJsonType::Feature => { + let feature = geojson::Feature { + bbox: None, + geometry: Some(geojson_geom), + id: None, + properties: None, + foreign_members: None, + }; + let json_str = serde_json::to_string(&feature) + .map_err(|err| DataFusionError::External(Box::new(err)))?; + builder.append_value(&json_str); + return Ok(()); + } + GeoJsonType::FeatureCollection => { + let feature = geojson::Feature { + bbox: None, + geometry: Some(geojson_geom), + id: None, + properties: None, + foreign_members: None, + }; + let feature_collection = geojson::FeatureCollection { + bbox: None, + features: vec![feature], + foreign_members: None, + }; + let json_str = serde_json::to_string(&feature_collection) + .map_err(|err| DataFusionError::External(Box::new(err)))?; + builder.append_value(&json_str); + return Ok(()); + } + }; + + // For Simple type, serialize the geometry + let json_str = serde_json::to_string(&geojson_output) + .map_err(|err| DataFusionError::External(Box::new(err)))?; + builder.append_value(&json_str); + } + None => { + return Err(DataFusionError::NotImplemented( + "Unsupported geometry type for GeoJSON conversion".to_string(), + )); + } + } + } + None => builder.append_null(), + }; + + Ok(()) + })?; + + executor.finish(Arc::new(builder.finish())) +} + +/// Convert a WKB geometry to geo_types::Geometry, including GeometryCollection support +fn wkb_to_geometry(item: impl GeometryTrait) -> Result> { + let geo_geometry = match item.as_type() { + GeometryType::Point(geom) => geom.try_to_point().map(Geometry::Point), + GeometryType::LineString(geom) => Some(Geometry::LineString(geom.to_line_string())), + GeometryType::Polygon(geom) => Some(Geometry::Polygon(geom.to_polygon())), + GeometryType::MultiPoint(geom) => geom.try_to_multi_point().map(Geometry::MultiPoint), + GeometryType::MultiLineString(geom) => { + Some(Geometry::MultiLineString(geom.to_multi_line_string())) + } + GeometryType::MultiPolygon(geom) => { + Some(Geometry::MultiPolygon(geom.to_multi_polygon())) + } + GeometryType::GeometryCollection(geom) => convert_geometry_collection(geom)?, + _ => None, + }; + Ok(geo_geometry) +} + +/// Convert a GeometryCollection to geo_types::Geometry +/// Handles up to 1 level of nesting to avoid compiler issues with recursion +fn convert_geometry_collection>( + geom: &GC, +) -> Result> { + let geometries: Result> = geom + .geometries() + .map(|child| { + let child_geom = match child.as_type() { + GeometryType::Point(g) => g.try_to_point().map(Geometry::Point), + GeometryType::LineString(g) => Some(Geometry::LineString(g.to_line_string())), + GeometryType::Polygon(g) => Some(Geometry::Polygon(g.to_polygon())), + GeometryType::MultiPoint(g) => g.try_to_multi_point().map(Geometry::MultiPoint), + GeometryType::MultiLineString(g) => { + Some(Geometry::MultiLineString(g.to_multi_line_string())) + } + GeometryType::MultiPolygon(g) => { + Some(Geometry::MultiPolygon(g.to_multi_polygon())) + } + GeometryType::GeometryCollection(g) => { + // Support one level of nested GeometryCollection + return convert_geometry_collection(g)? + .ok_or_else(|| { + DataFusionError::NotImplemented( + "Nested GeometryCollection with unsupported types".to_string(), + ) + }); + } + _ => None, + }; + + child_geom.ok_or_else(|| { + DataFusionError::NotImplemented( + "GeometryCollection contains unsupported geometry types".to_string(), + ) + }) + }) + .collect(); + + let geometries = geometries?; + + Ok(Some(Geometry::GeometryCollection( + geo_types::GeometryCollection(geometries), + ))) +} + +#[cfg(test)] +mod tests { + use datafusion_common::scalar::ScalarValue; + use datafusion_expr::ScalarUDF; + use rstest::rstest; + use sedona_schema::datatypes::{ + WKB_GEOGRAPHY, WKB_GEOMETRY, WKB_VIEW_GEOGRAPHY, + }; + use sedona_testing::{compare::assert_scalar_equal, testers::ScalarUdfTester}; + + use super::*; + + #[test] + fn udf_metadata() { + let udf: ScalarUDF = st_asgeojson_udf().into(); + assert_eq!(udf.name(), "st_asgeojson"); + assert!(udf.documentation().is_some()) + } + + #[rstest] + fn udf( + #[values(WKB_GEOMETRY, WKB_GEOGRAPHY, WKB_VIEW_GEOGRAPHY, WKB_VIEW_GEOGRAPHY)] + sedona_type: SedonaType, + ) { + let udf = st_asgeojson_udf(); + let tester = ScalarUdfTester::new(udf.into(), vec![sedona_type]); + + // Test with a simple point + let result = tester.invoke_wkb_scalar(Some("POINT (1 2)")).unwrap(); + if let ScalarValue::Utf8(Some(json_str)) = result { + // Verify it's valid JSON and contains expected structure + let parsed: serde_json::Value = serde_json::from_str(json_str.as_str()).unwrap(); + assert_eq!(parsed["type"], "Point"); + assert!(parsed["coordinates"].is_array()); + } else { + panic!("Expected Utf8 scalar value"); + } + + // Test with null + assert_scalar_equal( + &tester.invoke_wkb_scalar(None).unwrap(), + &ScalarValue::Utf8(None), + ); + + // Test with array + let result_array = tester + .invoke_wkb_array(vec![Some("POINT(1 2)"), None, Some("POINT(3 5)")]) + .unwrap(); + + // Verify the array has the expected number of elements + assert_eq!((*result_array).len(), 3); + } + + #[rstest] + fn geometry_collection( + #[values(WKB_GEOMETRY, WKB_GEOGRAPHY, WKB_VIEW_GEOGRAPHY, WKB_VIEW_GEOGRAPHY)] + sedona_type: SedonaType, + ) { + let udf = st_asgeojson_udf(); + let tester = ScalarUdfTester::new(udf.into(), vec![sedona_type]); + + // Test with a simple GeometryCollection + let result = tester + .invoke_wkb_scalar(Some("GEOMETRYCOLLECTION(POINT(1 2), LINESTRING(0 0, 1 1))")) + .unwrap(); + if let ScalarValue::Utf8(Some(json_str)) = result { + // Verify it's valid JSON and contains expected structure + let parsed: serde_json::Value = serde_json::from_str(json_str.as_str()).unwrap(); + assert_eq!(parsed["type"], "GeometryCollection"); + assert!(parsed["geometries"].is_array()); + let geometries = parsed["geometries"].as_array().unwrap(); + assert_eq!(geometries.len(), 2); + assert_eq!(geometries[0]["type"], "Point"); + assert_eq!(geometries[1]["type"], "LineString"); + } else { + panic!("Expected Utf8 scalar value"); + } + + // Test with empty GeometryCollection + let result = tester + .invoke_wkb_scalar(Some("GEOMETRYCOLLECTION EMPTY")) + .unwrap(); + if let ScalarValue::Utf8(Some(json_str)) = result { + let parsed: serde_json::Value = serde_json::from_str(json_str.as_str()).unwrap(); + assert_eq!(parsed["type"], "GeometryCollection"); + let geometries = parsed["geometries"].as_array().unwrap(); + assert_eq!(geometries.len(), 0); + } else { + panic!("Expected Utf8 scalar value"); + } + } + + #[rstest] + fn geojson_type_parameter( + #[values(WKB_GEOMETRY, WKB_GEOGRAPHY)] + sedona_type: SedonaType, + ) { + // Test Simple type (default) + let tester = ScalarUdfTester::new(st_asgeojson_udf().into(), vec![sedona_type.clone()]); + let result = tester.invoke_wkb_scalar(Some("POINT (1 2)")).unwrap(); + if let ScalarValue::Utf8(Some(json_str)) = result { + let parsed: serde_json::Value = serde_json::from_str(json_str.as_str()).unwrap(); + assert_eq!(parsed["type"], "Point"); + assert!(parsed.get("geometry").is_none()); // No wrapping + } else { + panic!("Expected Utf8 scalar value"); + } + + // Test Feature type + let tester_with_type = ScalarUdfTester::new( + st_asgeojson_udf().into(), + vec![sedona_type, SedonaType::Arrow(DataType::Utf8)], + ); + let result = tester_with_type + .invoke_scalar_scalar("POINT (1 2)", "Feature") + .unwrap(); + if let ScalarValue::Utf8(Some(json_str)) = result { + let parsed: serde_json::Value = serde_json::from_str(json_str.as_str()).unwrap(); + assert_eq!(parsed["type"], "Feature"); + assert!(parsed["geometry"].is_object()); + assert_eq!(parsed["geometry"]["type"], "Point"); + assert!(parsed["geometry"]["coordinates"].is_array()); + } else { + panic!("Expected Utf8 scalar value"); + } + + // Test FeatureCollection type + let result = tester_with_type + .invoke_scalar_scalar("POINT (1 2)", "FeatureCollection") + .unwrap(); + if let ScalarValue::Utf8(Some(json_str)) = result { + let parsed: serde_json::Value = serde_json::from_str(json_str.as_str()).unwrap(); + assert_eq!(parsed["type"], "FeatureCollection"); + assert!(parsed["features"].is_array()); + let features = parsed["features"].as_array().unwrap(); + assert_eq!(features.len(), 1); + assert_eq!(features[0]["type"], "Feature"); + assert_eq!(features[0]["geometry"]["type"], "Point"); + } else { + panic!("Expected Utf8 scalar value"); + } + } + + #[test] + fn invalid_geojson_type() { + let udf = st_asgeojson_udf(); + let tester = ScalarUdfTester::new( + udf.into(), + vec![WKB_GEOMETRY, SedonaType::Arrow(DataType::Utf8)], + ); + + // Test with invalid type + let result = tester.invoke_scalar_scalar("POINT (1 2)", "InvalidType"); + + assert!(result.is_err()); + let err_msg = result.unwrap_err().to_string(); + assert!(err_msg.contains("Invalid GeoJSON type")); + } +} From dfba3d6dd409809db037e2d72b20b59347b0fad4 Mon Sep 17 00:00:00 2001 From: Pranav Toggi Date: Sun, 28 Dec 2025 10:26:27 -0800 Subject: [PATCH 02/11] clippy fix --- rust/sedona-functions/src/st_asgeojson.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/rust/sedona-functions/src/st_asgeojson.rs b/rust/sedona-functions/src/st_asgeojson.rs index 6c36fa98a..d1dc4d22b 100644 --- a/rust/sedona-functions/src/st_asgeojson.rs +++ b/rust/sedona-functions/src/st_asgeojson.rs @@ -58,7 +58,7 @@ impl GeoJsonType { /// /// An implementation of GeoJSON writing using the geojson crate. pub fn st_asgeojson_udf() -> SedonaScalarUDF { - let udf = SedonaScalarUDF::new( + SedonaScalarUDF::new( "st_asgeojson", vec![ Arc::new(STAsGeoJSON {}), @@ -66,8 +66,7 @@ pub fn st_asgeojson_udf() -> SedonaScalarUDF { ], Volatility::Immutable, Some(st_asgeojson_doc()), - ); - udf + ) } fn st_asgeojson_doc() -> Documentation { From 180047bc0ff05b413008926df6931b29ac6f62f7 Mon Sep 17 00:00:00 2001 From: Pranav Toggi Date: Sun, 28 Dec 2025 10:48:39 -0800 Subject: [PATCH 03/11] fmt fix --- rust/sedona-functions/src/lib.rs | 2 +- rust/sedona-functions/src/st_asgeojson.rs | 50 ++++++++++------------- 2 files changed, 22 insertions(+), 30 deletions(-) diff --git a/rust/sedona-functions/src/lib.rs b/rust/sedona-functions/src/lib.rs index fde750736..4fe633c84 100644 --- a/rust/sedona-functions/src/lib.rs +++ b/rust/sedona-functions/src/lib.rs @@ -26,8 +26,8 @@ pub mod sd_order; pub mod st_analyze_agg; mod st_area; mod st_asbinary; -mod st_astext; mod st_asgeojson; +mod st_astext; mod st_azimuth; mod st_buffer; mod st_centroid; diff --git a/rust/sedona-functions/src/st_asgeojson.rs b/rust/sedona-functions/src/st_asgeojson.rs index d1dc4d22b..035622afd 100644 --- a/rust/sedona-functions/src/st_asgeojson.rs +++ b/rust/sedona-functions/src/st_asgeojson.rs @@ -60,10 +60,7 @@ impl GeoJsonType { pub fn st_asgeojson_udf() -> SedonaScalarUDF { SedonaScalarUDF::new( "st_asgeojson", - vec![ - Arc::new(STAsGeoJSON {}), - Arc::new(STAsGeoJSONWithType {}), - ], + vec![Arc::new(STAsGeoJSON {}), Arc::new(STAsGeoJSONWithType {})], Volatility::Immutable, Some(st_asgeojson_doc()), ) @@ -76,7 +73,10 @@ fn st_asgeojson_doc() -> Documentation { "ST_AsGeoJSON (A: Geometry [, type: String])", ) .with_argument("geom", "geometry: Input geometry or geography") - .with_argument("type", "string (optional): Output type - 'Simple' (default), 'Feature', or 'FeatureCollection'") + .with_argument( + "type", + "string (optional): Output type - 'Simple' (default), 'Feature', or 'FeatureCollection'", + ) .with_sql_example("SELECT ST_AsGeoJSON(ST_Point(1.0, 2.0))") .with_sql_example("SELECT ST_AsGeoJSON(ST_Point(1.0, 2.0), 'Feature')") .with_sql_example("SELECT ST_AsGeoJSON(ST_Point(1.0, 2.0), 'FeatureCollection')") @@ -176,14 +176,16 @@ fn convert_to_geojson( match geo_geometry { Some(geom) => { // Convert geo_types::Geometry to geojson::Geometry - let geojson_geom: geojson::Geometry = (&geom).try_into() - .map_err(|e| DataFusionError::Execution(format!("Failed to convert to GeoJSON: {:?}", e)))?; + let geojson_geom: geojson::Geometry = (&geom).try_into().map_err(|e| { + DataFusionError::Execution(format!( + "Failed to convert to GeoJSON: {:?}", + e + )) + })?; // Wrap the geometry based on the type parameter and serialize let geojson_output = match geojson_type { - GeoJsonType::Simple => { - geojson_geom - } + GeoJsonType::Simple => geojson_geom, GeoJsonType::Feature => { let feature = geojson::Feature { bbox: None, @@ -248,9 +250,7 @@ fn wkb_to_geometry(item: impl GeometryTrait) -> Result GeometryType::MultiLineString(geom) => { Some(Geometry::MultiLineString(geom.to_multi_line_string())) } - GeometryType::MultiPolygon(geom) => { - Some(Geometry::MultiPolygon(geom.to_multi_polygon())) - } + GeometryType::MultiPolygon(geom) => Some(Geometry::MultiPolygon(geom.to_multi_polygon())), GeometryType::GeometryCollection(geom) => convert_geometry_collection(geom)?, _ => None, }; @@ -273,17 +273,14 @@ fn convert_geometry_collection>( GeometryType::MultiLineString(g) => { Some(Geometry::MultiLineString(g.to_multi_line_string())) } - GeometryType::MultiPolygon(g) => { - Some(Geometry::MultiPolygon(g.to_multi_polygon())) - } + GeometryType::MultiPolygon(g) => Some(Geometry::MultiPolygon(g.to_multi_polygon())), GeometryType::GeometryCollection(g) => { // Support one level of nested GeometryCollection - return convert_geometry_collection(g)? - .ok_or_else(|| { - DataFusionError::NotImplemented( - "Nested GeometryCollection with unsupported types".to_string(), - ) - }); + return convert_geometry_collection(g)?.ok_or_else(|| { + DataFusionError::NotImplemented( + "Nested GeometryCollection with unsupported types".to_string(), + ) + }); } _ => None, }; @@ -308,9 +305,7 @@ mod tests { use datafusion_common::scalar::ScalarValue; use datafusion_expr::ScalarUDF; use rstest::rstest; - use sedona_schema::datatypes::{ - WKB_GEOGRAPHY, WKB_GEOMETRY, WKB_VIEW_GEOGRAPHY, - }; + use sedona_schema::datatypes::{WKB_GEOGRAPHY, WKB_GEOMETRY, WKB_VIEW_GEOGRAPHY}; use sedona_testing::{compare::assert_scalar_equal, testers::ScalarUdfTester}; use super::*; @@ -396,10 +391,7 @@ mod tests { } #[rstest] - fn geojson_type_parameter( - #[values(WKB_GEOMETRY, WKB_GEOGRAPHY)] - sedona_type: SedonaType, - ) { + fn geojson_type_parameter(#[values(WKB_GEOMETRY, WKB_GEOGRAPHY)] sedona_type: SedonaType) { // Test Simple type (default) let tester = ScalarUdfTester::new(st_asgeojson_udf().into(), vec![sedona_type.clone()]); let result = tester.invoke_wkb_scalar(Some("POINT (1 2)")).unwrap(); From 442118422bd1ff84533affcf31c016783a8b1635 Mon Sep 17 00:00:00 2001 From: Pranav Toggi Date: Mon, 29 Dec 2025 11:58:34 -0800 Subject: [PATCH 04/11] refactor: use existing `GeoTypesExecutor`for iterating through geometry types --- rust/sedona-functions/Cargo.toml | 2 - rust/sedona-functions/src/st_asgeojson.rs | 409 +--------------------- rust/sedona-geo/Cargo.toml | 2 + rust/sedona-geo/src/lib.rs | 1 + rust/sedona-geo/src/register.rs | 19 +- rust/sedona-geo/src/st_asgeojson.rs | 301 ++++++++++++++++ 6 files changed, 328 insertions(+), 406 deletions(-) create mode 100644 rust/sedona-geo/src/st_asgeojson.rs diff --git a/rust/sedona-functions/Cargo.toml b/rust/sedona-functions/Cargo.toml index 72972c9e7..9fb7f35f4 100644 --- a/rust/sedona-functions/Cargo.toml +++ b/rust/sedona-functions/Cargo.toml @@ -45,8 +45,6 @@ arrow-buffer = { workspace = true } datafusion-common = { workspace = true } datafusion-expr = { workspace = true } geo-traits = { workspace = true } -geo-types = { workspace = true } -geojson = { workspace = true } sedona-common = { workspace = true } sedona-expr = { workspace = true } sedona-geometry = { workspace = true } diff --git a/rust/sedona-functions/src/st_asgeojson.rs b/rust/sedona-functions/src/st_asgeojson.rs index 035622afd..d277c444f 100644 --- a/rust/sedona-functions/src/st_asgeojson.rs +++ b/rust/sedona-functions/src/st_asgeojson.rs @@ -14,53 +14,21 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. -use std::sync::Arc; - -use crate::executor::WkbExecutor; -use arrow_array::builder::StringBuilder; use arrow_schema::DataType; -use datafusion_common::error::{DataFusionError, Result}; -use datafusion_expr::{ - scalar_doc_sections::DOC_SECTION_OTHER, ColumnarValue, Documentation, Volatility, -}; -use geo_traits::to_geo::{ - ToGeoLineString, ToGeoMultiLineString, ToGeoMultiPoint, ToGeoMultiPolygon, ToGeoPoint, - ToGeoPolygon, -}; -use geo_traits::{GeometryCollectionTrait, GeometryTrait, GeometryType}; -use geo_types::Geometry; -use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF}; +use datafusion_expr::{scalar_doc_sections::DOC_SECTION_OTHER, Documentation, Volatility}; +use sedona_expr::scalar_udf::SedonaScalarUDF; use sedona_schema::{datatypes::SedonaType, matchers::ArgMatcher}; -/// Output format type for GeoJSON -#[derive(Debug, Clone, Copy, PartialEq)] -enum GeoJsonType { - Simple, - Feature, - FeatureCollection, -} - -impl GeoJsonType { - fn from_str(s: &str) -> Result { - match s.to_lowercase().as_str() { - "simple" => Ok(GeoJsonType::Simple), - "feature" => Ok(GeoJsonType::Feature), - "featurecollection" => Ok(GeoJsonType::FeatureCollection), - _ => Err(DataFusionError::Execution(format!( - "Invalid GeoJSON type '{}'. Valid options are: 'Simple', 'Feature', 'FeatureCollection'", - s - ))), - } - } -} - /// ST_AsGeoJSON() scalar UDF implementation /// -/// An implementation of GeoJSON writing using the geojson crate. +/// Stub function for GeoJSON conversion. pub fn st_asgeojson_udf() -> SedonaScalarUDF { - SedonaScalarUDF::new( + SedonaScalarUDF::new_stub( "st_asgeojson", - vec![Arc::new(STAsGeoJSON {}), Arc::new(STAsGeoJSONWithType {})], + ArgMatcher::new( + vec![ArgMatcher::is_geometry_or_geography()], + SedonaType::Arrow(DataType::Utf8), + ), Volatility::Immutable, Some(st_asgeojson_doc()), ) @@ -69,10 +37,10 @@ pub fn st_asgeojson_udf() -> SedonaScalarUDF { fn st_asgeojson_doc() -> Documentation { Documentation::builder( DOC_SECTION_OTHER, - "Return the GeoJSON representation of a geometry or geography", + "Return the GeoJSON representation of a geometry", "ST_AsGeoJSON (A: Geometry [, type: String])", ) - .with_argument("geom", "geometry: Input geometry or geography") + .with_argument("geom", "geometry: Input geometry") .with_argument( "type", "string (optional): Output type - 'Simple' (default), 'Feature', or 'FeatureCollection'", @@ -84,229 +52,9 @@ fn st_asgeojson_doc() -> Documentation { .build() } -#[derive(Debug)] -struct STAsGeoJSON {} - -impl SedonaScalarKernel for STAsGeoJSON { - fn return_type(&self, args: &[SedonaType]) -> Result> { - let matcher = ArgMatcher::new( - vec![ArgMatcher::is_geometry_or_geography()], - SedonaType::Arrow(DataType::Utf8), - ); - - matcher.match_args(args) - } - - fn invoke_batch( - &self, - arg_types: &[SedonaType], - args: &[ColumnarValue], - ) -> Result { - convert_to_geojson(arg_types, args, GeoJsonType::Simple) - } -} - -#[derive(Debug)] -struct STAsGeoJSONWithType {} - -impl SedonaScalarKernel for STAsGeoJSONWithType { - fn return_type(&self, args: &[SedonaType]) -> Result> { - let matcher = ArgMatcher::new( - vec![ - ArgMatcher::is_geometry_or_geography(), - ArgMatcher::is_string(), - ], - SedonaType::Arrow(DataType::Utf8), - ); - - matcher.match_args(args) - } - - fn invoke_batch( - &self, - arg_types: &[SedonaType], - args: &[ColumnarValue], - ) -> Result { - // Extract the type parameter - let geojson_type = match &args[1] { - ColumnarValue::Scalar(datafusion_common::ScalarValue::Utf8(Some(type_str))) => { - GeoJsonType::from_str(type_str.as_str())? - } - ColumnarValue::Scalar(datafusion_common::ScalarValue::Utf8(None)) => { - GeoJsonType::Simple // Default to Simple if NULL - } - _ => { - return Err(DataFusionError::Execution( - "Second argument to ST_AsGeoJSON must be a string literal".to_string(), - )); - } - }; - - convert_to_geojson(&arg_types[..1], &args[..1], geojson_type) - } -} - -fn convert_to_geojson( - arg_types: &[SedonaType], - args: &[ColumnarValue], - geojson_type: GeoJsonType, -) -> Result { - let executor = WkbExecutor::new(arg_types, args); - - // Estimate the minimum probable memory requirement of the output. - // GeoJSON is typically longer than WKT due to JSON formatting. - // Feature and FeatureCollection add extra wrapping - let base_size = match geojson_type { - GeoJsonType::Simple => 50, - GeoJsonType::Feature => 100, - GeoJsonType::FeatureCollection => 150, - }; - let min_probable_geojson_size = executor.num_iterations() * base_size; - - // Initialize an output builder of the appropriate type - let mut builder = - StringBuilder::with_capacity(executor.num_iterations(), min_probable_geojson_size); - - executor.execute_wkb_void(|maybe_item| { - match maybe_item { - Some(item) => { - // Convert WKB geometry to geo_types::Geometry using geo-traits - let geo_geometry = wkb_to_geometry(item)?; - - match geo_geometry { - Some(geom) => { - // Convert geo_types::Geometry to geojson::Geometry - let geojson_geom: geojson::Geometry = (&geom).try_into().map_err(|e| { - DataFusionError::Execution(format!( - "Failed to convert to GeoJSON: {:?}", - e - )) - })?; - - // Wrap the geometry based on the type parameter and serialize - let geojson_output = match geojson_type { - GeoJsonType::Simple => geojson_geom, - GeoJsonType::Feature => { - let feature = geojson::Feature { - bbox: None, - geometry: Some(geojson_geom), - id: None, - properties: None, - foreign_members: None, - }; - let json_str = serde_json::to_string(&feature) - .map_err(|err| DataFusionError::External(Box::new(err)))?; - builder.append_value(&json_str); - return Ok(()); - } - GeoJsonType::FeatureCollection => { - let feature = geojson::Feature { - bbox: None, - geometry: Some(geojson_geom), - id: None, - properties: None, - foreign_members: None, - }; - let feature_collection = geojson::FeatureCollection { - bbox: None, - features: vec![feature], - foreign_members: None, - }; - let json_str = serde_json::to_string(&feature_collection) - .map_err(|err| DataFusionError::External(Box::new(err)))?; - builder.append_value(&json_str); - return Ok(()); - } - }; - - // For Simple type, serialize the geometry - let json_str = serde_json::to_string(&geojson_output) - .map_err(|err| DataFusionError::External(Box::new(err)))?; - builder.append_value(&json_str); - } - None => { - return Err(DataFusionError::NotImplemented( - "Unsupported geometry type for GeoJSON conversion".to_string(), - )); - } - } - } - None => builder.append_null(), - }; - - Ok(()) - })?; - - executor.finish(Arc::new(builder.finish())) -} - -/// Convert a WKB geometry to geo_types::Geometry, including GeometryCollection support -fn wkb_to_geometry(item: impl GeometryTrait) -> Result> { - let geo_geometry = match item.as_type() { - GeometryType::Point(geom) => geom.try_to_point().map(Geometry::Point), - GeometryType::LineString(geom) => Some(Geometry::LineString(geom.to_line_string())), - GeometryType::Polygon(geom) => Some(Geometry::Polygon(geom.to_polygon())), - GeometryType::MultiPoint(geom) => geom.try_to_multi_point().map(Geometry::MultiPoint), - GeometryType::MultiLineString(geom) => { - Some(Geometry::MultiLineString(geom.to_multi_line_string())) - } - GeometryType::MultiPolygon(geom) => Some(Geometry::MultiPolygon(geom.to_multi_polygon())), - GeometryType::GeometryCollection(geom) => convert_geometry_collection(geom)?, - _ => None, - }; - Ok(geo_geometry) -} - -/// Convert a GeometryCollection to geo_types::Geometry -/// Handles up to 1 level of nesting to avoid compiler issues with recursion -fn convert_geometry_collection>( - geom: &GC, -) -> Result> { - let geometries: Result> = geom - .geometries() - .map(|child| { - let child_geom = match child.as_type() { - GeometryType::Point(g) => g.try_to_point().map(Geometry::Point), - GeometryType::LineString(g) => Some(Geometry::LineString(g.to_line_string())), - GeometryType::Polygon(g) => Some(Geometry::Polygon(g.to_polygon())), - GeometryType::MultiPoint(g) => g.try_to_multi_point().map(Geometry::MultiPoint), - GeometryType::MultiLineString(g) => { - Some(Geometry::MultiLineString(g.to_multi_line_string())) - } - GeometryType::MultiPolygon(g) => Some(Geometry::MultiPolygon(g.to_multi_polygon())), - GeometryType::GeometryCollection(g) => { - // Support one level of nested GeometryCollection - return convert_geometry_collection(g)?.ok_or_else(|| { - DataFusionError::NotImplemented( - "Nested GeometryCollection with unsupported types".to_string(), - ) - }); - } - _ => None, - }; - - child_geom.ok_or_else(|| { - DataFusionError::NotImplemented( - "GeometryCollection contains unsupported geometry types".to_string(), - ) - }) - }) - .collect(); - - let geometries = geometries?; - - Ok(Some(Geometry::GeometryCollection( - geo_types::GeometryCollection(geometries), - ))) -} - #[cfg(test)] mod tests { - use datafusion_common::scalar::ScalarValue; use datafusion_expr::ScalarUDF; - use rstest::rstest; - use sedona_schema::datatypes::{WKB_GEOGRAPHY, WKB_GEOMETRY, WKB_VIEW_GEOGRAPHY}; - use sedona_testing::{compare::assert_scalar_equal, testers::ScalarUdfTester}; use super::*; @@ -316,141 +64,4 @@ mod tests { assert_eq!(udf.name(), "st_asgeojson"); assert!(udf.documentation().is_some()) } - - #[rstest] - fn udf( - #[values(WKB_GEOMETRY, WKB_GEOGRAPHY, WKB_VIEW_GEOGRAPHY, WKB_VIEW_GEOGRAPHY)] - sedona_type: SedonaType, - ) { - let udf = st_asgeojson_udf(); - let tester = ScalarUdfTester::new(udf.into(), vec![sedona_type]); - - // Test with a simple point - let result = tester.invoke_wkb_scalar(Some("POINT (1 2)")).unwrap(); - if let ScalarValue::Utf8(Some(json_str)) = result { - // Verify it's valid JSON and contains expected structure - let parsed: serde_json::Value = serde_json::from_str(json_str.as_str()).unwrap(); - assert_eq!(parsed["type"], "Point"); - assert!(parsed["coordinates"].is_array()); - } else { - panic!("Expected Utf8 scalar value"); - } - - // Test with null - assert_scalar_equal( - &tester.invoke_wkb_scalar(None).unwrap(), - &ScalarValue::Utf8(None), - ); - - // Test with array - let result_array = tester - .invoke_wkb_array(vec![Some("POINT(1 2)"), None, Some("POINT(3 5)")]) - .unwrap(); - - // Verify the array has the expected number of elements - assert_eq!((*result_array).len(), 3); - } - - #[rstest] - fn geometry_collection( - #[values(WKB_GEOMETRY, WKB_GEOGRAPHY, WKB_VIEW_GEOGRAPHY, WKB_VIEW_GEOGRAPHY)] - sedona_type: SedonaType, - ) { - let udf = st_asgeojson_udf(); - let tester = ScalarUdfTester::new(udf.into(), vec![sedona_type]); - - // Test with a simple GeometryCollection - let result = tester - .invoke_wkb_scalar(Some("GEOMETRYCOLLECTION(POINT(1 2), LINESTRING(0 0, 1 1))")) - .unwrap(); - if let ScalarValue::Utf8(Some(json_str)) = result { - // Verify it's valid JSON and contains expected structure - let parsed: serde_json::Value = serde_json::from_str(json_str.as_str()).unwrap(); - assert_eq!(parsed["type"], "GeometryCollection"); - assert!(parsed["geometries"].is_array()); - let geometries = parsed["geometries"].as_array().unwrap(); - assert_eq!(geometries.len(), 2); - assert_eq!(geometries[0]["type"], "Point"); - assert_eq!(geometries[1]["type"], "LineString"); - } else { - panic!("Expected Utf8 scalar value"); - } - - // Test with empty GeometryCollection - let result = tester - .invoke_wkb_scalar(Some("GEOMETRYCOLLECTION EMPTY")) - .unwrap(); - if let ScalarValue::Utf8(Some(json_str)) = result { - let parsed: serde_json::Value = serde_json::from_str(json_str.as_str()).unwrap(); - assert_eq!(parsed["type"], "GeometryCollection"); - let geometries = parsed["geometries"].as_array().unwrap(); - assert_eq!(geometries.len(), 0); - } else { - panic!("Expected Utf8 scalar value"); - } - } - - #[rstest] - fn geojson_type_parameter(#[values(WKB_GEOMETRY, WKB_GEOGRAPHY)] sedona_type: SedonaType) { - // Test Simple type (default) - let tester = ScalarUdfTester::new(st_asgeojson_udf().into(), vec![sedona_type.clone()]); - let result = tester.invoke_wkb_scalar(Some("POINT (1 2)")).unwrap(); - if let ScalarValue::Utf8(Some(json_str)) = result { - let parsed: serde_json::Value = serde_json::from_str(json_str.as_str()).unwrap(); - assert_eq!(parsed["type"], "Point"); - assert!(parsed.get("geometry").is_none()); // No wrapping - } else { - panic!("Expected Utf8 scalar value"); - } - - // Test Feature type - let tester_with_type = ScalarUdfTester::new( - st_asgeojson_udf().into(), - vec![sedona_type, SedonaType::Arrow(DataType::Utf8)], - ); - let result = tester_with_type - .invoke_scalar_scalar("POINT (1 2)", "Feature") - .unwrap(); - if let ScalarValue::Utf8(Some(json_str)) = result { - let parsed: serde_json::Value = serde_json::from_str(json_str.as_str()).unwrap(); - assert_eq!(parsed["type"], "Feature"); - assert!(parsed["geometry"].is_object()); - assert_eq!(parsed["geometry"]["type"], "Point"); - assert!(parsed["geometry"]["coordinates"].is_array()); - } else { - panic!("Expected Utf8 scalar value"); - } - - // Test FeatureCollection type - let result = tester_with_type - .invoke_scalar_scalar("POINT (1 2)", "FeatureCollection") - .unwrap(); - if let ScalarValue::Utf8(Some(json_str)) = result { - let parsed: serde_json::Value = serde_json::from_str(json_str.as_str()).unwrap(); - assert_eq!(parsed["type"], "FeatureCollection"); - assert!(parsed["features"].is_array()); - let features = parsed["features"].as_array().unwrap(); - assert_eq!(features.len(), 1); - assert_eq!(features[0]["type"], "Feature"); - assert_eq!(features[0]["geometry"]["type"], "Point"); - } else { - panic!("Expected Utf8 scalar value"); - } - } - - #[test] - fn invalid_geojson_type() { - let udf = st_asgeojson_udf(); - let tester = ScalarUdfTester::new( - udf.into(), - vec![WKB_GEOMETRY, SedonaType::Arrow(DataType::Utf8)], - ); - - // Test with invalid type - let result = tester.invoke_scalar_scalar("POINT (1 2)", "InvalidType"); - - assert!(result.is_err()); - let err_msg = result.unwrap_err().to_string(); - assert!(err_msg.contains("Invalid GeoJSON type")); - } } diff --git a/rust/sedona-geo/Cargo.toml b/rust/sedona-geo/Cargo.toml index c09400102..bd20a30b7 100644 --- a/rust/sedona-geo/Cargo.toml +++ b/rust/sedona-geo/Cargo.toml @@ -47,6 +47,8 @@ sedona-geo-generic-alg = { workspace = true } geo-traits = { workspace = true, features = ["geo-types"] } geo-types = { workspace = true } geo = { workspace = true } +geojson = { workspace = true } +serde_json = { workspace = true } sedona-expr = { workspace = true } sedona-functions = { workspace = true } sedona-geometry = { workspace = true } diff --git a/rust/sedona-geo/src/lib.rs b/rust/sedona-geo/src/lib.rs index 52182beb5..949c4fc40 100644 --- a/rust/sedona-geo/src/lib.rs +++ b/rust/sedona-geo/src/lib.rs @@ -17,6 +17,7 @@ pub mod centroid; pub mod register; mod st_area; +mod st_asgeojson; mod st_buffer; mod st_centroid; pub mod st_concavehull; diff --git a/rust/sedona-geo/src/register.rs b/rust/sedona-geo/src/register.rs index 10c371cc9..fe6086748 100644 --- a/rust/sedona-geo/src/register.rs +++ b/rust/sedona-geo/src/register.rs @@ -18,17 +18,26 @@ use sedona_expr::aggregate_udf::SedonaAccumulatorRef; use sedona_expr::scalar_udf::ScalarKernelRef; use crate::{ - st_area::st_area_impl, st_buffer::st_buffer_impl, st_centroid::st_centroid_impl, - st_distance::st_distance_impl, st_dwithin::st_dwithin_impl, - st_intersection_agg::st_intersection_agg_impl, st_intersects::st_intersects_impl, - st_length::st_length_impl, st_line_interpolate_point::st_line_interpolate_point_impl, - st_perimeter::st_perimeter_impl, st_union_agg::st_union_agg_impl, + st_area::st_area_impl, + st_asgeojson::{st_asgeojson_impl, st_asgeojson_with_type_impl, GeoJsonType}, + st_buffer::st_buffer_impl, + st_centroid::st_centroid_impl, + st_distance::st_distance_impl, + st_dwithin::st_dwithin_impl, + st_intersection_agg::st_intersection_agg_impl, + st_intersects::st_intersects_impl, + st_length::st_length_impl, + st_line_interpolate_point::st_line_interpolate_point_impl, + st_perimeter::st_perimeter_impl, + st_union_agg::st_union_agg_impl, }; pub fn scalar_kernels() -> Vec<(&'static str, ScalarKernelRef)> { vec![ ("st_intersects", st_intersects_impl()), ("st_area", st_area_impl()), + ("st_asgeojson", st_asgeojson_impl(GeoJsonType::Simple)), + ("st_asgeojson", st_asgeojson_with_type_impl()), ("st_buffer", st_buffer_impl()), ("st_centroid", st_centroid_impl()), ("st_distance", st_distance_impl()), diff --git a/rust/sedona-geo/src/st_asgeojson.rs b/rust/sedona-geo/src/st_asgeojson.rs new file mode 100644 index 000000000..0c51c7404 --- /dev/null +++ b/rust/sedona-geo/src/st_asgeojson.rs @@ -0,0 +1,301 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +use std::sync::Arc; + +use crate::to_geo::GeoTypesExecutor; +use arrow_array::builder::StringBuilder; +use arrow_schema::DataType; +use datafusion_common::error::{DataFusionError, Result}; +use datafusion_common::exec_err; +use datafusion_expr::ColumnarValue; +use sedona_expr::scalar_udf::{ScalarKernelRef, SedonaScalarKernel}; +use sedona_schema::{datatypes::SedonaType, matchers::ArgMatcher}; + +/// Output format type for GeoJSON +#[derive(Debug, Clone, Copy, PartialEq)] +pub enum GeoJsonType { + Simple, + Feature, + FeatureCollection, +} + +impl GeoJsonType { + pub fn from_geojson_type_str(s: &str) -> Result { + match s.to_lowercase().as_str() { + "simple" => Ok(GeoJsonType::Simple), + "feature" => Ok(GeoJsonType::Feature), + "featurecollection" => Ok(GeoJsonType::FeatureCollection), + _ => exec_err!( + "Invalid GeoJSON type '{}'. Valid options are: 'Simple', 'Feature', 'FeatureCollection'", + s + ), + } + } +} + +/// ST_AsGeoJSON() kernel implementation using GeoTypesExecutor +pub fn st_asgeojson_impl(geojson_type: GeoJsonType) -> ScalarKernelRef { + Arc::new(STAsGeoJSON { geojson_type }) +} + +/// ST_AsGeoJSON() kernel implementation with dynamic type parameter +pub fn st_asgeojson_with_type_impl() -> ScalarKernelRef { + Arc::new(STAsGeoJSONWithType {}) +} + +#[derive(Debug)] +struct STAsGeoJSON { + geojson_type: GeoJsonType, +} + +impl SedonaScalarKernel for STAsGeoJSON { + fn return_type(&self, args: &[SedonaType]) -> Result> { + let matcher = ArgMatcher::new( + vec![ArgMatcher::is_geometry()], + SedonaType::Arrow(DataType::Utf8), + ); + + matcher.match_args(args) + } + + fn invoke_batch( + &self, + arg_types: &[SedonaType], + args: &[ColumnarValue], + ) -> Result { + let executor = GeoTypesExecutor::new(arg_types, args); + + // Estimate the minimum probable memory requirement of the output. + // GeoJSON is typically longer than WKT due to JSON formatting. + // Feature and FeatureCollection add extra wrapping + let base_size = match self.geojson_type { + GeoJsonType::Simple => 50, + GeoJsonType::Feature => 100, + GeoJsonType::FeatureCollection => 150, + }; + let min_probable_geojson_size = executor.num_iterations() * base_size; + + // Initialize an output builder of the appropriate type + let mut builder = + StringBuilder::with_capacity(executor.num_iterations(), min_probable_geojson_size); + + executor.execute_wkb_void(|maybe_geom| { + match maybe_geom { + Some(geom) => { + // Convert geo_types::Geometry to geojson::Geometry + let geojson_geom: geojson::Geometry = (&geom).into(); + + // Wrap the geometry based on the type parameter and serialize + match self.geojson_type { + GeoJsonType::Simple => { + let json_str = serde_json::to_string(&geojson_geom) + .map_err(|err| DataFusionError::External(Box::new(err)))?; + builder.append_value(&json_str); + } + GeoJsonType::Feature => { + let feature = geojson::Feature { + bbox: None, + geometry: Some(geojson_geom), + id: None, + properties: None, + foreign_members: None, + }; + let json_str = serde_json::to_string(&feature) + .map_err(|err| DataFusionError::External(Box::new(err)))?; + builder.append_value(&json_str); + } + GeoJsonType::FeatureCollection => { + let feature = geojson::Feature { + bbox: None, + geometry: Some(geojson_geom), + id: None, + properties: None, + foreign_members: None, + }; + let feature_collection = geojson::FeatureCollection { + bbox: None, + features: vec![feature], + foreign_members: None, + }; + let json_str = serde_json::to_string(&feature_collection) + .map_err(|err| DataFusionError::External(Box::new(err)))?; + builder.append_value(&json_str); + } + } + } + None => builder.append_null(), + }; + + Ok(()) + })?; + + executor.finish(Arc::new(builder.finish())) + } +} + +#[derive(Debug)] +struct STAsGeoJSONWithType {} + +impl SedonaScalarKernel for STAsGeoJSONWithType { + fn return_type(&self, args: &[SedonaType]) -> Result> { + let matcher = ArgMatcher::new( + vec![ArgMatcher::is_geometry(), ArgMatcher::is_string()], + SedonaType::Arrow(DataType::Utf8), + ); + + matcher.match_args(args) + } + + fn invoke_batch( + &self, + arg_types: &[SedonaType], + args: &[ColumnarValue], + ) -> Result { + // Extract the type parameter + let geojson_type = match &args[1] { + ColumnarValue::Scalar(datafusion_common::ScalarValue::Utf8(Some(type_str))) => { + GeoJsonType::from_geojson_type_str(type_str.as_str())? + } + ColumnarValue::Scalar(datafusion_common::ScalarValue::Utf8(None)) => { + GeoJsonType::Simple // Default to Simple if NULL + } + _ => { + return exec_err!("Second argument to ST_AsGeoJSON must be a string literal"); + } + }; + + // Delegate to the appropriate kernel based on the type parameter + let kernel = st_asgeojson_impl(geojson_type); + kernel.invoke_batch(&arg_types[..1], &args[..1]) + } +} + +#[cfg(test)] +mod tests { + use datafusion_common::scalar::ScalarValue; + use sedona_expr::scalar_udf::SedonaScalarUDF; + use sedona_schema::datatypes::WKB_GEOMETRY; + use sedona_testing::testers::ScalarUdfTester; + + use super::*; + + #[test] + fn test_simple_geojson() { + let kernel = st_asgeojson_impl(GeoJsonType::Simple); + let udf = SedonaScalarUDF::from_kernel("st_asgeojson", kernel); + let tester = ScalarUdfTester::new(udf.into(), vec![WKB_GEOMETRY]); + + // Test with a simple point + let result = tester.invoke_wkb_scalar(Some("POINT (1 2)")).unwrap(); + tester.assert_scalar_result_equals(result, r#"{"type":"Point","coordinates":[1.0,2.0]}"#); + + // Test with null + let result = tester.invoke_wkb_scalar(None).unwrap(); + assert_eq!(result, ScalarValue::Utf8(None)); + } + + #[test] + fn test_feature_geojson() { + let kernel = st_asgeojson_impl(GeoJsonType::Feature); + let udf = SedonaScalarUDF::from_kernel("st_asgeojson", kernel); + let tester = ScalarUdfTester::new(udf.into(), vec![WKB_GEOMETRY]); + + let result = tester.invoke_wkb_scalar(Some("POINT (1 2)")).unwrap(); + tester.assert_scalar_result_equals( + result, + r#"{"type":"Feature","geometry":{"type":"Point","coordinates":[1.0,2.0]},"properties":null}"#, + ); + } + + #[test] + fn test_feature_collection_geojson() { + let kernel = st_asgeojson_impl(GeoJsonType::FeatureCollection); + let udf = SedonaScalarUDF::from_kernel("st_asgeojson", kernel); + let tester = ScalarUdfTester::new(udf.into(), vec![WKB_GEOMETRY]); + + let result = tester.invoke_wkb_scalar(Some("POINT (1 2)")).unwrap(); + tester.assert_scalar_result_equals( + result, + r#"{"type":"FeatureCollection","features":[{"type":"Feature","geometry":{"type":"Point","coordinates":[1.0,2.0]},"properties":null}]}"#, + ); + } + + #[test] + fn test_geometry_collection() { + let kernel = st_asgeojson_impl(GeoJsonType::Simple); + let udf = SedonaScalarUDF::from_kernel("st_asgeojson", kernel); + let tester = ScalarUdfTester::new(udf.into(), vec![WKB_GEOMETRY]); + + let result = tester + .invoke_wkb_scalar(Some("GEOMETRYCOLLECTION(POINT(1 2), LINESTRING(0 0, 1 1))")) + .unwrap(); + tester.assert_scalar_result_equals( + result, + r#"{"type":"GeometryCollection","geometries":[{"type":"Point","coordinates":[1.0,2.0]},{"type":"LineString","coordinates":[[0.0,0.0],[1.0,1.0]]}]}"#, + ); + } + + #[test] + fn test_with_type_parameter() { + let kernel = st_asgeojson_with_type_impl(); + let udf = SedonaScalarUDF::from_kernel("st_asgeojson", kernel); + let tester = ScalarUdfTester::new( + udf.into(), + vec![WKB_GEOMETRY, SedonaType::Arrow(DataType::Utf8)], + ); + + // Test with 'Simple' type + let result = tester + .invoke_scalar_scalar("POINT (1 2)", "Simple") + .unwrap(); + tester.assert_scalar_result_equals(result, r#"{"type":"Point","coordinates":[1.0,2.0]}"#); + + // Test with 'Feature' type + let result = tester + .invoke_scalar_scalar("POINT (1 2)", "Feature") + .unwrap(); + tester.assert_scalar_result_equals( + result, + r#"{"type":"Feature","geometry":{"type":"Point","coordinates":[1.0,2.0]},"properties":null}"#, + ); + + // Test with 'FeatureCollection' type + let result = tester + .invoke_scalar_scalar("POINT (1 2)", "FeatureCollection") + .unwrap(); + tester.assert_scalar_result_equals( + result, + r#"{"type":"FeatureCollection","features":[{"type":"Feature","geometry":{"type":"Point","coordinates":[1.0,2.0]},"properties":null}]}"#, + ); + } + + #[test] + fn test_invalid_type_string() { + let kernel = st_asgeojson_with_type_impl(); + let udf = SedonaScalarUDF::from_kernel("st_asgeojson", kernel); + let tester = ScalarUdfTester::new( + udf.into(), + vec![WKB_GEOMETRY, SedonaType::Arrow(DataType::Utf8)], + ); + + // Test with invalid type string + let result = tester.invoke_scalar_scalar("POINT (1 2)", "InvalidType"); + assert!(result.is_err()); + let err_msg = result.unwrap_err().to_string(); + assert!(err_msg.contains("Invalid GeoJSON type")); + } +} From 9294a770106cb4d72a0030d7e69ab680d3c1783c Mon Sep 17 00:00:00 2001 From: Pranav Toggi Date: Mon, 29 Dec 2025 13:26:25 -0800 Subject: [PATCH 05/11] remove output type parameter --- rust/sedona-geo/src/register.rs | 20 +-- rust/sedona-geo/src/st_asgeojson.rs | 196 ++++------------------------ 2 files changed, 28 insertions(+), 188 deletions(-) diff --git a/rust/sedona-geo/src/register.rs b/rust/sedona-geo/src/register.rs index fe6086748..d7dde2114 100644 --- a/rust/sedona-geo/src/register.rs +++ b/rust/sedona-geo/src/register.rs @@ -18,26 +18,18 @@ use sedona_expr::aggregate_udf::SedonaAccumulatorRef; use sedona_expr::scalar_udf::ScalarKernelRef; use crate::{ - st_area::st_area_impl, - st_asgeojson::{st_asgeojson_impl, st_asgeojson_with_type_impl, GeoJsonType}, - st_buffer::st_buffer_impl, - st_centroid::st_centroid_impl, - st_distance::st_distance_impl, - st_dwithin::st_dwithin_impl, - st_intersection_agg::st_intersection_agg_impl, - st_intersects::st_intersects_impl, - st_length::st_length_impl, - st_line_interpolate_point::st_line_interpolate_point_impl, - st_perimeter::st_perimeter_impl, - st_union_agg::st_union_agg_impl, + st_area::st_area_impl, st_asgeojson::st_asgeojson_impl, st_buffer::st_buffer_impl, + st_centroid::st_centroid_impl, st_distance::st_distance_impl, st_dwithin::st_dwithin_impl, + st_intersection_agg::st_intersection_agg_impl, st_intersects::st_intersects_impl, + st_length::st_length_impl, st_line_interpolate_point::st_line_interpolate_point_impl, + st_perimeter::st_perimeter_impl, st_union_agg::st_union_agg_impl, }; pub fn scalar_kernels() -> Vec<(&'static str, ScalarKernelRef)> { vec![ ("st_intersects", st_intersects_impl()), ("st_area", st_area_impl()), - ("st_asgeojson", st_asgeojson_impl(GeoJsonType::Simple)), - ("st_asgeojson", st_asgeojson_with_type_impl()), + ("st_asgeojson", st_asgeojson_impl()), ("st_buffer", st_buffer_impl()), ("st_centroid", st_centroid_impl()), ("st_distance", st_distance_impl()), diff --git a/rust/sedona-geo/src/st_asgeojson.rs b/rust/sedona-geo/src/st_asgeojson.rs index 0c51c7404..5d7f93229 100644 --- a/rust/sedona-geo/src/st_asgeojson.rs +++ b/rust/sedona-geo/src/st_asgeojson.rs @@ -20,47 +20,17 @@ use crate::to_geo::GeoTypesExecutor; use arrow_array::builder::StringBuilder; use arrow_schema::DataType; use datafusion_common::error::{DataFusionError, Result}; -use datafusion_common::exec_err; use datafusion_expr::ColumnarValue; use sedona_expr::scalar_udf::{ScalarKernelRef, SedonaScalarKernel}; use sedona_schema::{datatypes::SedonaType, matchers::ArgMatcher}; -/// Output format type for GeoJSON -#[derive(Debug, Clone, Copy, PartialEq)] -pub enum GeoJsonType { - Simple, - Feature, - FeatureCollection, -} - -impl GeoJsonType { - pub fn from_geojson_type_str(s: &str) -> Result { - match s.to_lowercase().as_str() { - "simple" => Ok(GeoJsonType::Simple), - "feature" => Ok(GeoJsonType::Feature), - "featurecollection" => Ok(GeoJsonType::FeatureCollection), - _ => exec_err!( - "Invalid GeoJSON type '{}'. Valid options are: 'Simple', 'Feature', 'FeatureCollection'", - s - ), - } - } -} - /// ST_AsGeoJSON() kernel implementation using GeoTypesExecutor -pub fn st_asgeojson_impl(geojson_type: GeoJsonType) -> ScalarKernelRef { - Arc::new(STAsGeoJSON { geojson_type }) -} - -/// ST_AsGeoJSON() kernel implementation with dynamic type parameter -pub fn st_asgeojson_with_type_impl() -> ScalarKernelRef { - Arc::new(STAsGeoJSONWithType {}) +pub fn st_asgeojson_impl() -> ScalarKernelRef { + Arc::new(STAsGeoJSON {}) } #[derive(Debug)] -struct STAsGeoJSON { - geojson_type: GeoJsonType, -} +struct STAsGeoJSON {} impl SedonaScalarKernel for STAsGeoJSON { fn return_type(&self, args: &[SedonaType]) -> Result> { @@ -81,13 +51,7 @@ impl SedonaScalarKernel for STAsGeoJSON { // Estimate the minimum probable memory requirement of the output. // GeoJSON is typically longer than WKT due to JSON formatting. - // Feature and FeatureCollection add extra wrapping - let base_size = match self.geojson_type { - GeoJsonType::Simple => 50, - GeoJsonType::Feature => 100, - GeoJsonType::FeatureCollection => 150, - }; - let min_probable_geojson_size = executor.num_iterations() * base_size; + let min_probable_geojson_size = executor.num_iterations() * 50; // Initialize an output builder of the appropriate type let mut builder = @@ -99,43 +63,10 @@ impl SedonaScalarKernel for STAsGeoJSON { // Convert geo_types::Geometry to geojson::Geometry let geojson_geom: geojson::Geometry = (&geom).into(); - // Wrap the geometry based on the type parameter and serialize - match self.geojson_type { - GeoJsonType::Simple => { - let json_str = serde_json::to_string(&geojson_geom) - .map_err(|err| DataFusionError::External(Box::new(err)))?; - builder.append_value(&json_str); - } - GeoJsonType::Feature => { - let feature = geojson::Feature { - bbox: None, - geometry: Some(geojson_geom), - id: None, - properties: None, - foreign_members: None, - }; - let json_str = serde_json::to_string(&feature) - .map_err(|err| DataFusionError::External(Box::new(err)))?; - builder.append_value(&json_str); - } - GeoJsonType::FeatureCollection => { - let feature = geojson::Feature { - bbox: None, - geometry: Some(geojson_geom), - id: None, - properties: None, - foreign_members: None, - }; - let feature_collection = geojson::FeatureCollection { - bbox: None, - features: vec![feature], - foreign_members: None, - }; - let json_str = serde_json::to_string(&feature_collection) - .map_err(|err| DataFusionError::External(Box::new(err)))?; - builder.append_value(&json_str); - } - } + // Serialize to JSON string + let json_str = serde_json::to_string(&geojson_geom) + .map_err(|err| DataFusionError::External(Box::new(err)))?; + builder.append_value(&json_str); } None => builder.append_null(), }; @@ -147,43 +78,6 @@ impl SedonaScalarKernel for STAsGeoJSON { } } -#[derive(Debug)] -struct STAsGeoJSONWithType {} - -impl SedonaScalarKernel for STAsGeoJSONWithType { - fn return_type(&self, args: &[SedonaType]) -> Result> { - let matcher = ArgMatcher::new( - vec![ArgMatcher::is_geometry(), ArgMatcher::is_string()], - SedonaType::Arrow(DataType::Utf8), - ); - - matcher.match_args(args) - } - - fn invoke_batch( - &self, - arg_types: &[SedonaType], - args: &[ColumnarValue], - ) -> Result { - // Extract the type parameter - let geojson_type = match &args[1] { - ColumnarValue::Scalar(datafusion_common::ScalarValue::Utf8(Some(type_str))) => { - GeoJsonType::from_geojson_type_str(type_str.as_str())? - } - ColumnarValue::Scalar(datafusion_common::ScalarValue::Utf8(None)) => { - GeoJsonType::Simple // Default to Simple if NULL - } - _ => { - return exec_err!("Second argument to ST_AsGeoJSON must be a string literal"); - } - }; - - // Delegate to the appropriate kernel based on the type parameter - let kernel = st_asgeojson_impl(geojson_type); - kernel.invoke_batch(&arg_types[..1], &args[..1]) - } -} - #[cfg(test)] mod tests { use datafusion_common::scalar::ScalarValue; @@ -195,7 +89,7 @@ mod tests { #[test] fn test_simple_geojson() { - let kernel = st_asgeojson_impl(GeoJsonType::Simple); + let kernel = st_asgeojson_impl(); let udf = SedonaScalarUDF::from_kernel("st_asgeojson", kernel); let tester = ScalarUdfTester::new(udf.into(), vec![WKB_GEOMETRY]); @@ -209,34 +103,38 @@ mod tests { } #[test] - fn test_feature_geojson() { - let kernel = st_asgeojson_impl(GeoJsonType::Feature); + fn test_linestring() { + let kernel = st_asgeojson_impl(); let udf = SedonaScalarUDF::from_kernel("st_asgeojson", kernel); let tester = ScalarUdfTester::new(udf.into(), vec![WKB_GEOMETRY]); - let result = tester.invoke_wkb_scalar(Some("POINT (1 2)")).unwrap(); + let result = tester + .invoke_wkb_scalar(Some("LINESTRING (0 0, 1 1, 2 2)")) + .unwrap(); tester.assert_scalar_result_equals( result, - r#"{"type":"Feature","geometry":{"type":"Point","coordinates":[1.0,2.0]},"properties":null}"#, + r#"{"type":"LineString","coordinates":[[0.0,0.0],[1.0,1.0],[2.0,2.0]]}"#, ); } #[test] - fn test_feature_collection_geojson() { - let kernel = st_asgeojson_impl(GeoJsonType::FeatureCollection); + fn test_polygon() { + let kernel = st_asgeojson_impl(); let udf = SedonaScalarUDF::from_kernel("st_asgeojson", kernel); let tester = ScalarUdfTester::new(udf.into(), vec![WKB_GEOMETRY]); - let result = tester.invoke_wkb_scalar(Some("POINT (1 2)")).unwrap(); + let result = tester + .invoke_wkb_scalar(Some("POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))")) + .unwrap(); tester.assert_scalar_result_equals( result, - r#"{"type":"FeatureCollection","features":[{"type":"Feature","geometry":{"type":"Point","coordinates":[1.0,2.0]},"properties":null}]}"#, + r#"{"type":"Polygon","coordinates":[[[0.0,0.0],[1.0,0.0],[1.0,1.0],[0.0,1.0],[0.0,0.0]]]}"#, ); } #[test] fn test_geometry_collection() { - let kernel = st_asgeojson_impl(GeoJsonType::Simple); + let kernel = st_asgeojson_impl(); let udf = SedonaScalarUDF::from_kernel("st_asgeojson", kernel); let tester = ScalarUdfTester::new(udf.into(), vec![WKB_GEOMETRY]); @@ -248,54 +146,4 @@ mod tests { r#"{"type":"GeometryCollection","geometries":[{"type":"Point","coordinates":[1.0,2.0]},{"type":"LineString","coordinates":[[0.0,0.0],[1.0,1.0]]}]}"#, ); } - - #[test] - fn test_with_type_parameter() { - let kernel = st_asgeojson_with_type_impl(); - let udf = SedonaScalarUDF::from_kernel("st_asgeojson", kernel); - let tester = ScalarUdfTester::new( - udf.into(), - vec![WKB_GEOMETRY, SedonaType::Arrow(DataType::Utf8)], - ); - - // Test with 'Simple' type - let result = tester - .invoke_scalar_scalar("POINT (1 2)", "Simple") - .unwrap(); - tester.assert_scalar_result_equals(result, r#"{"type":"Point","coordinates":[1.0,2.0]}"#); - - // Test with 'Feature' type - let result = tester - .invoke_scalar_scalar("POINT (1 2)", "Feature") - .unwrap(); - tester.assert_scalar_result_equals( - result, - r#"{"type":"Feature","geometry":{"type":"Point","coordinates":[1.0,2.0]},"properties":null}"#, - ); - - // Test with 'FeatureCollection' type - let result = tester - .invoke_scalar_scalar("POINT (1 2)", "FeatureCollection") - .unwrap(); - tester.assert_scalar_result_equals( - result, - r#"{"type":"FeatureCollection","features":[{"type":"Feature","geometry":{"type":"Point","coordinates":[1.0,2.0]},"properties":null}]}"#, - ); - } - - #[test] - fn test_invalid_type_string() { - let kernel = st_asgeojson_with_type_impl(); - let udf = SedonaScalarUDF::from_kernel("st_asgeojson", kernel); - let tester = ScalarUdfTester::new( - udf.into(), - vec![WKB_GEOMETRY, SedonaType::Arrow(DataType::Utf8)], - ); - - // Test with invalid type string - let result = tester.invoke_scalar_scalar("POINT (1 2)", "InvalidType"); - assert!(result.is_err()); - let err_msg = result.unwrap_err().to_string(); - assert!(err_msg.contains("Invalid GeoJSON type")); - } } From ad886bb8503150f2fa5141e78b63889ff0e84ab8 Mon Sep 17 00:00:00 2001 From: Pranav Toggi Date: Mon, 29 Dec 2025 14:35:53 -0800 Subject: [PATCH 06/11] add integration tests --- .../tests/functions/test_functions.py | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/python/sedonadb/tests/functions/test_functions.py b/python/sedonadb/tests/functions/test_functions.py index 40daaff67..349ad2c70 100644 --- a/python/sedonadb/tests/functions/test_functions.py +++ b/python/sedonadb/tests/functions/test_functions.py @@ -119,6 +119,32 @@ def test_st_astext(eng, geom): eng.assert_query_result(f"SELECT ST_AsText({geom_or_null(geom)})", expected) +@pytest.mark.parametrize("eng", [SedonaDB, PostGIS]) +@pytest.mark.parametrize( + ("geom", "expected"), + [ + (None, None), + ("POINT EMPTY", '{"type":"Point","coordinates":[]}'), + ("LINESTRING EMPTY", '{"type":"LineString","coordinates":[]}'), + ("POLYGON EMPTY", '{"type":"Polygon","coordinates":[]}'), + ("MULTIPOINT EMPTY", '{"type":"MultiPoint","coordinates":[]}'), + ("MULTILINESTRING EMPTY", '{"type":"MultiLineString","coordinates":[]}'), + ("MULTIPOLYGON EMPTY", '{"type":"MultiPolygon","coordinates":[]}'), + ("GEOMETRYCOLLECTION EMPTY", '{"type":"GeometryCollection","geometries":[]}'), + ("POINT (1 2)", '{"type":"Point","coordinates":[1.0,2.0]}'), + ("LINESTRING (0 0, 1 1)", '{"type":"LineString","coordinates":[[0.0,0.0],[1.0,1.0]]}'), + ("POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))", '{"type":"Polygon","coordinates":[[[0.0,0.0],[1.0,0.0],[1.0,1.0],[0.0,1.0],[0.0,0.0]]]}'), + ("MULTIPOINT ((0 0), (1 1))", '{"type":"MultiPoint","coordinates":[[0.0,0.0],[1.0,1.0]]}'), + ("MULTILINESTRING ((0 0, 1 1), (2 2, 3 3))", '{"type":"MultiLineString","coordinates":[[[0.0,0.0],[1.0,1.0]],[[2.0,2.0],[3.0,3.0]]]}'), + ("MULTIPOLYGON (((0 0, 1 0, 1 1, 0 1, 0 0)), ((2 2, 3 2, 3 3, 2 3, 2 2)))", '{"type":"MultiPolygon","coordinates":[[[[0.0,0.0],[1.0,0.0],[1.0,1.0],[0.0,1.0],[0.0,0.0]]],[[[2.0,2.0],[3.0,2.0],[3.0,3.0],[2.0,3.0],[2.0,2.0]]]]}'), + ("GEOMETRYCOLLECTION (POINT (0 0), LINESTRING (1 1, 2 2))", '{"type":"GeometryCollection","geometries":[{"type":"Point","coordinates":[0.0,0.0]},{"type":"LineString","coordinates":[[1.0,1.0],[2.0,2.0]]}]}'), + ], +) +def test_st_asgeojson(eng, geom, expected): + eng = eng.create_or_skip() + eng.assert_query_result(f"SELECT ST_AsGeoJSON({geom_or_null(geom)})", expected) + + @pytest.mark.parametrize("eng", [SedonaDB, PostGIS]) @pytest.mark.parametrize( ("geom1", "geom2", "expected"), From 29f333c3277426d209cb65e32cce206c2857189a Mon Sep 17 00:00:00 2001 From: Pranav Toggi Date: Mon, 29 Dec 2025 14:39:14 -0800 Subject: [PATCH 07/11] remove empty point test --- python/sedonadb/tests/functions/test_functions.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python/sedonadb/tests/functions/test_functions.py b/python/sedonadb/tests/functions/test_functions.py index 349ad2c70..63044af91 100644 --- a/python/sedonadb/tests/functions/test_functions.py +++ b/python/sedonadb/tests/functions/test_functions.py @@ -124,9 +124,8 @@ def test_st_astext(eng, geom): ("geom", "expected"), [ (None, None), - ("POINT EMPTY", '{"type":"Point","coordinates":[]}'), ("LINESTRING EMPTY", '{"type":"LineString","coordinates":[]}'), - ("POLYGON EMPTY", '{"type":"Polygon","coordinates":[]}'), + ("POLYGON EMPTY", '{"type":"Polygon","coordinates":[[]]}'), ("MULTIPOINT EMPTY", '{"type":"MultiPoint","coordinates":[]}'), ("MULTILINESTRING EMPTY", '{"type":"MultiLineString","coordinates":[]}'), ("MULTIPOLYGON EMPTY", '{"type":"MultiPolygon","coordinates":[]}'), From 3660f07c693b9b5948f1d083d225e62c48cba4d0 Mon Sep 17 00:00:00 2001 From: Pranav Toggi Date: Mon, 29 Dec 2025 14:49:40 -0800 Subject: [PATCH 08/11] pre-commit fix --- .../tests/functions/test_functions.py | 30 +++++++++++++++---- 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/python/sedonadb/tests/functions/test_functions.py b/python/sedonadb/tests/functions/test_functions.py index 63044af91..32ec26c4c 100644 --- a/python/sedonadb/tests/functions/test_functions.py +++ b/python/sedonadb/tests/functions/test_functions.py @@ -131,12 +131,30 @@ def test_st_astext(eng, geom): ("MULTIPOLYGON EMPTY", '{"type":"MultiPolygon","coordinates":[]}'), ("GEOMETRYCOLLECTION EMPTY", '{"type":"GeometryCollection","geometries":[]}'), ("POINT (1 2)", '{"type":"Point","coordinates":[1.0,2.0]}'), - ("LINESTRING (0 0, 1 1)", '{"type":"LineString","coordinates":[[0.0,0.0],[1.0,1.0]]}'), - ("POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))", '{"type":"Polygon","coordinates":[[[0.0,0.0],[1.0,0.0],[1.0,1.0],[0.0,1.0],[0.0,0.0]]]}'), - ("MULTIPOINT ((0 0), (1 1))", '{"type":"MultiPoint","coordinates":[[0.0,0.0],[1.0,1.0]]}'), - ("MULTILINESTRING ((0 0, 1 1), (2 2, 3 3))", '{"type":"MultiLineString","coordinates":[[[0.0,0.0],[1.0,1.0]],[[2.0,2.0],[3.0,3.0]]]}'), - ("MULTIPOLYGON (((0 0, 1 0, 1 1, 0 1, 0 0)), ((2 2, 3 2, 3 3, 2 3, 2 2)))", '{"type":"MultiPolygon","coordinates":[[[[0.0,0.0],[1.0,0.0],[1.0,1.0],[0.0,1.0],[0.0,0.0]]],[[[2.0,2.0],[3.0,2.0],[3.0,3.0],[2.0,3.0],[2.0,2.0]]]]}'), - ("GEOMETRYCOLLECTION (POINT (0 0), LINESTRING (1 1, 2 2))", '{"type":"GeometryCollection","geometries":[{"type":"Point","coordinates":[0.0,0.0]},{"type":"LineString","coordinates":[[1.0,1.0],[2.0,2.0]]}]}'), + ( + "LINESTRING (0 0, 1 1)", + '{"type":"LineString","coordinates":[[0.0,0.0],[1.0,1.0]]}', + ), + ( + "POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))", + '{"type":"Polygon","coordinates":[[[0.0,0.0],[1.0,0.0],[1.0,1.0],[0.0,1.0],[0.0,0.0]]]}', + ), + ( + "MULTIPOINT ((0 0), (1 1))", + '{"type":"MultiPoint","coordinates":[[0.0,0.0],[1.0,1.0]]}', + ), + ( + "MULTILINESTRING ((0 0, 1 1), (2 2, 3 3))", + '{"type":"MultiLineString","coordinates":[[[0.0,0.0],[1.0,1.0]],[[2.0,2.0],[3.0,3.0]]]}', + ), + ( + "MULTIPOLYGON (((0 0, 1 0, 1 1, 0 1, 0 0)), ((2 2, 3 2, 3 3, 2 3, 2 2)))", + '{"type":"MultiPolygon","coordinates":[[[[0.0,0.0],[1.0,0.0],[1.0,1.0],[0.0,1.0],[0.0,0.0]]],[[[2.0,2.0],[3.0,2.0],[3.0,3.0],[2.0,3.0],[2.0,2.0]]]]}', + ), + ( + "GEOMETRYCOLLECTION (POINT (0 0), LINESTRING (1 1, 2 2))", + '{"type":"GeometryCollection","geometries":[{"type":"Point","coordinates":[0.0,0.0]},{"type":"LineString","coordinates":[[1.0,1.0],[2.0,2.0]]}]}', + ), ], ) def test_st_asgeojson(eng, geom, expected): From 0da4fc0bbd595564d47b2241c422805310dedc0a Mon Sep 17 00:00:00 2001 From: Pranav Toggi Date: Tue, 30 Dec 2025 17:03:10 -0800 Subject: [PATCH 09/11] Apply @petern48's suggestions from code review Co-authored-by: Peter Nguyen --- rust/sedona-geo/src/st_asgeojson.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/rust/sedona-geo/src/st_asgeojson.rs b/rust/sedona-geo/src/st_asgeojson.rs index 5d7f93229..af91e0b92 100644 --- a/rust/sedona-geo/src/st_asgeojson.rs +++ b/rust/sedona-geo/src/st_asgeojson.rs @@ -49,9 +49,8 @@ impl SedonaScalarKernel for STAsGeoJSON { ) -> Result { let executor = GeoTypesExecutor::new(arg_types, args); - // Estimate the minimum probable memory requirement of the output. - // GeoJSON is typically longer than WKT due to JSON formatting. - let min_probable_geojson_size = executor.num_iterations() * 50; + // Minimal GeoJSON: {"type":"Point","coordinates":[]} + let min_probable_geojson_size = executor.num_iterations() * 33; // Initialize an output builder of the appropriate type let mut builder = From 5c340dbc236fb44b234c1108a0c32fbb290ae6e4 Mon Sep 17 00:00:00 2001 From: Pranav Toggi Date: Thu, 1 Jan 2026 17:43:18 -0800 Subject: [PATCH 10/11] Special case empty point and polygon --- .../tests/functions/test_functions.py | 3 +- rust/sedona-functions/src/st_asgeojson.rs | 8 +- rust/sedona-geo/src/st_asgeojson.rs | 74 +++++++++++++++---- 3 files changed, 64 insertions(+), 21 deletions(-) diff --git a/python/sedonadb/tests/functions/test_functions.py b/python/sedonadb/tests/functions/test_functions.py index 32ec26c4c..1a1dc3d6c 100644 --- a/python/sedonadb/tests/functions/test_functions.py +++ b/python/sedonadb/tests/functions/test_functions.py @@ -124,8 +124,9 @@ def test_st_astext(eng, geom): ("geom", "expected"), [ (None, None), + ("POINT EMPTY", '{"type":"Point","coordinates":[]}'), ("LINESTRING EMPTY", '{"type":"LineString","coordinates":[]}'), - ("POLYGON EMPTY", '{"type":"Polygon","coordinates":[[]]}'), + ("POLYGON EMPTY", '{"type":"Polygon","coordinates":[]}'), ("MULTIPOINT EMPTY", '{"type":"MultiPoint","coordinates":[]}'), ("MULTILINESTRING EMPTY", '{"type":"MultiLineString","coordinates":[]}'), ("MULTIPOLYGON EMPTY", '{"type":"MultiPolygon","coordinates":[]}'), diff --git a/rust/sedona-functions/src/st_asgeojson.rs b/rust/sedona-functions/src/st_asgeojson.rs index d277c444f..c3d127c35 100644 --- a/rust/sedona-functions/src/st_asgeojson.rs +++ b/rust/sedona-functions/src/st_asgeojson.rs @@ -38,16 +38,10 @@ fn st_asgeojson_doc() -> Documentation { Documentation::builder( DOC_SECTION_OTHER, "Return the GeoJSON representation of a geometry", - "ST_AsGeoJSON (A: Geometry [, type: String])", + "ST_AsGeoJSON (A: Geometry)", ) .with_argument("geom", "geometry: Input geometry") - .with_argument( - "type", - "string (optional): Output type - 'Simple' (default), 'Feature', or 'FeatureCollection'", - ) .with_sql_example("SELECT ST_AsGeoJSON(ST_Point(1.0, 2.0))") - .with_sql_example("SELECT ST_AsGeoJSON(ST_Point(1.0, 2.0), 'Feature')") - .with_sql_example("SELECT ST_AsGeoJSON(ST_Point(1.0, 2.0), 'FeatureCollection')") .with_related_udf("ST_GeomFromGeoJSON") .build() } diff --git a/rust/sedona-geo/src/st_asgeojson.rs b/rust/sedona-geo/src/st_asgeojson.rs index af91e0b92..29394be0b 100644 --- a/rust/sedona-geo/src/st_asgeojson.rs +++ b/rust/sedona-geo/src/st_asgeojson.rs @@ -16,15 +16,19 @@ // under the License. use std::sync::Arc; -use crate::to_geo::GeoTypesExecutor; use arrow_array::builder::StringBuilder; use arrow_schema::DataType; use datafusion_common::error::{DataFusionError, Result}; use datafusion_expr::ColumnarValue; +use geo_traits::{GeometryTrait, PointTrait, PolygonTrait}; use sedona_expr::scalar_udf::{ScalarKernelRef, SedonaScalarKernel}; +use sedona_functions::executor::WkbExecutor; use sedona_schema::{datatypes::SedonaType, matchers::ArgMatcher}; +use wkb::reader::Wkb; -/// ST_AsGeoJSON() kernel implementation using GeoTypesExecutor +use crate::to_geo::item_to_geometry; + +/// ST_AsGeoJSON() kernel implementation using WkbExecutor pub fn st_asgeojson_impl() -> ScalarKernelRef { Arc::new(STAsGeoJSON {}) } @@ -47,24 +51,20 @@ impl SedonaScalarKernel for STAsGeoJSON { arg_types: &[SedonaType], args: &[ColumnarValue], ) -> Result { - let executor = GeoTypesExecutor::new(arg_types, args); + let executor = WkbExecutor::new(arg_types, args); - // Minimal GeoJSON: {"type":"Point","coordinates":[]} + // Estimate the minimum probable memory requirement of the output. + // GeoJSON is typically longer than WKT due to JSON formatting. let min_probable_geojson_size = executor.num_iterations() * 33; // Initialize an output builder of the appropriate type let mut builder = StringBuilder::with_capacity(executor.num_iterations(), min_probable_geojson_size); - executor.execute_wkb_void(|maybe_geom| { - match maybe_geom { - Some(geom) => { - // Convert geo_types::Geometry to geojson::Geometry - let geojson_geom: geojson::Geometry = (&geom).into(); - - // Serialize to JSON string - let json_str = serde_json::to_string(&geojson_geom) - .map_err(|err| DataFusionError::External(Box::new(err)))?; + executor.execute_wkb_void(|maybe_wkb| { + match maybe_wkb { + Some(wkb) => { + let json_str = geom_to_geojson(&wkb)?; builder.append_value(&json_str); } None => builder.append_null(), @@ -77,6 +77,34 @@ impl SedonaScalarKernel for STAsGeoJSON { } } +/// Convert a WKB geometry to GeoJSON string, handling special cases for empty geometries +fn geom_to_geojson(geom: &Wkb) -> Result { + // Special case handling for geometries that geo_types::Geometry cannot represent + match geom.as_type() { + geo_traits::GeometryType::Point(pt) => { + if pt.coord().is_none() { + // Empty point - geo_types cannot represent this + return Ok(r#"{"type":"Point","coordinates":[]}"#.to_string()); + } + } + geo_traits::GeometryType::Polygon(poly) => { + if poly.exterior().is_none() { + // Empty polygon - to match PostGIS behavior + return Ok(r#"{"type":"Polygon","coordinates":[]}"#.to_string()); + } + } + _ => {} + } + + // For all other geometries (including other empty geometries), convert to geo_types::Geometry + let geo_geom = item_to_geometry(geom)?; + + let geojson_value = geojson::Value::from(&geo_geom); + let geojson_geom = geojson::Geometry::new(geojson_value); + + serde_json::to_string(&geojson_geom).map_err(|err| DataFusionError::External(Box::new(err))) +} + #[cfg(test)] mod tests { use datafusion_common::scalar::ScalarValue; @@ -145,4 +173,24 @@ mod tests { r#"{"type":"GeometryCollection","geometries":[{"type":"Point","coordinates":[1.0,2.0]},{"type":"LineString","coordinates":[[0.0,0.0],[1.0,1.0]]}]}"#, ); } + + #[test] + fn test_empty_point() { + let kernel = st_asgeojson_impl(); + let udf = SedonaScalarUDF::from_kernel("st_asgeojson", kernel); + let tester = ScalarUdfTester::new(udf.into(), vec![WKB_GEOMETRY]); + + let result = tester.invoke_wkb_scalar(Some("POINT EMPTY")).unwrap(); + tester.assert_scalar_result_equals(result, r#"{"type":"Point","coordinates":[]}"#); + } + + #[test] + fn test_empty_polygon() { + let kernel = st_asgeojson_impl(); + let udf = SedonaScalarUDF::from_kernel("st_asgeojson", kernel); + let tester = ScalarUdfTester::new(udf.into(), vec![WKB_GEOMETRY]); + + let result = tester.invoke_wkb_scalar(Some("POLYGON EMPTY")).unwrap(); + tester.assert_scalar_result_equals(result, r#"{"type":"Polygon","coordinates":[]}"#); + } } From 5b092e409f773044feaa137ce47e168515d3eb91 Mon Sep 17 00:00:00 2001 From: Pranav Toggi Date: Thu, 1 Jan 2026 18:45:46 -0800 Subject: [PATCH 11/11] fix python tests --- .../tests/functions/test_functions.py | 29 ++++++++++--------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/python/sedonadb/tests/functions/test_functions.py b/python/sedonadb/tests/functions/test_functions.py index 1a1dc3d6c..dd1024374 100644 --- a/python/sedonadb/tests/functions/test_functions.py +++ b/python/sedonadb/tests/functions/test_functions.py @@ -123,6 +123,9 @@ def test_st_astext(eng, geom): @pytest.mark.parametrize( ("geom", "expected"), [ + # Note: Using coordinates with decimal values instead of integers + # because PostGIS returns integer coordinates in GeoJSON when the geometry has integer + # coordinates, while SedonaDB always returns floats. See issue #472. (None, None), ("POINT EMPTY", '{"type":"Point","coordinates":[]}'), ("LINESTRING EMPTY", '{"type":"LineString","coordinates":[]}'), @@ -131,30 +134,30 @@ def test_st_astext(eng, geom): ("MULTILINESTRING EMPTY", '{"type":"MultiLineString","coordinates":[]}'), ("MULTIPOLYGON EMPTY", '{"type":"MultiPolygon","coordinates":[]}'), ("GEOMETRYCOLLECTION EMPTY", '{"type":"GeometryCollection","geometries":[]}'), - ("POINT (1 2)", '{"type":"Point","coordinates":[1.0,2.0]}'), + ("POINT (1.5 2.5)", '{"type":"Point","coordinates":[1.5,2.5]}'), ( - "LINESTRING (0 0, 1 1)", - '{"type":"LineString","coordinates":[[0.0,0.0],[1.0,1.0]]}', + "LINESTRING (0.5 0.5, 1.5 1.5)", + '{"type":"LineString","coordinates":[[0.5,0.5],[1.5,1.5]]}', ), ( - "POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))", - '{"type":"Polygon","coordinates":[[[0.0,0.0],[1.0,0.0],[1.0,1.0],[0.0,1.0],[0.0,0.0]]]}', + "POLYGON ((0.5 0.5, 1.5 0.5, 1.5 1.5, 0.5 1.5, 0.5 0.5))", + '{"type":"Polygon","coordinates":[[[0.5,0.5],[1.5,0.5],[1.5,1.5],[0.5,1.5],[0.5,0.5]]]}', ), ( - "MULTIPOINT ((0 0), (1 1))", - '{"type":"MultiPoint","coordinates":[[0.0,0.0],[1.0,1.0]]}', + "MULTIPOINT ((0.5 0.5), (1.5 1.5))", + '{"type":"MultiPoint","coordinates":[[0.5,0.5],[1.5,1.5]]}', ), ( - "MULTILINESTRING ((0 0, 1 1), (2 2, 3 3))", - '{"type":"MultiLineString","coordinates":[[[0.0,0.0],[1.0,1.0]],[[2.0,2.0],[3.0,3.0]]]}', + "MULTILINESTRING ((0.5 0.5, 1.5 1.5), (2.5 2.5, 3.5 3.5))", + '{"type":"MultiLineString","coordinates":[[[0.5,0.5],[1.5,1.5]],[[2.5,2.5],[3.5,3.5]]]}', ), ( - "MULTIPOLYGON (((0 0, 1 0, 1 1, 0 1, 0 0)), ((2 2, 3 2, 3 3, 2 3, 2 2)))", - '{"type":"MultiPolygon","coordinates":[[[[0.0,0.0],[1.0,0.0],[1.0,1.0],[0.0,1.0],[0.0,0.0]]],[[[2.0,2.0],[3.0,2.0],[3.0,3.0],[2.0,3.0],[2.0,2.0]]]]}', + "MULTIPOLYGON (((0.5 0.5, 1.5 0.5, 1.5 1.5, 0.5 1.5, 0.5 0.5)), ((2.5 2.5, 3.5 2.5, 3.5 3.5, 2.5 3.5, 2.5 2.5)))", + '{"type":"MultiPolygon","coordinates":[[[[0.5,0.5],[1.5,0.5],[1.5,1.5],[0.5,1.5],[0.5,0.5]]],[[[2.5,2.5],[3.5,2.5],[3.5,3.5],[2.5,3.5],[2.5,2.5]]]]}', ), ( - "GEOMETRYCOLLECTION (POINT (0 0), LINESTRING (1 1, 2 2))", - '{"type":"GeometryCollection","geometries":[{"type":"Point","coordinates":[0.0,0.0]},{"type":"LineString","coordinates":[[1.0,1.0],[2.0,2.0]]}]}', + "GEOMETRYCOLLECTION (POINT (0.5 0.5), LINESTRING (1.5 1.5, 2.5 2.5))", + '{"type":"GeometryCollection","geometries":[{"type":"Point","coordinates":[0.5,0.5]},{"type":"LineString","coordinates":[[1.5,1.5],[2.5,2.5]]}]}', ), ], )