diff --git a/Cargo.toml b/Cargo.toml
index 9780fbe16..a84e149cb 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -105,6 +105,7 @@ geos = { git="https://github.com/georust/geos.git", rev="47afbad2483e489911ddb45
 geo-types = "0.7.17"
 geo-traits = "0.3.0"
 geo = "0.31.0"
+geojson = "0.24.2"
 geo-index = { version = "0.3.2", features = ["use-geo_0_31"] }
diff --git a/python/sedonadb/tests/functions/test_functions.py b/python/sedonadb/tests/functions/test_functions.py
index 40daaff67..dd1024374 100644
--- a/python/sedonadb/tests/functions/test_functions.py
+++ b/python/sedonadb/tests/functions/test_functions.py
@@ -119,6 +119,53 @@ def test_st_astext(eng, geom):
     eng.assert_query_result(f"SELECT ST_AsText({geom_or_null(geom)})", expected)
+@pytest.mark.parametrize("eng", [SedonaDB, PostGIS])
+@pytest.mark.parametrize(
+    ("geom", "expected"),
+    [
+        # Non-empty cases deliberately use fractional coordinates: for whole-number
+        # input PostGIS emits integers (e.g. [1,2]) while SedonaDB always emits
+        # floats (e.g. [1.0,2.0]), so the engines would disagree. See issue #472.
+        (None, None),
+        ("POINT EMPTY", '{"type":"Point","coordinates":[]}'),
+        ("LINESTRING EMPTY", '{"type":"LineString","coordinates":[]}'),
+        ("POLYGON EMPTY", '{"type":"Polygon","coordinates":[]}'),
+        ("MULTIPOINT EMPTY", '{"type":"MultiPoint","coordinates":[]}'),
+        ("MULTILINESTRING EMPTY", '{"type":"MultiLineString","coordinates":[]}'),
+        ("MULTIPOLYGON EMPTY", '{"type":"MultiPolygon","coordinates":[]}'),
+        ("GEOMETRYCOLLECTION EMPTY", '{"type":"GeometryCollection","geometries":[]}'),
+        ("POINT (1.5 2.5)", '{"type":"Point","coordinates":[1.5,2.5]}'),
+        (
+            "LINESTRING (0.5 0.5, 1.5 1.5)",
+            '{"type":"LineString","coordinates":[[0.5,0.5],[1.5,1.5]]}',
+        ),
+        (
+            "POLYGON ((0.5 0.5, 1.5 0.5, 1.5 1.5, 0.5 1.5, 0.5 0.5))",
+            '{"type":"Polygon","coordinates":[[[0.5,0.5],[1.5,0.5],[1.5,1.5],[0.5,1.5],[0.5,0.5]]]}',
+        ),
+        (
+            "MULTIPOINT ((0.5 0.5), (1.5 1.5))",
+            '{"type":"MultiPoint","coordinates":[[0.5,0.5],[1.5,1.5]]}',
+        ),
+        (
+            "MULTILINESTRING ((0.5 0.5, 1.5 1.5), (2.5 2.5, 3.5 3.5))",
+            '{"type":"MultiLineString","coordinates":[[[0.5,0.5],[1.5,1.5]],[[2.5,2.5],[3.5,3.5]]]}',
+        ),
+        (
+            "MULTIPOLYGON (((0.5 0.5, 1.5 0.5, 1.5 1.5, 0.5 1.5, 0.5 0.5)), ((2.5 2.5, 3.5 2.5, 3.5 3.5, 2.5 3.5, 2.5 2.5)))",
+            '{"type":"MultiPolygon","coordinates":[[[[0.5,0.5],[1.5,0.5],[1.5,1.5],[0.5,1.5],[0.5,0.5]]],[[[2.5,2.5],[3.5,2.5],[3.5,3.5],[2.5,3.5],[2.5,2.5]]]]}',
+        ),
+        (
+            "GEOMETRYCOLLECTION (POINT (0.5 0.5), LINESTRING (1.5 1.5, 2.5 2.5))",
+            '{"type":"GeometryCollection","geometries":[{"type":"Point","coordinates":[0.5,0.5]},{"type":"LineString","coordinates":[[1.5,1.5],[2.5,2.5]]}]}',
+        ),
+    ],
+)
+def test_st_asgeojson(eng, geom, expected):
+    eng = eng.create_or_skip()
+    eng.assert_query_result(f"SELECT ST_AsGeoJSON({geom_or_null(geom)})", expected)
+
+
+@pytest.mark.parametrize("eng", [SedonaDB, PostGIS])
 @pytest.mark.parametrize(
     ("geom1", "geom2", "expected"),
diff --git a/rust/sedona-functions/src/lib.rs b/rust/sedona-functions/src/lib.rs
index 44c8ad027..4fe633c84
100644
--- a/rust/sedona-functions/src/lib.rs
+++ b/rust/sedona-functions/src/lib.rs
@@ -26,6 +26,7 @@ pub mod sd_order;
 pub mod st_analyze_agg;
 mod st_area;
 mod st_asbinary;
+mod st_asgeojson;
 mod st_astext;
 mod st_azimuth;
 mod st_buffer;
diff --git a/rust/sedona-functions/src/register.rs b/rust/sedona-functions/src/register.rs
index ff4395787..9fce9f5f6 100644
--- a/rust/sedona-functions/src/register.rs
+++ b/rust/sedona-functions/src/register.rs
@@ -64,6 +64,7 @@ pub fn default_function_set() -> FunctionSet {
         crate::sd_order::sd_order_udf,
         crate::st_area::st_area_udf,
         crate::st_asbinary::st_asbinary_udf,
+        crate::st_asgeojson::st_asgeojson_udf,
         crate::st_astext::st_astext_udf,
         crate::st_azimuth::st_azimuth_udf,
         crate::st_buffer::st_buffer_udf,
diff --git a/rust/sedona-functions/src/st_asgeojson.rs b/rust/sedona-functions/src/st_asgeojson.rs
new file mode 100644
index 000000000..c3d127c35
--- /dev/null
+++ b/rust/sedona-functions/src/st_asgeojson.rs
@@ -0,0 +1,62 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+use arrow_schema::DataType;
+use datafusion_expr::{scalar_doc_sections::DOC_SECTION_OTHER, Documentation, Volatility};
+use sedona_expr::scalar_udf::SedonaScalarUDF;
+use sedona_schema::{datatypes::SedonaType, matchers::ArgMatcher};
+
+/// ST_AsGeoJSON() scalar UDF stub
+///
+/// Declares the function name, a signature that takes one geometry or
+/// geography argument and yields a UTF-8 string, the volatility and the
+/// documentation.
+pub fn st_asgeojson_udf() -> SedonaScalarUDF {
+    let signature = ArgMatcher::new(
+        vec![ArgMatcher::is_geometry_or_geography()],
+        SedonaType::Arrow(DataType::Utf8),
+    );
+    let docs = st_asgeojson_doc();
+
+    SedonaScalarUDF::new_stub("st_asgeojson", signature, Volatility::Immutable, Some(docs))
+}
+
+/// Documentation entry attached to the ST_AsGeoJSON() stub
+fn st_asgeojson_doc() -> Documentation {
+    let description = "Return the GeoJSON representation of a geometry";
+    let syntax_example = "ST_AsGeoJSON (A: Geometry)";
+
+    Documentation::builder(DOC_SECTION_OTHER, description, syntax_example)
+        .with_argument("geom", "geometry: Input geometry")
+        .with_sql_example("SELECT ST_AsGeoJSON(ST_Point(1.0, 2.0))")
+        .with_related_udf("ST_GeomFromGeoJSON")
+        .build()
+}
+
+#[cfg(test)]
+mod tests {
+    use datafusion_expr::ScalarUDF;
+
+    use super::*;
+
+    /// The stub reports its SQL name and carries documentation
+    #[test]
+    fn udf_metadata() {
+        let scalar_udf: ScalarUDF = st_asgeojson_udf().into();
+        assert_eq!(scalar_udf.name(), "st_asgeojson");
+        assert!(scalar_udf.documentation().is_some());
+    }
+}
diff --git a/rust/sedona-geo/Cargo.toml b/rust/sedona-geo/Cargo.toml
index c09400102..bd20a30b7 100644
--- a/rust/sedona-geo/Cargo.toml
+++ b/rust/sedona-geo/Cargo.toml
@@ -47,6 +47,8 @@ sedona-geo-generic-alg = { workspace = true }
 geo-traits = { workspace = true, features = ["geo-types"] }
 geo-types = { workspace = true }
 geo = { workspace = true }
+geojson = { workspace = true }
+serde_json = { workspace = true }
 sedona-expr = { workspace = true }
 sedona-functions = { workspace = true }
 sedona-geometry = { workspace = true }
diff --git a/rust/sedona-geo/src/lib.rs b/rust/sedona-geo/src/lib.rs
index 52182beb5..949c4fc40 100644
--- a/rust/sedona-geo/src/lib.rs
+++ b/rust/sedona-geo/src/lib.rs
@@ -17,6 +17,7 @@
 pub mod centroid;
 pub mod register;
 mod st_area;
+mod st_asgeojson;
 mod
st_buffer;
 mod st_centroid;
 pub mod st_concavehull;
diff --git a/rust/sedona-geo/src/register.rs b/rust/sedona-geo/src/register.rs
index 10c371cc9..d7dde2114 100644
--- a/rust/sedona-geo/src/register.rs
+++ b/rust/sedona-geo/src/register.rs
@@ -18,8 +18,8 @@ use sedona_expr::aggregate_udf::SedonaAccumulatorRef;
 use sedona_expr::scalar_udf::ScalarKernelRef;
 use crate::{
-    st_area::st_area_impl, st_buffer::st_buffer_impl, st_centroid::st_centroid_impl,
-    st_distance::st_distance_impl, st_dwithin::st_dwithin_impl,
+    st_area::st_area_impl, st_asgeojson::st_asgeojson_impl, st_buffer::st_buffer_impl,
+    st_centroid::st_centroid_impl, st_distance::st_distance_impl, st_dwithin::st_dwithin_impl,
     st_intersection_agg::st_intersection_agg_impl, st_intersects::st_intersects_impl,
     st_length::st_length_impl, st_line_interpolate_point::st_line_interpolate_point_impl,
     st_perimeter::st_perimeter_impl, st_union_agg::st_union_agg_impl,
@@ -29,6 +29,7 @@ pub fn scalar_kernels() -> Vec<(&'static str, ScalarKernelRef)> {
     vec![
         ("st_intersects", st_intersects_impl()),
         ("st_area", st_area_impl()),
+        ("st_asgeojson", st_asgeojson_impl()),
         ("st_buffer", st_buffer_impl()),
         ("st_centroid", st_centroid_impl()),
         ("st_distance", st_distance_impl()),
diff --git a/rust/sedona-geo/src/st_asgeojson.rs b/rust/sedona-geo/src/st_asgeojson.rs
new file mode 100644
index 000000000..29394be0b
--- /dev/null
+++ b/rust/sedona-geo/src/st_asgeojson.rs
@@ -0,0 +1,204 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+use std::sync::Arc;
+
+use arrow_array::builder::StringBuilder;
+use arrow_schema::DataType;
+use datafusion_common::error::{DataFusionError, Result};
+use datafusion_expr::ColumnarValue;
+use geo_traits::{GeometryTrait, PointTrait, PolygonTrait};
+use sedona_expr::scalar_udf::{ScalarKernelRef, SedonaScalarKernel};
+use sedona_functions::executor::WkbExecutor;
+use sedona_schema::{datatypes::SedonaType, matchers::ArgMatcher};
+use wkb::reader::Wkb;
+
+use crate::to_geo::item_to_geometry;
+
+/// ST_AsGeoJSON() kernel implementation using WkbExecutor
+///
+/// The kernel accepts a single geometry argument and produces one UTF-8
+/// GeoJSON string per input row; null inputs yield null outputs.
+pub fn st_asgeojson_impl() -> ScalarKernelRef {
+    Arc::new(STAsGeoJSON {})
+}
+
+#[derive(Debug)]
+struct STAsGeoJSON {}
+
+impl SedonaScalarKernel for STAsGeoJSON {
+    /// Match exactly one geometry argument and declare a UTF-8 string result
+    fn return_type(&self, args: &[SedonaType]) -> Result<Option<SedonaType>> {
+        let matcher = ArgMatcher::new(
+            vec![ArgMatcher::is_geometry()],
+            SedonaType::Arrow(DataType::Utf8),
+        );
+
+        matcher.match_args(args)
+    }
+
+    /// Convert every geometry in the batch to GeoJSON, keeping nulls as nulls
+    ///
+    /// An error from any single geometry is propagated with `?` and fails the
+    /// whole call.
+    fn invoke_batch(
+        &self,
+        arg_types: &[SedonaType],
+        args: &[ColumnarValue],
+    ) -> Result<ColumnarValue> {
+        let executor = WkbExecutor::new(arg_types, args);
+
+        // Estimate the minimum probable memory requirement of the output.
+        // 33 bytes is the length of `{"type":"Point","coordinates":[]}`, the
+        // empty-point output produced by geom_to_geojson().
+        let min_probable_geojson_size = executor.num_iterations() * 33;
+
+        // Initialize an output builder of the appropriate type
+        let mut builder =
+            StringBuilder::with_capacity(executor.num_iterations(), min_probable_geojson_size);
+
+        executor.execute_wkb_void(|maybe_wkb| {
+            match maybe_wkb {
+                Some(wkb) => {
+                    let json_str = geom_to_geojson(&wkb)?;
+                    builder.append_value(&json_str);
+                }
+                None => builder.append_null(),
+            };
+
+            Ok(())
+        })?;
+
+        executor.finish(Arc::new(builder.finish()))
+    }
+}
+
+/// Convert a WKB geometry to GeoJSON string, handling special cases for empty geometries
+///
+/// Only the top-level geometry is checked for the special cases. Any error from
+/// the geo_types conversion is propagated; a serialization error is wrapped in
+/// `DataFusionError::External`.
+fn geom_to_geojson(geom: &Wkb) -> Result<String> {
+    // Special case handling for geometries that geo_types::Geometry cannot represent
+    match geom.as_type() {
+        geo_traits::GeometryType::Point(pt) => {
+            if pt.coord().is_none() {
+                // Empty point - geo_types cannot represent this
+                return Ok(r#"{"type":"Point","coordinates":[]}"#.to_string());
+            }
+        }
+        geo_traits::GeometryType::Polygon(poly) => {
+            if poly.exterior().is_none() {
+                // Empty polygon - to match PostGIS behavior
+                return Ok(r#"{"type":"Polygon","coordinates":[]}"#.to_string());
+            }
+        }
+        _ => {}
+    }
+
+    // For all other geometries (including other empty geometries), convert to geo_types::Geometry
+    let geo_geom = item_to_geometry(geom)?;
+
+    let geojson_value = geojson::Value::from(&geo_geom);
+    let geojson_geom = geojson::Geometry::new(geojson_value);
+
+    serde_json::to_string(&geojson_geom).map_err(|err| DataFusionError::External(Box::new(err)))
+}
+
+#[cfg(test)]
+mod tests {
+    use datafusion_common::scalar::ScalarValue;
+    use sedona_expr::scalar_udf::SedonaScalarUDF;
+    use sedona_schema::datatypes::WKB_GEOMETRY;
+    use sedona_testing::testers::ScalarUdfTester;
+
+    use super::*;
+
+    /// Build a tester that invokes the kernel with one WKB geometry argument
+    fn geojson_tester() -> ScalarUdfTester {
+        let kernel = st_asgeojson_impl();
+        let udf = SedonaScalarUDF::from_kernel("st_asgeojson", kernel);
+        ScalarUdfTester::new(udf.into(), vec![WKB_GEOMETRY])
+    }
+
+    #[test]
+    fn test_simple_geojson() {
+        let tester = geojson_tester();
+
+        // Test with a simple point
+        let result = tester.invoke_wkb_scalar(Some("POINT (1 2)")).unwrap();
+        tester.assert_scalar_result_equals(result, r#"{"type":"Point","coordinates":[1.0,2.0]}"#);
+
+        // Test with null
+        let result = tester.invoke_wkb_scalar(None).unwrap();
+        assert_eq!(result, ScalarValue::Utf8(None));
+    }
+
+    #[test]
+    fn test_linestring() {
+        let tester = geojson_tester();
+
+        let result = tester
+            .invoke_wkb_scalar(Some("LINESTRING (0 0, 1 1, 2 2)"))
+            .unwrap();
+        tester.assert_scalar_result_equals(
+            result,
+            r#"{"type":"LineString","coordinates":[[0.0,0.0],[1.0,1.0],[2.0,2.0]]}"#,
+        );
+    }
+
+    #[test]
+    fn test_polygon() {
+        let tester = geojson_tester();
+
+        let result = tester
+            .invoke_wkb_scalar(Some("POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))"))
+            .unwrap();
+        tester.assert_scalar_result_equals(
+            result,
+            r#"{"type":"Polygon","coordinates":[[[0.0,0.0],[1.0,0.0],[1.0,1.0],[0.0,1.0],[0.0,0.0]]]}"#,
+        );
+    }
+
+    #[test]
+    fn test_geometry_collection() {
+        let tester = geojson_tester();
+
+        let result = tester
+            .invoke_wkb_scalar(Some("GEOMETRYCOLLECTION(POINT(1 2), LINESTRING(0 0, 1 1))"))
+            .unwrap();
+        tester.assert_scalar_result_equals(
+            result,
+            r#"{"type":"GeometryCollection","geometries":[{"type":"Point","coordinates":[1.0,2.0]},{"type":"LineString","coordinates":[[0.0,0.0],[1.0,1.0]]}]}"#,
+        );
+    }
+
+    #[test]
+    fn test_empty_point() {
+        let tester = geojson_tester();
+
+        let result = tester.invoke_wkb_scalar(Some("POINT EMPTY")).unwrap();
+        tester.assert_scalar_result_equals(result, r#"{"type":"Point","coordinates":[]}"#);
+    }
+
+    #[test]
+    fn test_empty_polygon() {
+        let tester = geojson_tester();
+
+        let result = tester.invoke_wkb_scalar(Some("POLYGON EMPTY")).unwrap();
+        tester.assert_scalar_result_equals(result, r#"{"type":"Polygon","coordinates":[]}"#);
+    }
+}