Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ geos = { git="https://github.com/georust/geos.git", rev="47afbad2483e489911ddb45
geo-types = "0.7.17"
geo-traits = "0.3.0"
geo = "0.31.0"
geojson = "0.24.2"

geo-index = { version = "0.3.2", features = ["use-geo_0_31"] }

Expand Down
47 changes: 47 additions & 0 deletions python/sedonadb/tests/functions/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,53 @@ def test_st_astext(eng, geom):
eng.assert_query_result(f"SELECT ST_AsText({geom_or_null(geom)})", expected)


@pytest.mark.parametrize("eng", [SedonaDB, PostGIS])
@pytest.mark.parametrize(
    ("geom", "expected"),
    [
        # Non-empty cases deliberately use coordinates with a fractional part:
        # PostGIS emits integer coordinates in GeoJSON when the geometry has
        # integer coordinates, whereas SedonaDB always emits floats, so integer
        # inputs would not compare equal across engines. See issue #472.
        (None, None),
        ("POINT EMPTY", '{"type":"Point","coordinates":[]}'),
        ("LINESTRING EMPTY", '{"type":"LineString","coordinates":[]}'),
        ("POLYGON EMPTY", '{"type":"Polygon","coordinates":[]}'),
        ("MULTIPOINT EMPTY", '{"type":"MultiPoint","coordinates":[]}'),
        ("MULTILINESTRING EMPTY", '{"type":"MultiLineString","coordinates":[]}'),
        ("MULTIPOLYGON EMPTY", '{"type":"MultiPolygon","coordinates":[]}'),
        ("GEOMETRYCOLLECTION EMPTY", '{"type":"GeometryCollection","geometries":[]}'),
        ("POINT (1.5 2.5)", '{"type":"Point","coordinates":[1.5,2.5]}'),
        (
            "LINESTRING (0.5 0.5, 1.5 1.5)",
            '{"type":"LineString","coordinates":[[0.5,0.5],[1.5,1.5]]}',
        ),
        (
            "POLYGON ((0.5 0.5, 1.5 0.5, 1.5 1.5, 0.5 1.5, 0.5 0.5))",
            '{"type":"Polygon","coordinates":[[[0.5,0.5],[1.5,0.5],[1.5,1.5],[0.5,1.5],[0.5,0.5]]]}',
        ),
        (
            "MULTIPOINT ((0.5 0.5), (1.5 1.5))",
            '{"type":"MultiPoint","coordinates":[[0.5,0.5],[1.5,1.5]]}',
        ),
        (
            "MULTILINESTRING ((0.5 0.5, 1.5 1.5), (2.5 2.5, 3.5 3.5))",
            '{"type":"MultiLineString","coordinates":[[[0.5,0.5],[1.5,1.5]],[[2.5,2.5],[3.5,3.5]]]}',
        ),
        (
            "MULTIPOLYGON (((0.5 0.5, 1.5 0.5, 1.5 1.5, 0.5 1.5, 0.5 0.5)), ((2.5 2.5, 3.5 2.5, 3.5 3.5, 2.5 3.5, 2.5 2.5)))",
            '{"type":"MultiPolygon","coordinates":[[[[0.5,0.5],[1.5,0.5],[1.5,1.5],[0.5,1.5],[0.5,0.5]]],[[[2.5,2.5],[3.5,2.5],[3.5,3.5],[2.5,3.5],[2.5,2.5]]]]}',
        ),
        (
            "GEOMETRYCOLLECTION (POINT (0.5 0.5), LINESTRING (1.5 1.5, 2.5 2.5))",
            '{"type":"GeometryCollection","geometries":[{"type":"Point","coordinates":[0.5,0.5]},{"type":"LineString","coordinates":[[1.5,1.5],[2.5,2.5]]}]}',
        ),
    ],
)
def test_st_asgeojson(eng, geom, expected):
    """Check ST_AsGeoJSON output for null, empty and populated geometries.

    Each case runs once per engine listed in the ``eng`` parametrization, so
    the expected strings must hold for both SedonaDB and PostGIS.
    """
    engine = eng.create_or_skip()
    query = f"SELECT ST_AsGeoJSON({geom_or_null(geom)})"
    engine.assert_query_result(query, expected)


@pytest.mark.parametrize("eng", [SedonaDB, PostGIS])
@pytest.mark.parametrize(
("geom1", "geom2", "expected"),
Expand Down
1 change: 1 addition & 0 deletions rust/sedona-functions/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ pub mod sd_order;
pub mod st_analyze_agg;
mod st_area;
mod st_asbinary;
mod st_asgeojson;
mod st_astext;
mod st_azimuth;
mod st_buffer;
Expand Down
1 change: 1 addition & 0 deletions rust/sedona-functions/src/register.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ pub fn default_function_set() -> FunctionSet {
crate::sd_order::sd_order_udf,
crate::st_area::st_area_udf,
crate::st_asbinary::st_asbinary_udf,
crate::st_asgeojson::st_asgeojson_udf,
crate::st_astext::st_astext_udf,
crate::st_azimuth::st_azimuth_udf,
crate::st_buffer::st_buffer_udf,
Expand Down
61 changes: 61 additions & 0 deletions rust/sedona-functions/src/st_asgeojson.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
use arrow_schema::DataType;
use datafusion_expr::{scalar_doc_sections::DOC_SECTION_OTHER, Documentation, Volatility};
use sedona_expr::scalar_udf::SedonaScalarUDF;
use sedona_schema::{datatypes::SedonaType, matchers::ArgMatcher};

/// Builds the stub definition of the `st_asgeojson` scalar UDF.
///
/// The stub declares the function name, a signature that takes one geometry
/// or geography argument and yields a UTF-8 string, immutable volatility, and
/// the documentation produced by `st_asgeojson_doc`.
pub fn st_asgeojson_udf() -> SedonaScalarUDF {
    let signature = ArgMatcher::new(
        vec![ArgMatcher::is_geometry_or_geography()],
        SedonaType::Arrow(DataType::Utf8),
    );
    let documentation = st_asgeojson_doc();

    SedonaScalarUDF::new_stub(
        "st_asgeojson",
        signature,
        Volatility::Immutable,
        Some(documentation),
    )
}

/// Assembles the user-facing documentation attached to `st_asgeojson`:
/// a description, a syntax line, one argument entry, a SQL example and a
/// related-function reference.
fn st_asgeojson_doc() -> Documentation {
    let description = "Return the GeoJSON representation of a geometry";
    let syntax = "ST_AsGeoJSON (A: Geometry)";

    let builder = Documentation::builder(DOC_SECTION_OTHER, description, syntax)
        .with_argument("geom", "geometry: Input geometry")
        .with_sql_example("SELECT ST_AsGeoJSON(ST_Point(1.0, 2.0))")
        .with_related_udf("ST_GeomFromGeoJSON");

    builder.build()
}

#[cfg(test)]
mod tests {
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In addition to Rust tests, we need integration tests in test_functions.py. Instructions for testing it locally and iterating are here.

use datafusion_expr::ScalarUDF;

use super::*;

#[test]
fn udf_metadata() {
    // Convert the stub into a DataFusion `ScalarUDF` and check that the
    // registered name and the attached documentation survive the conversion.
    let scalar_udf: ScalarUDF = st_asgeojson_udf().into();

    assert_eq!(scalar_udf.name(), "st_asgeojson");
    assert!(scalar_udf.documentation().is_some());
}
}
2 changes: 2 additions & 0 deletions rust/sedona-geo/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ sedona-geo-generic-alg = { workspace = true }
geo-traits = { workspace = true, features = ["geo-types"] }
geo-types = { workspace = true }
geo = { workspace = true }
geojson = { workspace = true }
serde_json = { workspace = true }
sedona-expr = { workspace = true }
sedona-functions = { workspace = true }
sedona-geometry = { workspace = true }
Expand Down
1 change: 1 addition & 0 deletions rust/sedona-geo/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
pub mod centroid;
pub mod register;
mod st_area;
mod st_asgeojson;
mod st_buffer;
mod st_centroid;
pub mod st_concavehull;
Expand Down
5 changes: 3 additions & 2 deletions rust/sedona-geo/src/register.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ use sedona_expr::aggregate_udf::SedonaAccumulatorRef;
use sedona_expr::scalar_udf::ScalarKernelRef;

use crate::{
st_area::st_area_impl, st_buffer::st_buffer_impl, st_centroid::st_centroid_impl,
st_distance::st_distance_impl, st_dwithin::st_dwithin_impl,
st_area::st_area_impl, st_asgeojson::st_asgeojson_impl, st_buffer::st_buffer_impl,
st_centroid::st_centroid_impl, st_distance::st_distance_impl, st_dwithin::st_dwithin_impl,
st_intersection_agg::st_intersection_agg_impl, st_intersects::st_intersects_impl,
st_length::st_length_impl, st_line_interpolate_point::st_line_interpolate_point_impl,
st_perimeter::st_perimeter_impl, st_union_agg::st_union_agg_impl,
Expand All @@ -29,6 +29,7 @@ pub fn scalar_kernels() -> Vec<(&'static str, ScalarKernelRef)> {
vec![
("st_intersects", st_intersects_impl()),
("st_area", st_area_impl()),
("st_asgeojson", st_asgeojson_impl()),
("st_buffer", st_buffer_impl()),
("st_centroid", st_centroid_impl()),
("st_distance", st_distance_impl()),
Expand Down
196 changes: 196 additions & 0 deletions rust/sedona-geo/src/st_asgeojson.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
use std::sync::Arc;

use arrow_array::builder::StringBuilder;
use arrow_schema::DataType;
use datafusion_common::error::{DataFusionError, Result};
use datafusion_expr::ColumnarValue;
use geo_traits::{GeometryTrait, PointTrait, PolygonTrait};
use sedona_expr::scalar_udf::{ScalarKernelRef, SedonaScalarKernel};
use sedona_functions::executor::WkbExecutor;
use sedona_schema::{datatypes::SedonaType, matchers::ArgMatcher};
use wkb::reader::Wkb;

use crate::to_geo::item_to_geometry;

/// Creates the kernel that implements `ST_AsGeoJSON()`.
///
/// The result is a reference-counted `STAsGeoJSON`, returned as a
/// `ScalarKernelRef`.
pub fn st_asgeojson_impl() -> ScalarKernelRef {
    let kernel = STAsGeoJSON {};
    Arc::new(kernel)
}

/// Kernel type for `ST_AsGeoJSON()`.
///
/// The struct has no fields; its behavior lives entirely in its
/// `SedonaScalarKernel` implementation.
#[derive(Debug)]
struct STAsGeoJSON {}

impl SedonaScalarKernel for STAsGeoJSON {
    /// Matches `args` against a signature of one geometry argument producing
    /// a UTF-8 string, and returns the matcher's verdict unchanged.
    fn return_type(&self, args: &[SedonaType]) -> Result<Option<SedonaType>> {
        ArgMatcher::new(
            vec![ArgMatcher::is_geometry()],
            SedonaType::Arrow(DataType::Utf8),
        )
        .match_args(args)
    }

    /// Converts every input geometry to a GeoJSON string.
    ///
    /// Null inputs produce null outputs. The first conversion error aborts
    /// the batch and is returned to the caller.
    fn invoke_batch(
        &self,
        arg_types: &[SedonaType],
        args: &[ColumnarValue],
    ) -> Result<ColumnarValue> {
        // Per-row lower bound used only to pre-size the output buffer; GeoJSON
        // is typically longer than WKT because of the JSON formatting. The
        // value matches the length of `{"type":"Point","coordinates":[]}`.
        const MIN_PROBABLE_GEOJSON_BYTES_PER_ROW: usize = 33;

        let executor = WkbExecutor::new(arg_types, args);
        let row_count = executor.num_iterations();

        // Output builder sized for the row count and the estimated byte total.
        let mut output = StringBuilder::with_capacity(
            row_count,
            row_count * MIN_PROBABLE_GEOJSON_BYTES_PER_ROW,
        );

        executor.execute_wkb_void(|maybe_wkb| {
            if let Some(wkb) = maybe_wkb {
                let json = geom_to_geojson(&wkb)?;
                output.append_value(json);
            } else {
                output.append_null();
            }

            Ok(())
        })?;

        executor.finish(Arc::new(output.finish()))
    }
}

/// Serializes a WKB geometry as a GeoJSON string.
///
/// An empty point and an empty polygon are answered with fixed literals
/// before any conversion is attempted. Every other geometry, including the
/// remaining empty kinds, is converted to a `geo_types` geometry and
/// serialized with `serde_json`.
///
/// # Errors
///
/// Propagates any error from `item_to_geometry`. A serialization failure is
/// wrapped in `DataFusionError::External`.
fn geom_to_geojson(geom: &Wkb) -> Result<String> {
    let empty_literal = match geom.as_type() {
        // An empty point cannot be represented by geo_types.
        geo_traits::GeometryType::Point(pt) if pt.coord().is_none() => {
            Some(r#"{"type":"Point","coordinates":[]}"#)
        }
        // An empty polygon is written this way to match PostGIS behavior.
        geo_traits::GeometryType::Polygon(poly) if poly.exterior().is_none() => {
            Some(r#"{"type":"Polygon","coordinates":[]}"#)
        }
        _ => None,
    };
    if let Some(literal) = empty_literal {
        return Ok(literal.to_string());
    }

    // General path: WKB -> geo_types::Geometry -> geojson::Geometry -> JSON text.
    let geometry = item_to_geometry(geom)?;
    let value = geojson::Value::from(&geometry);
    let document = geojson::Geometry::new(value);

    serde_json::to_string(&document).map_err(|err| DataFusionError::External(Box::new(err)))
}

#[cfg(test)]
mod tests {
    use datafusion_common::scalar::ScalarValue;
    use sedona_expr::scalar_udf::SedonaScalarUDF;
    use sedona_schema::datatypes::WKB_GEOMETRY;
    use sedona_testing::testers::ScalarUdfTester;

    use super::*;

    /// Wraps the kernel in a UDF named `st_asgeojson` and returns a tester
    /// configured for a single WKB geometry argument.
    fn geojson_tester() -> ScalarUdfTester {
        let udf = SedonaScalarUDF::from_kernel("st_asgeojson", st_asgeojson_impl());
        ScalarUdfTester::new(udf.into(), vec![WKB_GEOMETRY])
    }

    #[test]
    fn test_simple_geojson() {
        let tester = geojson_tester();

        // A simple point is rendered with floating-point coordinates.
        let point = tester.invoke_wkb_scalar(Some("POINT (1 2)")).unwrap();
        tester.assert_scalar_result_equals(point, r#"{"type":"Point","coordinates":[1.0,2.0]}"#);

        // A null input yields a null string.
        let null_result = tester.invoke_wkb_scalar(None).unwrap();
        assert_eq!(null_result, ScalarValue::Utf8(None));
    }

    #[test]
    fn test_linestring() {
        let tester = geojson_tester();

        let actual = tester
            .invoke_wkb_scalar(Some("LINESTRING (0 0, 1 1, 2 2)"))
            .unwrap();
        tester.assert_scalar_result_equals(
            actual,
            r#"{"type":"LineString","coordinates":[[0.0,0.0],[1.0,1.0],[2.0,2.0]]}"#,
        );
    }

    #[test]
    fn test_polygon() {
        let tester = geojson_tester();

        let actual = tester
            .invoke_wkb_scalar(Some("POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))"))
            .unwrap();
        tester.assert_scalar_result_equals(
            actual,
            r#"{"type":"Polygon","coordinates":[[[0.0,0.0],[1.0,0.0],[1.0,1.0],[0.0,1.0],[0.0,0.0]]]}"#,
        );
    }

    #[test]
    fn test_geometry_collection() {
        let tester = geojson_tester();

        let actual = tester
            .invoke_wkb_scalar(Some("GEOMETRYCOLLECTION(POINT(1 2), LINESTRING(0 0, 1 1))"))
            .unwrap();
        tester.assert_scalar_result_equals(
            actual,
            r#"{"type":"GeometryCollection","geometries":[{"type":"Point","coordinates":[1.0,2.0]},{"type":"LineString","coordinates":[[0.0,0.0],[1.0,1.0]]}]}"#,
        );
    }

    #[test]
    fn test_empty_point() {
        let tester = geojson_tester();

        let actual = tester.invoke_wkb_scalar(Some("POINT EMPTY")).unwrap();
        tester.assert_scalar_result_equals(actual, r#"{"type":"Point","coordinates":[]}"#);
    }

    #[test]
    fn test_empty_polygon() {
        let tester = geojson_tester();

        let actual = tester.invoke_wkb_scalar(Some("POLYGON EMPTY")).unwrap();
        tester.assert_scalar_result_equals(actual, r#"{"type":"Polygon","coordinates":[]}"#);
    }
}