diff --git a/NAMESPACE b/NAMESPACE index 86f0ad68..1e195dd8 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,6 +2,13 @@ S3method(drop_null_geometries,character) S3method(drop_null_geometries,json) +S3method(ms_clean,SpatialLines) +S3method(ms_clean,SpatialPoints) +S3method(ms_clean,SpatialPolygons) +S3method(ms_clean,character) +S3method(ms_clean,json) +S3method(ms_clean,sf) +S3method(ms_clean,sfc) S3method(ms_clip,SpatialLines) S3method(ms_clip,SpatialPoints) S3method(ms_clip,SpatialPolygons) @@ -63,6 +70,7 @@ S3method(ms_simplify,sfc) export(apply_mapshaper_commands) export(check_sys_mapshaper) export(drop_null_geometries) +export(ms_clean) export(ms_clip) export(ms_dissolve) export(ms_erase) diff --git a/R/clean.R b/R/clean.R new file mode 100644 index 00000000..dbea35d5 --- /dev/null +++ b/R/clean.R @@ -0,0 +1,230 @@ +#' Clean geometries using mapshaper +#' +#' Uses \href{https://github.com/mbloch/mapshaper}{mapshaper} to clean +#' geometries by removing overlaps, filling gaps, and repairing various kinds +#' of abnormal geometry. +#' +#' @param input spatial object to clean. One of: +#' \itemize{ +#' \item \code{geo_json} or \code{character} polygons, lines, or points; +#' \item \code{SpatialPolygons*}, \code{SpatialLines*}, or \code{SpatialPoints*}; +#' \item \code{sf} or \code{sfc} polygons, lines, or points object +#' } +#' @param gap_fill_area (polygons) Gaps smaller than this area will be filled; +#' larger gaps will be retained as holes in the polygon mosaic. Numeric value +#' in source units. Default \code{NULL} uses mapshaper's dynamic calculation. +#' @param sliver_control (polygons) Preferentially remove slivers (polygons with +#' a high perimeter-area ratio). Accepts values from 0-1, default is 1. +#' Multiplies the area of gap areas by the "Polsby Popper" compactness metric +#' before applying area threshold. +#' @param overlap_rule (polygons) Assign overlapping polygon areas to one of the +#' overlapping features based on this rule. One of: "min-id", "max-id", +#' "min-area", "max-area". Default is "max-area". +#' @param allow_overlaps Allow features to overlap each other. The default +#' behavior is to remove overlaps (default \code{FALSE}). +#' @param snap_interval Snap vertices within a given threshold before performing +#' other kinds of geometry repair. Defaults to a very small threshold. Uses +#' source units. Default \code{NULL}. +#' @param rewind Fix errors in the winding order of polygon rings (default +#' \code{FALSE}). +#' @param allow_empty Allow null geometries, which are removed by default +#' (default \code{FALSE}). +#' @inheritDotParams apply_mapshaper_commands force_FC sys sys_mem quiet gj2008 +#' +#' @details +#' Features with null geometries are deleted, unless the \code{allow_empty} flag +#' is used. +#' +#' Polygon features are cleaned by removing overlaps and filling small gaps +#' between adjacent polygons. Only gaps that are completely enclosed can be +#' filled. Areas that are contained by more than one polygon (overlaps) are +#' assigned to the polygon with the largest area by default. Similarly, gaps are +#' assigned to the largest-area polygon. +#' +#' Line features are cleaned by removing self-intersections within the same path. +#' Self-intersecting paths are split at the point of intersection and converted +#' into multiple paths within the same feature. When two separate paths intersect +#' in-between segment endpoints, new vertices are inserted at the point of +#' intersection. +#' +#' Point features are cleaned by removing duplicate coordinates within the same +#' feature. +#' +#' @return a cleaned representation of the geometry in the same class as the +#' input +#' @examples +#' library(rmapshaper) +#' +#' # Example with overlapping polygons +#' overlapping_poly <- structure('{ +#' "type": "FeatureCollection", +#' "features": [ +#' { +#' "type": "Feature", +#' "properties": {"id": 1}, +#' "geometry": { +#' "type": "Polygon", +#' "coordinates": [[[0, 0], [2, 0], [2, 2], [0, 2], [0, 0]]] +#' } +#' }, +#' { +#' "type": "Feature", +#' "properties": {"id": 2}, +#' "geometry": { +#' "type": "Polygon", +#' "coordinates": [[[1, 1], [3, 1], [3, 3], [1, 3], [1, 1]]] +#' } +#' } +#' ] +#' }', class = c("geojson", "json")) +#' +#' # Clean overlapping polygons +#' ms_clean(overlapping_poly) +#' +#' # Clean with specific overlap rule +#' ms_clean(overlapping_poly, overlap_rule = "min-area") +#' +#' @export +ms_clean <- function(input, gap_fill_area = NULL, sliver_control = 1, + overlap_rule = "max-area", allow_overlaps = FALSE, + snap_interval = NULL, rewind = FALSE, allow_empty = FALSE, + ...) { + UseMethod("ms_clean") +} + +#' @export +ms_clean.character <- function(input, gap_fill_area = NULL, sliver_control = 1, + overlap_rule = "max-area", allow_overlaps = FALSE, + snap_interval = NULL, rewind = FALSE, + allow_empty = FALSE, ...) { + input <- check_character_input(input) + + ms_clean_json(input = input, gap_fill_area = gap_fill_area, + sliver_control = sliver_control, overlap_rule = overlap_rule, + allow_overlaps = allow_overlaps, snap_interval = snap_interval, + rewind = rewind, allow_empty = allow_empty, ...) +} + +#' @export +ms_clean.json <- function(input, gap_fill_area = NULL, sliver_control = 1, + overlap_rule = "max-area", allow_overlaps = FALSE, + snap_interval = NULL, rewind = FALSE, + allow_empty = FALSE, ...) { + ms_clean_json(input = input, gap_fill_area = gap_fill_area, + sliver_control = sliver_control, overlap_rule = overlap_rule, + allow_overlaps = allow_overlaps, snap_interval = snap_interval, + rewind = rewind, allow_empty = allow_empty, ...) +} + +#' @export +ms_clean.SpatialPolygons <- function(input, gap_fill_area = NULL, + sliver_control = 1, overlap_rule = "max-area", + allow_overlaps = FALSE, snap_interval = NULL, + rewind = FALSE, allow_empty = FALSE, ...) { + + if (!is(input, "Spatial")) stop("input must be a spatial object") + + call <- make_clean_call(gap_fill_area = gap_fill_area, + sliver_control = sliver_control, + overlap_rule = overlap_rule, + allow_overlaps = allow_overlaps, + snap_interval = snap_interval, + rewind = rewind, allow_empty = allow_empty) + + ms_sp(input, call, ...) +} + +#' @export +ms_clean.SpatialLines <- ms_clean.SpatialPolygons + +#' @export +ms_clean.SpatialPoints <- ms_clean.SpatialPolygons + +#' @export +ms_clean.sf <- function(input, gap_fill_area = NULL, sliver_control = 1, + overlap_rule = "max-area", allow_overlaps = FALSE, + snap_interval = NULL, rewind = FALSE, + allow_empty = FALSE, ...) { + + call <- make_clean_call(gap_fill_area = gap_fill_area, + sliver_control = sliver_control, + overlap_rule = overlap_rule, + allow_overlaps = allow_overlaps, + snap_interval = snap_interval, + rewind = rewind, allow_empty = allow_empty) + + ms_sf(input, call, ...) +} + +#' @export +ms_clean.sfc <- ms_clean.sf + +ms_clean_json <- function(input, gap_fill_area, sliver_control, overlap_rule, + allow_overlaps, snap_interval, rewind, allow_empty, ...) { + + call <- make_clean_call(gap_fill_area = gap_fill_area, + sliver_control = sliver_control, + overlap_rule = overlap_rule, + allow_overlaps = allow_overlaps, + snap_interval = snap_interval, + rewind = rewind, allow_empty = allow_empty) + + ret <- apply_mapshaper_commands(data = input, command = call, ...) + + ret +} + +make_clean_call <- function(gap_fill_area, sliver_control, overlap_rule, + allow_overlaps, snap_interval, rewind, allow_empty) { + + # Validate inputs + if (!is.null(gap_fill_area) && (!is.numeric(gap_fill_area) || gap_fill_area < 0)) { + stop("gap_fill_area must be a positive numeric value or NULL") + } + + if (!is.numeric(sliver_control) || sliver_control < 0 || sliver_control > 1) { + stop("sliver_control must be a numeric value between 0 and 1") + } + + valid_overlap_rules <- c("min-id", "max-id", "min-area", "max-area") + if (!overlap_rule %in% valid_overlap_rules) { + stop("overlap_rule must be one of: ", paste(valid_overlap_rules, collapse = ", ")) + } + + if (!is.null(snap_interval) && (!is.numeric(snap_interval) || snap_interval < 0)) { + stop("snap_interval must be a positive numeric value or NULL") + } + + # Build command arguments + args <- "-clean" + + if (!is.null(gap_fill_area)) { + args <- c(args, paste0("gap-fill-area=", format(gap_fill_area, scientific = FALSE))) + } + + if (sliver_control != 1) { + args <- c(args, paste0("sliver-control=", format(sliver_control, scientific = FALSE))) + } + + if (overlap_rule != "max-area") { + args <- c(args, paste0("overlap-rule=", overlap_rule)) + } + + if (allow_overlaps) { + args <- c(args, "allow-overlaps") + } + + if (!is.null(snap_interval)) { + args <- c(args, paste0("snap-interval=", format(snap_interval, scientific = FALSE))) + } + + if (rewind) { + args <- c(args, "rewind") + } + + if (allow_empty) { + args <- c(args, "allow-empty") + } + + as.list(args) +} diff --git a/man/ms_clean.Rd b/man/ms_clean.Rd new file mode 100644 index 00000000..f95d48c1 --- /dev/null +++ b/man/ms_clean.Rd @@ -0,0 +1,137 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/clean.R +\name{ms_clean} +\alias{ms_clean} +\title{Clean geometries using mapshaper} +\usage{ +ms_clean( + input, + gap_fill_area = NULL, + sliver_control = 1, + overlap_rule = "max-area", + allow_overlaps = FALSE, + snap_interval = NULL, + rewind = FALSE, + allow_empty = FALSE, + ... +) +} +\arguments{ +\item{input}{spatial object to clean. One of: +\itemize{ +\item \code{geo_json} or \code{character} polygons, lines, or points; +\item \code{SpatialPolygons*}, \code{SpatialLines*}, or \code{SpatialPoints*}; +\item \code{sf} or \code{sfc} polygons, lines, or points object +}} + +\item{gap_fill_area}{(polygons) Gaps smaller than this area will be filled; +larger gaps will be retained as holes in the polygon mosaic. Numeric value +in source units. Default \code{NULL} uses mapshaper's dynamic calculation.} + +\item{sliver_control}{(polygons) Preferentially remove slivers (polygons with +a high perimeter-area ratio). Accepts values from 0-1, default is 1. +Multiplies the area of gap areas by the "Polsby Popper" compactness metric +before applying area threshold.} + +\item{overlap_rule}{(polygons) Assign overlapping polygon areas to one of the +overlapping features based on this rule. One of: "min-id", "max-id", +"min-area", "max-area". Default is "max-area".} + +\item{allow_overlaps}{Allow features to overlap each other. The default +behavior is to remove overlaps (default \code{FALSE}).} + +\item{snap_interval}{Snap vertices within a given threshold before performing +other kinds of geometry repair. Defaults to a very small threshold. Uses +source units. Default \code{NULL}.} + +\item{rewind}{Fix errors in the winding order of polygon rings (default +\code{FALSE}).} + +\item{allow_empty}{Allow null geometries, which are removed by default +(default \code{FALSE}).} + +\item{...}{ + Arguments passed on to \code{\link[=apply_mapshaper_commands]{apply_mapshaper_commands}} + \describe{ + \item{\code{force_FC}}{should the output be forced to be a FeatureCollection (or sf +object or Spatial*DataFrame) even if there are no attributes? Default +\code{TRUE}. If FALSE and there are no attributes associated with the +geometries, a GeometryCollection (or Spatial object with no dataframe, or +sfc) will be output.} + \item{\code{sys}}{Should the system mapshaper be used instead of the bundled +mapshaper? Gives better performance on large files. Requires the mapshaper +node package to be installed and on the PATH.} + \item{\code{sys_mem}}{How much memory (in GB) should be allocated if using the +system mapshaper (\code{sys = TRUE})? Default 8. Ignored if \code{sys = FALSE}. This +can also be set globally with the option \code{"mapshaper.sys_mem"}} + \item{\code{quiet}}{If \code{sys = TRUE}, should the mapshaper messages be silenced? +Default \code{FALSE}. This can also be set globally with the option +\code{"mapshaper.sys_quiet"}} + \item{\code{gj2008}}{Generate output that is consistent with the pre-RFC 7946 +GeoJSON spec (dating to 2008). Polygon rings are CW and holes are CCW, +which is the opposite of the default RFC 7946-compatible output. This should +be rarely needed, though may be useful when preparing data for D3-based +data visualizations (such as \code{plotly::plot_ly()}). Default \code{FALSE}} + }} +} +\value{ +a cleaned representation of the geometry in the same class as the +input +} +\description{ +Uses \href{https://github.com/mbloch/mapshaper}{mapshaper} to clean +geometries by removing overlaps, filling gaps, and repairing various kinds +of abnormal geometry. +} +\details{ +Features with null geometries are deleted, unless the \code{allow_empty} flag +is used. + +Polygon features are cleaned by removing overlaps and filling small gaps +between adjacent polygons. Only gaps that are completely enclosed can be +filled. Areas that are contained by more than one polygon (overlaps) are +assigned to the polygon with the largest area by default. Similarly, gaps are +assigned to the largest-area polygon. + +Line features are cleaned by removing self-intersections within the same path. +Self-intersecting paths are split at the point of intersection and converted +into multiple paths within the same feature. When two separate paths intersect +in-between segment endpoints, new vertices are inserted at the point of +intersection. + +Point features are cleaned by removing duplicate coordinates within the same +feature. +} +\examples{ +library(rmapshaper) + +# Example with overlapping polygons +overlapping_poly <- structure('{ + "type": "FeatureCollection", + "features": [ + { + "type": "Feature", + "properties": {"id": 1}, + "geometry": { + "type": "Polygon", + "coordinates": [[[0, 0], [2, 0], [2, 2], [0, 2], [0, 0]]] + } + }, + { + "type": "Feature", + "properties": {"id": 2}, + "geometry": { + "type": "Polygon", + "coordinates": [[[1, 1], [3, 1], [3, 3], [1, 3], [1, 1]]] + } + } + ] +}', class = c("geojson", "json")) + +# Clean overlapping polygons +ms_clean(overlapping_poly) + +# Clean with specific overlap rule +ms_clean(overlapping_poly, overlap_rule = "min-area") + +} diff --git a/tests/testthat/test-clean.R b/tests/testthat/test-clean.R new file mode 100644 index 00000000..db7e2403 --- /dev/null +++ b/tests/testthat/test-clean.R @@ -0,0 +1,207 @@ +test_that("ms_clean works with geojson/character input", { + # Simple polygon with no issues + simple_poly <- structure('{ + "type": "Feature", + "properties": {}, + "geometry": { + "type": "Polygon", + "coordinates": [[[0, 0], [1, 0], [1, 1], [0, 1], [0, 0]]] + } + }', class = c("geojson", "json")) + + result <- ms_clean(simple_poly) + expect_s3_class(result, "geojson") + expect_true(jsonify::validate_json(result)) + + # Test character input + expect_equal(ms_clean(as.character(simple_poly)), result) +}) + +test_that("ms_clean works with overlapping polygons", { + overlapping_poly <- structure('{ + "type": "FeatureCollection", + "features": [ + { + "type": "Feature", + "properties": {"id": 1}, + "geometry": { + "type": "Polygon", + "coordinates": [[[0, 0], [2, 0], [2, 2], [0, 2], [0, 0]]] + } + }, + { + "type": "Feature", + "properties": {"id": 2}, + "geometry": { + "type": "Polygon", + "coordinates": [[[1, 1], [3, 1], [3, 3], [1, 3], [1, 1]]] + } + } + ] + }', class = c("geojson", "json")) + + result <- ms_clean(overlapping_poly) + expect_s3_class(result, "geojson") + expect_true(jsonify::validate_json(result)) + + # Test with different overlap rules + result_min_area <- ms_clean(overlapping_poly, overlap_rule = "min-area") + expect_s3_class(result_min_area, "geojson") + expect_true(jsonify::validate_json(result_min_area)) + + # Test allow_overlaps + result_allow <- ms_clean(overlapping_poly, allow_overlaps = TRUE) + expect_s3_class(result_allow, "geojson") + expect_true(jsonify::validate_json(result_allow)) +}) + +test_that("ms_clean works with sf objects", { + skip_if_not_installed("sf") + + # Create sf object with overlapping polygons + library(sf) + + poly1 <- st_polygon(list(rbind(c(0,0), c(2,0), c(2,2), c(0,2), c(0,0)))) + poly2 <- st_polygon(list(rbind(c(1,1), c(3,1), c(3,3), c(1,3), c(1,1)))) + + sf_obj <- st_sf( + id = c(1, 2), + geometry = st_sfc(poly1, poly2) + ) + + result <- ms_clean(sf_obj) + expect_s3_class(result, "sf") + expect_true(all(st_is_valid(result))) + + # Test with parameters + result_params <- ms_clean(sf_obj, sliver_control = 0.5, rewind = TRUE) + expect_s3_class(result_params, "sf") + expect_true(all(st_is_valid(result_params))) +}) + +test_that("ms_clean works with sfc objects", { + skip_if_not_installed("sf") + + library(sf) + + poly1 <- st_polygon(list(rbind(c(0,0), c(1,0), c(1,1), c(0,1), c(0,0)))) + sfc_obj <- st_sfc(poly1) + + result <- ms_clean(sfc_obj) + expect_s3_class(result, "sfc") + expect_true(all(st_is_valid(result))) +}) + +test_that("ms_clean works with Spatial objects", { + skip_if_not_installed("sp") + skip_if_not_installed("rgeos") + + library(sp) + + # Create simple SpatialPolygons + coords <- rbind(c(0,0), c(1,0), c(1,1), c(0,1), c(0,0)) + poly <- Polygon(coords) + polys <- Polygons(list(poly), ID = "1") + sp_obj <- SpatialPolygons(list(polys)) + + result <- ms_clean(sp_obj) + expect_s4_class(result, "SpatialPolygons") + + # Test with SpatialPolygonsDataFrame + spdf_obj <- SpatialPolygonsDataFrame(sp_obj, data.frame(id = 1)) + result_df <- ms_clean(spdf_obj) + expect_s4_class(result_df, "SpatialPolygonsDataFrame") +}) + +test_that("ms_clean works with line features", { + # Self-intersecting line + line_geojson <- structure('{ + "type": "Feature", + "properties": {}, + "geometry": { + "type": "LineString", + "coordinates": [[0, 0], [2, 0], [1, 1], [1, -1]] + } + }', class = c("geojson", "json")) + + result <- ms_clean(line_geojson) + expect_s3_class(result, "geojson") + expect_true(jsonify::validate_json(result)) +}) + +test_that("ms_clean works with point features", { + # Points with duplicates + points_geojson <- structure('{ + "type": "Feature", + "properties": {}, + "geometry": { + "type": "MultiPoint", + "coordinates": [[0, 0], [0, 0], [1, 1]] + } + }', class = c("geojson", "json")) + + result <- ms_clean(points_geojson) + expect_s3_class(result, "geojson") + expect_true(jsonify::validate_json(result)) +}) + +test_that("ms_clean parameter validation works", { + simple_poly <- structure('{ + "type": "Feature", + "properties": {}, + "geometry": { + "type": "Polygon", + "coordinates": [[[0, 0], [1, 0], [1, 1], [0, 1], [0, 0]]] + } + }', class = c("geojson", "json")) + + # Test invalid gap_fill_area + expect_error(ms_clean(simple_poly, gap_fill_area = -1)) + expect_error(ms_clean(simple_poly, gap_fill_area = "invalid")) + + # Test invalid sliver_control + expect_error(ms_clean(simple_poly, sliver_control = -1)) + expect_error(ms_clean(simple_poly, sliver_control = 2)) + + # Test invalid overlap_rule + expect_error(ms_clean(simple_poly, overlap_rule = "invalid")) + + # Test invalid snap_interval + expect_error(ms_clean(simple_poly, snap_interval = -1)) + expect_error(ms_clean(simple_poly, snap_interval = "invalid")) +}) + +test_that("ms_clean works with various parameter combinations", { + simple_poly <- structure('{ + "type": "Feature", + "properties": {}, + "geometry": { + "type": "Polygon", + "coordinates": [[[0, 0], [1, 0], [1, 1], [0, 1], [0, 0]]] + } + }', class = c("geojson", "json")) + + # Test with gap_fill_area + result1 <- ms_clean(simple_poly, gap_fill_area = 100) + expect_s3_class(result1, "geojson") + + # Test with sliver_control + result2 <- ms_clean(simple_poly, sliver_control = 0.5) + expect_s3_class(result2, "geojson") + + # Test with snap_interval + result3 <- ms_clean(simple_poly, snap_interval = 0.01) + expect_s3_class(result3, "geojson") + + # Test with rewind and allow_empty + result4 <- ms_clean(simple_poly, rewind = TRUE, allow_empty = TRUE) + expect_s3_class(result4, "geojson") + + # Test with multiple parameters + result5 <- ms_clean(simple_poly, + gap_fill_area = 50, + sliver_control = 0.8, + overlap_rule = "min-id", + snap_interval = 0.001) + expect_s3_class(result5, "geojson") +})