From 42c4173af795ef0e543e4ed5e9d4bb0763b0280f Mon Sep 17 00:00:00 2001 From: Anna Petrasova Date: Thu, 16 Apr 2026 15:15:55 -0400 Subject: [PATCH 1/3] v.geometry: new wrapper tool based on v.to.db --- scripts/CMakeLists.txt | 1 + scripts/Makefile | 1 + scripts/v.geometry/Makefile | 7 + scripts/v.geometry/tests/conftest.py | 60 +++++ scripts/v.geometry/tests/v_geometry_test.py | 125 ++++++++++ scripts/v.geometry/v.geometry.html | 87 +++++++ scripts/v.geometry/v.geometry.md | 99 ++++++++ scripts/v.geometry/v.geometry.py | 260 ++++++++++++++++++++ 8 files changed, 640 insertions(+) create mode 100644 scripts/v.geometry/Makefile create mode 100644 scripts/v.geometry/tests/conftest.py create mode 100644 scripts/v.geometry/tests/v_geometry_test.py create mode 100644 scripts/v.geometry/v.geometry.html create mode 100644 scripts/v.geometry/v.geometry.md create mode 100644 scripts/v.geometry/v.geometry.py diff --git a/scripts/CMakeLists.txt b/scripts/CMakeLists.txt index 22fd18a10e2..6587fa5fd03 100644 --- a/scripts/CMakeLists.txt +++ b/scripts/CMakeLists.txt @@ -67,6 +67,7 @@ set(script_DIRS v.db.univar v.db.update v.dissolve + v.geometry v.import v.in.e00 v.in.geonames diff --git a/scripts/Makefile b/scripts/Makefile index dc3d80f4209..a171274b07b 100644 --- a/scripts/Makefile +++ b/scripts/Makefile @@ -69,6 +69,7 @@ SUBDIRS = \ v.db.univar \ v.db.update \ v.dissolve \ + v.geometry \ v.import \ v.in.e00 \ v.in.geonames \ diff --git a/scripts/v.geometry/Makefile b/scripts/v.geometry/Makefile new file mode 100644 index 00000000000..0abe9b749e7 --- /dev/null +++ b/scripts/v.geometry/Makefile @@ -0,0 +1,7 @@ +MODULE_TOPDIR = ../.. 
+ +PGM = v.geometry + +include $(MODULE_TOPDIR)/include/Make/Script.make + +default: script diff --git a/scripts/v.geometry/tests/conftest.py b/scripts/v.geometry/tests/conftest.py new file mode 100644 index 00000000000..f22acb5b0be --- /dev/null +++ b/scripts/v.geometry/tests/conftest.py @@ -0,0 +1,60 @@ +"""Fixtures for v.geometry tests.""" + +import io +import os +from types import SimpleNamespace + +import pytest + +import grass.script as gs +from grass.tools import Tools + + +LINE_ASCII = """\ +ORGANIZATION: GRASS Test +DIGIT DATE: today +DIGIT NAME: test +MAP NAME: line +MAP DATE: today +MAP SCALE: 1 +OTHER INFO: +ZONE: 0 +MAP THRESH: 0.500000 +VERTI: +L 2 1 + 0 0 + 100 100 + 1 1 +""" + + +@pytest.fixture(scope="module") +def session(tmp_path_factory): + """Session with a 2x2 rectangle grid, a set of points, and a straight line.""" + tmp_path = tmp_path_factory.mktemp("v_geometry") + project = tmp_path / "test" + gs.create_project(project) + with gs.setup.init(project, env=os.environ.copy()) as grass_session: + tools = Tools(session=grass_session) + tools.g_region(s=0, n=100, w=0, e=100, res=1) + + # 2x2 grid of 50x50 rectangles over (0,0)-(100,100). + tools.v_mkgrid(map="grid", grid=(2, 2)) + + # Three points at known coordinates. + points_ascii = "10|20\n30|40\n50|60\n" + tools.v_in_ascii( + input=io.StringIO(points_ascii), + output="points", + format="point", + separator="pipe", + ) + + # Single straight line from (0,0) to (100,100) with category 1. 
+ tools.v_in_ascii( + input=io.StringIO(LINE_ASCII), + output="line", + format="standard", + ) + + yield SimpleNamespace(session=grass_session) diff --git a/scripts/v.geometry/tests/v_geometry_test.py b/scripts/v.geometry/tests/v_geometry_test.py new file mode 100644 index 00000000000..8835570654e --- /dev/null +++ b/scripts/v.geometry/tests/v_geometry_test.py @@ -0,0 +1,125 @@ +"""Tests for v.geometry.""" + +import math + +import pytest + +from grass.tools import Tools + + +def test_area_metrics(session): + """Area-family metrics on a 2x2 grid of 50x50 squares. + + Exercises area, perimeter, compactness, fractal_dimension, bbox in + one call so the merged-record and key-rename behaviors are both + covered without paying for extra subprocess spawns. + """ + tools = Tools(session=session.session) + result = tools.v_geometry( + map="grid", metric="area,perimeter,compactness,fractal_dimension,bbox" + ) + assert len(result["records"]) == 4 + expected_compactness = 200.0 / (2.0 * math.sqrt(math.pi * 2500.0)) + for record in result["records"]: + assert record["area"] == pytest.approx(2500.0) + assert record["perimeter"] == pytest.approx(200.0) + assert record["compactness"] == pytest.approx(expected_compactness, rel=1e-6) + assert "compact" not in record + assert "fractal_dimension" in record + assert "fd" not in record + assert {"north", "south", "east", "west"} <= set(record.keys()) + assert result["units"]["area"] == "square meters" + assert result["units"]["perimeter"] == "meters" + + +def test_area_hectares(session): + tools = Tools(session=session.session) + result = tools.v_geometry(map="grid", metric="area", units="hectares") + for record in result["records"]: + assert record["area"] == pytest.approx(0.25) + assert result["units"]["area"] == "hectares" + + +def test_line_metrics(session): + """Line-family metrics on a straight line from (0,0) to (100,100).""" + tools = Tools(session=session.session) + result = tools.v_geometry(map="line", 
metric="length,sinuosity,azimuth") + record = result["records"][0] + assert record["length"] == pytest.approx(math.sqrt(2.0) * 100.0, rel=1e-6) + # A straight line has sinuosity 1. + assert record["sinuosity"] == pytest.approx(1.0, rel=1e-6) + assert "sinuous" not in record + assert "azimuth" in record + + +def test_count_totals_flag(session): + tools = Tools(session=session.session) + result = tools.v_geometry(map="points", metric="count", flags="c") + assert result["totals"]["count"] == 3 + + +def test_coordinates(session): + tools = Tools(session=session.session) + result = tools.v_geometry(map="points", metric="coordinates") + coords = {(record["x"], record["y"]) for record in result["records"]} + assert coords == {(10.0, 20.0), (30.0, 40.0), (50.0, 60.0)} + + +def test_multiple_metrics_totals(session): + """Totals from different metrics are merged.""" + tools = Tools(session=session.session) + result = tools.v_geometry( + map="grid", metric="area,count", type="centroid", flags="c" + ) + assert result["totals"]["area"] == pytest.approx(10000.0) + assert result["totals"]["count"] == 4 + + +def test_per_metric_units(session): + """Each metric gets its own unit via positional correspondence.""" + tools = Tools(session=session.session) + result = tools.v_geometry( + map="grid", metric="area,perimeter", units="hectares,kilometers" + ) + assert result["units"]["area"] == "hectares" + assert result["units"]["perimeter"] == "kilometers" + for record in result["records"]: + assert record["area"] == pytest.approx(0.25) + assert record["perimeter"] == pytest.approx(0.2) + + +def test_partial_units(session): + """Fewer units than metrics: extra metrics use defaults.""" + tools = Tools(session=session.session) + result = tools.v_geometry(map="grid", metric="area,perimeter", units="hectares") + assert result["units"]["area"] == "hectares" + assert result["units"]["perimeter"] == "meters" + + +def test_plain_format(session): + tools = Tools(session=session.session) + result = 
tools.v_geometry(map="grid", metric="area", format="plain") + lines = result.stdout.splitlines() + assert lines[0] == "category|area" + assert len(lines) == 5 + + +def test_csv_format(session): + tools = Tools(session=session.session) + result = tools.v_geometry(map="grid", metric="area", format="csv") + lines = result.stdout.splitlines() + assert lines[0] == "category,area" + assert len(lines) == 5 + + +def test_csv_rejects_multichar_separator(session): + tools = Tools(session=session.session) + with pytest.raises(Exception, match="CSV separator"): + tools.v_geometry(map="grid", metric="area", format="csv", separator="--") + + +def test_mixed_feature_types_rejected(session): + """Mixing metrics from different feature-type families fails cleanly.""" + tools = Tools(session=session.session) + with pytest.raises(Exception, match="different feature types"): + tools.v_geometry(map="grid", metric="area,length") diff --git a/scripts/v.geometry/v.geometry.html b/scripts/v.geometry/v.geometry.html new file mode 100644 index 00000000000..9e33f07a5e9 --- /dev/null +++ b/scripts/v.geometry/v.geometry.html @@ -0,0 +1,87 @@ +

DESCRIPTION

+ +v.geometry prints geometry metrics of vector features. Output +is available as JSON, CSV, or plain text. + +

One or more metric values can be requested per invocation. When +multiple metrics are given, they are computed in parallel and the +results are merged by category. Supported metrics are: + +

+ +

Values are aggregated per category. + +

Lengths are reported in meters and areas in square meters by default; use +the units option to change this. + +

NOTES

+ +v.geometry is a read-only front-end to +v.to.db and accepts the same set of +units. It does not read from or write to the attribute table, so no +table needs to be attached to the input map. + +

For writing metrics back into an attribute table, use +v.to.db directly. Features of +v.to.db that are not purely geometric, such as cross-layer +attribute queries (option=query), are intentionally not +exposed here. + +

Records are keyed by category. When multiple metrics are requested, +the values for each category are merged into one record. If a category +is shared across features of different types (for example a line and +an area with the same cat), their metrics would end up in the same +record even though they describe different features. To avoid this, +v.geometry rejects combinations of metrics that belong to +different feature-type families (area, line, point); the +count metric is universal and can be combined with any of +them. Run v.geometry separately for each feature type if you +need metrics from more than one family. + +

EXAMPLES

+ +Report area sizes of geology polygons in hectares: + +
+v.geometry map=geology metric=area units=hectares
+
+ +Compute area, perimeter, and compactness in a single call: + +
+v.geometry map=geology metric=area,perimeter,compactness
+
+ +Consume metrics from Python: + +
+import grass.script as gs
+
+data = gs.parse_command("v.geometry", map="geology", metric="compactness")
+for record in data["records"]:
+    print(record["category"], record["compactness"])
+
+ +

SEE ALSO

+ + +r.object.geometry, +v.category, +v.db.join, +v.report, +v.to.db, +v.univar + + +

AUTHORS

+ +Anna Petrasova, NC State University diff --git a/scripts/v.geometry/v.geometry.md b/scripts/v.geometry/v.geometry.md new file mode 100644 index 00000000000..b9e302c7ee3 --- /dev/null +++ b/scripts/v.geometry/v.geometry.md @@ -0,0 +1,99 @@ +## DESCRIPTION + +*v.geometry* prints geometry metrics of vector features. Output is +available as JSON, CSV, or plain text. + +One or more **metric** values can be requested per invocation. When +multiple metrics are given, they are computed in parallel and the +results are merged by category. Supported metrics are: + +- `area`, `perimeter`, `compactness`, `fractal_dimension`, `bbox` - + for areas (and boundaries that form areas) +- `length`, `slope`, `sinuosity`, `azimuth`, `start`, `end` - for + lines and boundaries +- `coordinates` - for points and centroids +- `count` - number of features per category + +Values are aggregated per category. + +Lengths are reported in meters and areas in square meters by default; use +the **units** option to change this. + +## NOTES + +*v.geometry* is a read-only front-end to *[v.to.db](v.to.db.md)* and +accepts the same set of units. It does not read from or write to the +attribute table, so no table needs to be attached to the input map. + +For writing metrics back into an attribute table, use +*[v.to.db](v.to.db.md)* directly. Features of *v.to.db* that are not +purely geometric, such as cross-layer attribute queries +(`option=query`), are intentionally not exposed here. + +Records are keyed by category. When multiple metrics are requested, the +values for each category are merged into one record. If a category is +shared across features of different types (for example a line and an +area with the same cat), their metrics would end up in the same record +even though they describe different features. 
To avoid this, *v.geometry* +rejects combinations of metrics that belong to different feature-type +families (area, line, point); the `count` metric is universal and can +be combined with any of them. Run *v.geometry* separately for each +feature type if you need metrics from more than one family. + +## EXAMPLES + +Report area sizes of geology polygons in hectares: + +```sh +v.geometry map=geology metric=area units=hectares +``` + +Compute area, perimeter, and compactness in a single call: + +```sh +v.geometry map=geology metric=area,perimeter,compactness +``` + +Consume metrics from Python: + +```python +import grass.script as gs + +data = gs.parse_command("v.geometry", map="geology", metric="compactness") +for record in data["records"]: + print(record["category"], record["compactness"]) +``` + +Sample JSON output for `metric=length`: + +```json +{ + "units": { + "length": "meters" + }, + "totals": { + "length": 10426.657857419743 + }, + "records": [ + { + "category": 1, + "length": 4554.943058982206 + }, + { + "category": 2, + "length": 5871.714798437538 + } + ] +} +``` + +## SEE ALSO + +*[r.object.geometry](r.object.geometry.md), +[v.category](v.category.md), [v.db.join](v.db.join.md), +[v.report](v.report.md), [v.to.db](v.to.db.md), +[v.univar](v.univar.md)* + +## AUTHORS + +Anna Petrasova, NC State University diff --git a/scripts/v.geometry/v.geometry.py b/scripts/v.geometry/v.geometry.py new file mode 100644 index 00000000000..d5bc1be39b8 --- /dev/null +++ b/scripts/v.geometry/v.geometry.py @@ -0,0 +1,260 @@ +#!/usr/bin/env python3 + +############################################################################ +# +# MODULE: v.geometry +# AUTHOR: Anna Petrasova +# PURPOSE: Print geometry metrics of vector features as JSON +# COPYRIGHT: (C) 2026 by Anna Petrasova and the GRASS Development Team +# This program is free software under the GNU General +# Public License (>=v2). Read the file COPYING that +# comes with GRASS for details. 
+# +############################################################################# + +# %module +# % description: Prints geometry metrics of vector features. +# % keyword: vector +# % keyword: geometry +# % keyword: metric +# %end + +# %option G_OPT_V_MAP +# %end + +# %option +# % key: metric +# % type: string +# % required: yes +# % multiple: yes +# % options: area,perimeter,length,count,compactness,fractal_dimension,slope,sinuosity,azimuth,coordinates,start,end,bbox +# % description: Geometry metric(s) to compute +# % descriptions: area;area size;perimeter;perimeter length of an area;length;line length;count;number of features for each category;compactness;compactness of an area, calculated as perimeter / (2 * sqrt(PI * area));fractal_dimension;fractal dimension of boundary defining a polygon, calculated as 2 * (log(perimeter) / log(area));slope;slope steepness of vector line or boundary;sinuosity;line sinuosity, calculated as line length / distance between end points;azimuth;line azimuth, calculated as angle between North direction and endnode direction at startnode;coordinates;point coordinates, X,Y or X,Y,Z;start;line/boundary starting point coordinates, X,Y or X,Y,Z;end;line/boundary end point coordinates, X,Y or X,Y,Z;bbox;bounding box of area, N,S,E,W +# %end + +# %option G_OPT_V_TYPE +# % options: point,line,boundary,centroid +# % answer: point,line,boundary,centroid +# %end + +# %option G_OPT_V_FIELD +# %end + +# %option G_OPT_M_UNITS +# % multiple: yes +# % options: miles,feet,meters,kilometers,acres,hectares,radians,degrees +# % description: Units (one per metric, positional; unspecified metrics use defaults) +# %end + +# %option G_OPT_F_SEP +# % answer: {NULL} +# %end + +# %option G_OPT_F_FORMAT +# % options: plain,json,csv +# % answer: json +# % descriptions: plain;Plain text with pipe separator by default;json;JSON (JavaScript Object Notation);csv;CSV (Comma Separated Values) +# %end + +# %flag +# % key: c +# % description: Include totals in the 
output where supported by the metric +# %end + +import csv +import json +import sys +from concurrent.futures import ThreadPoolExecutor + +import grass.script as gs +from grass.tools import Tools + + +# Map v.geometry metric names to v.to.db option names. Most are identical; +# a few are renamed for clarity (e.g. "compact" -> "compactness"). +METRIC_TO_VTODB_OPTION = { + "area": "area", + "perimeter": "perimeter", + "length": "length", + "count": "count", + "compactness": "compact", + "fractal_dimension": "fd", + "slope": "slope", + "sinuosity": "sinuous", + "azimuth": "azimuth", + "coordinates": "coor", + "start": "start", + "end": "end", + "bbox": "bbox", +} + +# Keys that v.to.db emits in its JSON output which v.geometry renames to +# match the user-facing metric names above. +_VTODB_KEY_RENAMES = { + "compact": "compactness", + "fd": "fractal_dimension", + "sinuous": "sinuosity", +} + +# Group each metric by the feature type it describes. Records from different +# metrics are merged by category, so mixing metrics from different groups +# (e.g. a line's sinuosity and an area's perimeter) would silently combine +# unrelated features that happen to share a category. "count" applies to any +# feature type and may be combined with any group. 
+METRIC_GROUPS = { + "area": "area", + "perimeter": "area", + "compactness": "area", + "fractal_dimension": "area", + "bbox": "area", + "length": "line", + "slope": "line", + "sinuosity": "line", + "azimuth": "line", + "start": "line", + "end": "line", + "coordinates": "point", + "count": "any", +} + + +def _rename_keys(mapping): + return {_VTODB_KEY_RENAMES.get(k, k): v for k, v in mapping.items()} + + +def _run_vtodb(metric, unit, common_kwargs): + """Run v.to.db for a single metric and return the parsed JSON result.""" + vtodb_option = METRIC_TO_VTODB_OPTION[metric] + kwargs = dict(common_kwargs) + if unit: + kwargs["units"] = unit + result = Tools().v_to_db(option=vtodb_option, format="json", **kwargs) + return result.json + + +def _merge_results(results): + """Merge per-metric v.to.db results into a single JSON structure. + + Each metric contributes its keys to every record (matched by category), + and its entries to the shared ``units`` and ``totals`` dicts. The + ``results`` list must be in the caller's metric order so that the + resulting record field order is deterministic. + """ + merged_units = {} + merged_totals = {} + # category -> merged record dict + records_by_cat = {} + + for result in results: + merged_units.update(_rename_keys(result.get("units", {}))) + merged_totals.update(_rename_keys(result.get("totals", {}))) + for record in result.get("records", []): + record = _rename_keys(record) + cat = record["category"] + if cat in records_by_cat: + records_by_cat[cat].update(record) + else: + records_by_cat[cat] = dict(record) + + # Preserve category order. + records = [records_by_cat[cat] for cat in sorted(records_by_cat)] + return {"units": merged_units, "totals": merged_totals, "records": records} + + +def main(): + options, flags = gs.parser() + + metrics = options["metric"].split(",") + groups = {METRIC_GROUPS[m] for m in metrics} - {"any"} + if len(groups) > 1: + gs.fatal( + _( + "Cannot mix metrics from different feature types: {}. 
" + "Results are merged by category, so combining e.g. line and " + "area metrics would produce misleading records. Run " + "v.geometry separately for each feature type." + ).format( + ", ".join( + "{} ({})".format(m, METRIC_GROUPS[m]) + for m in metrics + if METRIC_GROUPS[m] != "any" + ) + ) + ) + + units_list = options["units"].split(",") if options["units"] else [] + if len(units_list) > len(metrics): + gs.fatal( + _("More units ({}) than metrics ({}) specified").format( + len(units_list), len(metrics) + ) + ) + # Pad with None so every metric has a corresponding entry. + units_list.extend([None] * (len(metrics) - len(units_list))) + + flag_str = "p" + if flags["c"]: + flag_str += "c" + + # v.to.db requires the "columns" parameter even in print-only mode, but + # does not use it for JSON or plain output labels; any valid name works. + common_kwargs = { + "map": options["map"], + "type": options["type"], + "layer": options["layer"], + "columns": "value", + "flags": flag_str, + } + + if len(metrics) == 1: + results = [_run_vtodb(metrics[0], units_list[0], common_kwargs)] + else: + # Submit all metrics concurrently but collect results in metric + # order so downstream column/field ordering is deterministic. 
+ with ThreadPoolExecutor() as executor: + futures = [ + executor.submit(_run_vtodb, m, u, common_kwargs) + for m, u in zip(metrics, units_list, strict=True) + ] + results = [f.result() for f in futures] + + result = _merge_results(results) + + output_format = options["format"] + if output_format == "json": + print(json.dumps(result, indent=4)) + return 0 + + separator = gs.separator(options["separator"]) + records = result["records"] + if not records: + return 0 + columns = list(records[0].keys()) + + if output_format == "csv": + if not separator: + separator = "," + elif len(separator) > 1: + gs.fatal( + _( + "A standard CSV separator (delimiter) is only one character " + "long, got: {}" + ).format(separator) + ) + # Force LF endings; csv.writer defaults to CRLF, which compounds + # with text-mode stdout's newline translation on some platforms. + writer = csv.writer(sys.stdout, delimiter=separator, lineterminator="\n") + writer.writerow(columns) + writer.writerows([record.get(c, "") for c in columns] for record in records) + else: # plain + if not separator: + separator = "|" + print(separator.join(columns)) + for record in records: + print(separator.join(str(record.get(c, "")) for c in columns)) + + return 0 + + +if __name__ == "__main__": + sys.exit(main()) From f43f7e09e50bc7c4b45d7a297420d1bddef4657f Mon Sep 17 00:00:00 2001 From: Anna Petrasova Date: Sat, 18 Apr 2026 00:00:10 -0400 Subject: [PATCH 2/3] further improvements --- scripts/v.geometry/tests/v_geometry_test.py | 27 +++++++--- scripts/v.geometry/v.geometry.html | 2 +- scripts/v.geometry/v.geometry.md | 2 +- scripts/v.geometry/v.geometry.py | 58 +++++++++++++++------ 4 files changed, 64 insertions(+), 25 deletions(-) diff --git a/scripts/v.geometry/tests/v_geometry_test.py b/scripts/v.geometry/tests/v_geometry_test.py index 8835570654e..e4271b368ce 100644 --- a/scripts/v.geometry/tests/v_geometry_test.py +++ b/scripts/v.geometry/tests/v_geometry_test.py @@ -52,9 +52,9 @@ def 
test_line_metrics(session): assert "azimuth" in record -def test_count_totals_flag(session): +def test_count_totals(session): tools = Tools(session=session.session) - result = tools.v_geometry(map="points", metric="count", flags="c") + result = tools.v_geometry(map="points", metric="count") assert result["totals"]["count"] == 3 @@ -68,9 +68,7 @@ def test_coordinates(session): def test_multiple_metrics_totals(session): """Totals from different metrics are merged.""" tools = Tools(session=session.session) - result = tools.v_geometry( - map="grid", metric="area,count", type="centroid", flags="c" - ) + result = tools.v_geometry(map="grid", metric="area,count", type="centroid") assert result["totals"]["area"] == pytest.approx(10000.0) assert result["totals"]["count"] == 4 @@ -105,11 +103,24 @@ def test_plain_format(session): def test_csv_format(session): + """Also verifies that nprocs does not affect output.""" tools = Tools(session=session.session) - result = tools.v_geometry(map="grid", metric="area", format="csv") - lines = result.stdout.splitlines() - assert lines[0] == "category,area" + serial = tools.v_geometry( + map="grid", + metric="area,perimeter,compactness", + format="csv", + nprocs=1, + ) + lines = serial.stdout.splitlines() + assert lines[0] == "category,area,perimeter,compactness" assert len(lines) == 5 + parallel = tools.v_geometry( + map="grid", + metric="area,perimeter,compactness", + format="csv", + nprocs=2, + ) + assert serial.stdout == parallel.stdout def test_csv_rejects_multichar_separator(session): diff --git a/scripts/v.geometry/v.geometry.html b/scripts/v.geometry/v.geometry.html index 9e33f07a5e9..85082fb558f 100644 --- a/scripts/v.geometry/v.geometry.html +++ b/scripts/v.geometry/v.geometry.html @@ -84,4 +84,4 @@

SEE ALSO

AUTHORS

-Anna Petrasova, NC State University +Anna Petrasova, NCSU GeoForAll Laboratory diff --git a/scripts/v.geometry/v.geometry.md b/scripts/v.geometry/v.geometry.md index b9e302c7ee3..422044eb00b 100644 --- a/scripts/v.geometry/v.geometry.md +++ b/scripts/v.geometry/v.geometry.md @@ -96,4 +96,4 @@ Sample JSON output for `metric=length`: ## AUTHORS -Anna Petrasova, NC State University +Anna Petrasova, NCSU GeoForAll Laboratory diff --git a/scripts/v.geometry/v.geometry.py b/scripts/v.geometry/v.geometry.py index d5bc1be39b8..d105b88eda4 100644 --- a/scripts/v.geometry/v.geometry.py +++ b/scripts/v.geometry/v.geometry.py @@ -17,6 +17,7 @@ # % keyword: vector # % keyword: geometry # % keyword: metric +# % keyword: parallel # %end # %option G_OPT_V_MAP @@ -46,6 +47,9 @@ # % description: Units (one per metric, positional; unspecified metrics use defaults) # %end +# %option G_OPT_M_NPROCS +# %end + # %option G_OPT_F_SEP # % answer: {NULL} # %end @@ -56,13 +60,9 @@ # % descriptions: plain;Plain text with pipe separator by default;json;JSON (JavaScript Object Notation);csv;CSV (Comma Separated Values) # %end -# %flag -# % key: c -# % description: Include totals in the output where supported by the metric -# %end - import csv import json +import os import sys from concurrent.futures import ThreadPoolExecutor @@ -122,6 +122,36 @@ def _rename_keys(mapping): return {_VTODB_KEY_RENAMES.get(k, k): v for k, v in mapping.items()} +def _available_cpus(): + """Number of CPUs this process may actually use. + + Prefers affinity-aware sources over ``os.cpu_count()``, which reports + the host total and overcounts in containers and cgroup-limited jobs. + """ + if hasattr(os, "process_cpu_count"): # Python 3.13+ + return os.process_cpu_count() or 1 + if hasattr(os, "sched_getaffinity"): # Linux + return len(os.sched_getaffinity(0)) + return os.cpu_count() or 1 + + +def _resolve_nprocs(nprocs): + """Resolve G_OPT_M_NPROCS into a worker count for ThreadPoolExecutor. 
+ + Mirrors the semantics of G_set_omp_num_threads() in + lib/gis/omp_threads.c: 0 means use all available cores, a positive + number is used as-is, a negative number means cpu_count + nprocs + (clamped to at least 1). Belongs in a library helper eventually. + """ + nprocs = int(nprocs) + if nprocs > 0: + return nprocs + available = _available_cpus() + if nprocs == 0: + return available + return max(1, available + nprocs) + + def _run_vtodb(metric, unit, common_kwargs): """Run v.to.db for a single metric and return the parsed JSON result.""" vtodb_option = METRIC_TO_VTODB_OPTION[metric] @@ -162,7 +192,7 @@ def _merge_results(results): def main(): - options, flags = gs.parser() + options, _flags = gs.parser() metrics = options["metric"].split(",") groups = {METRIC_GROUPS[m] for m in metrics} - {"any"} @@ -192,26 +222,24 @@ def main(): # Pad with None so every metric has a corresponding entry. units_list.extend([None] * (len(metrics) - len(units_list))) - flag_str = "p" - if flags["c"]: - flag_str += "c" - # v.to.db requires the "columns" parameter even in print-only mode, but - # does not use it for JSON or plain output labels; any valid name works. + # does not use it for JSON or plain output; any valid name works. common_kwargs = { "map": options["map"], "type": options["type"], "layer": options["layer"], - "columns": "value", - "flags": flag_str, + "columns": "unused", + "flags": "p", } if len(metrics) == 1: results = [_run_vtodb(metrics[0], units_list[0], common_kwargs)] else: # Submit all metrics concurrently but collect results in metric - # order so downstream column/field ordering is deterministic. - with ThreadPoolExecutor() as executor: + # order so downstream column/field ordering is deterministic. Cap + # at len(metrics); extra workers just sit idle. 
+ max_workers = min(_resolve_nprocs(options["nprocs"]), len(metrics)) + with ThreadPoolExecutor(max_workers=max_workers) as executor: futures = [ executor.submit(_run_vtodb, m, u, common_kwargs) for m, u in zip(metrics, units_list, strict=True) From c90cd5efea6bac6d0d07c8ff9963778bf983f2d9 Mon Sep 17 00:00:00 2001 From: Anna Petrasova Date: Sat, 18 Apr 2026 01:01:26 -0400 Subject: [PATCH 3/3] Align count with other metrics, move CSV separator check --- scripts/v.geometry/tests/conftest.py | 60 ++++++++++++++++++++- scripts/v.geometry/tests/v_geometry_test.py | 55 ++++++++++++++----- scripts/v.geometry/v.geometry.html | 6 +++ scripts/v.geometry/v.geometry.md | 6 +++ scripts/v.geometry/v.geometry.py | 47 +++++++++++----- 5 files changed, 147 insertions(+), 27 deletions(-) diff --git a/scripts/v.geometry/tests/conftest.py b/scripts/v.geometry/tests/conftest.py index f22acb5b0be..da609e83147 100644 --- a/scripts/v.geometry/tests/conftest.py +++ b/scripts/v.geometry/tests/conftest.py @@ -2,7 +2,6 @@ import io import os -from types import SimpleNamespace import pytest @@ -28,6 +27,56 @@ """ +# Three non-overlapping areas (two sharing cat 2, one cat 1) plus a line +# with cat 10. Boundaries carry no categories, so v.to.db option=count +# emits spurious cat=-1 records for them. Used to check that aligning +# count to the other metric's cat set drops the line and the cat=-1 +# records, and that counts for repeated cats aggregate correctly. 
+MIXED_ASCII = """\ +ORGANIZATION: GRASS Test +DIGIT DATE: today +DIGIT NAME: test +MAP NAME: mixed +MAP DATE: today +MAP SCALE: 1 +OTHER INFO: +ZONE: 0 +MAP THRESH: 0.500000 +VERTI: +B 5 + 0 0 + 50 0 + 50 50 + 0 50 + 0 0 +C 1 1 + 25 25 + 1 1 +B 5 + 60 0 + 100 0 + 100 50 + 60 50 + 60 0 +C 1 1 + 80 25 + 1 2 +B 5 + 0 60 + 50 60 + 50 100 + 0 100 + 0 60 +C 1 1 + 25 80 + 1 2 +L 2 1 + 70 70 + 90 90 + 1 10 +""" + + @pytest.fixture(scope="module") def session(tmp_path_factory): """Session with a 2x2 rectangle grid, a set of points, and a straight line.""" @@ -57,4 +106,11 @@ def session(tmp_path_factory): format="standard", ) - yield SimpleNamespace(session=grass_session) + # Mixed map: three areas (cats 1, 2, 2) and a line (cat 10). + tools.v_in_ascii( + input=io.StringIO(MIXED_ASCII), + output="mixed", + format="standard", + ) + + yield grass_session diff --git a/scripts/v.geometry/tests/v_geometry_test.py b/scripts/v.geometry/tests/v_geometry_test.py index e4271b368ce..5f3d927f50c 100644 --- a/scripts/v.geometry/tests/v_geometry_test.py +++ b/scripts/v.geometry/tests/v_geometry_test.py @@ -14,7 +14,7 @@ def test_area_metrics(session): one call so the merged-record and key-rename behaviors are both covered without paying for extra subprocess spawns. 
""" - tools = Tools(session=session.session) + tools = Tools(session=session) result = tools.v_geometry( map="grid", metric="area,perimeter,compactness,fractal_dimension,bbox" ) @@ -33,7 +33,7 @@ def test_area_metrics(session): def test_area_hectares(session): - tools = Tools(session=session.session) + tools = Tools(session=session) result = tools.v_geometry(map="grid", metric="area", units="hectares") for record in result["records"]: assert record["area"] == pytest.approx(0.25) @@ -42,7 +42,7 @@ def test_area_hectares(session): def test_line_metrics(session): """Line-family metrics on a straight line from (0,0) to (100,100).""" - tools = Tools(session=session.session) + tools = Tools(session=session) result = tools.v_geometry(map="line", metric="length,sinuosity,azimuth") record = result["records"][0] assert record["length"] == pytest.approx(math.sqrt(2.0) * 100.0, rel=1e-6) @@ -53,13 +53,13 @@ def test_line_metrics(session): def test_count_totals(session): - tools = Tools(session=session.session) + tools = Tools(session=session) result = tools.v_geometry(map="points", metric="count") assert result["totals"]["count"] == 3 def test_coordinates(session): - tools = Tools(session=session.session) + tools = Tools(session=session) result = tools.v_geometry(map="points", metric="coordinates") coords = {(record["x"], record["y"]) for record in result["records"]} assert coords == {(10.0, 20.0), (30.0, 40.0), (50.0, 60.0)} @@ -67,7 +67,7 @@ def test_coordinates(session): def test_multiple_metrics_totals(session): """Totals from different metrics are merged.""" - tools = Tools(session=session.session) + tools = Tools(session=session) result = tools.v_geometry(map="grid", metric="area,count", type="centroid") assert result["totals"]["area"] == pytest.approx(10000.0) assert result["totals"]["count"] == 4 @@ -75,7 +75,7 @@ def test_multiple_metrics_totals(session): def test_per_metric_units(session): """Each metric gets its own unit via positional correspondence.""" - tools = 
Tools(session=session.session) + tools = Tools(session=session) result = tools.v_geometry( map="grid", metric="area,perimeter", units="hectares,kilometers" ) @@ -88,14 +88,14 @@ def test_per_metric_units(session): def test_partial_units(session): """Fewer units than metrics: extra metrics use defaults.""" - tools = Tools(session=session.session) + tools = Tools(session=session) result = tools.v_geometry(map="grid", metric="area,perimeter", units="hectares") assert result["units"]["area"] == "hectares" assert result["units"]["perimeter"] == "meters" def test_plain_format(session): - tools = Tools(session=session.session) + tools = Tools(session=session) result = tools.v_geometry(map="grid", metric="area", format="plain") lines = result.stdout.splitlines() assert lines[0] == "category|area" @@ -104,7 +104,7 @@ def test_plain_format(session): def test_csv_format(session): """Also verifies that nprocs does not affect output.""" - tools = Tools(session=session.session) + tools = Tools(session=session) serial = tools.v_geometry( map="grid", metric="area,perimeter,compactness", @@ -124,13 +124,44 @@ def test_csv_format(session): def test_csv_rejects_multichar_separator(session): - tools = Tools(session=session.session) + tools = Tools(session=session) with pytest.raises(Exception, match="CSV separator"): tools.v_geometry(map="grid", metric="area", format="csv", separator="--") def test_mixed_feature_types_rejected(session): """Mixing metrics from different feature-type families fails cleanly.""" - tools = Tools(session=session.session) + tools = Tools(session=session) with pytest.raises(Exception, match="different feature types"): tools.v_geometry(map="grid", metric="area,length") + + +def test_count_combined_aligns_with_family(session): + """count combined with another metric filters to its cat set. + + Mixed map features: + - Area cat 1 (50x50 = 2500): one feature. 
+ - Area cat 2: two non-overlapping 40x50 features (total area 4000, + count 2) — verifies that repeated cats aggregate correctly. + - Line cat 10: must be dropped from count (not in area's cats). + - Three un-categorized boundaries: v.to.db option=count reports + them as cat=-1, which must also be dropped. + + Expected after alignment: + - Records for cats 1 and 2 only, both carrying area and count. + - totals["count"] = 3 (1 for cat 1, 2 for cat 2), not polluted by + the line or the cat=-1 boundaries. + """ + tools = Tools(session=session) + result = tools.v_geometry(map="mixed", metric="area,count") + + records_by_cat = {record["category"]: record for record in result["records"]} + assert set(records_by_cat) == {1, 2} + + assert records_by_cat[1]["area"] == pytest.approx(2500.0) + assert records_by_cat[1]["count"] == 1 + assert records_by_cat[2]["area"] == pytest.approx(4000.0) + assert records_by_cat[2]["count"] == 2 + + assert result["totals"]["area"] == pytest.approx(6500.0) + assert result["totals"]["count"] == 3 diff --git a/scripts/v.geometry/v.geometry.html b/scripts/v.geometry/v.geometry.html index 85082fb558f..9d4b8a42d11 100644 --- a/scripts/v.geometry/v.geometry.html +++ b/scripts/v.geometry/v.geometry.html @@ -47,6 +47,12 @@

NOTES

them. Run v.geometry separately for each feature type if you need metrics from more than one family. +

When count is combined with another metric (for example +metric=area,count), the count is aligned to the categories +covered by that metric. Categories that only appear in the count result +are dropped so every merged record carries both values, and the reported +count total reflects only the aligned features. +

EXAMPLES

Report area sizes of geology polygons in hectares: diff --git a/scripts/v.geometry/v.geometry.md b/scripts/v.geometry/v.geometry.md index 422044eb00b..98b5a0265fe 100644 --- a/scripts/v.geometry/v.geometry.md +++ b/scripts/v.geometry/v.geometry.md @@ -40,6 +40,12 @@ families (area, line, point); the `count` metric is universal and can be combined with any of them. Run *v.geometry* separately for each feature type if you need metrics from more than one family. +When `count` is combined with another metric (for example +`metric=area,count`), the count is aligned to the categories covered +by that metric. Categories that only appear in the count result are +dropped so every merged record carries both values, and the reported +count total reflects only the aligned features. + ## EXAMPLES Report area sizes of geology polygons in hectares: diff --git a/scripts/v.geometry/v.geometry.py b/scripts/v.geometry/v.geometry.py index d105b88eda4..eeb95b10138 100644 --- a/scripts/v.geometry/v.geometry.py +++ b/scripts/v.geometry/v.geometry.py @@ -222,6 +222,21 @@ def main(): # Pad with None so every metric has a corresponding entry. units_list.extend([None] * (len(metrics) - len(units_list))) + output_format = options["format"] + separator = gs.separator(options["separator"]) + if output_format == "csv": + if not separator: + separator = "," + elif len(separator) > 1: + gs.fatal( + _( + "A standard CSV separator (delimiter) is only one character " + "long, got: {}" + ).format(separator) + ) + elif output_format == "plain" and not separator: + separator = "|" + # v.to.db requires the "columns" parameter even in print-only mode, but # does not use it for JSON or plain output; any valid name works. common_kwargs = { @@ -246,37 +261,43 @@ def main(): ] results = [f.result() for f in futures] + # count's records cover every feature; family metrics emit records + # only for cat-bearing features of their family. 
Align count's cat + # set to the other metrics (when there are any) so records share one schema. + if "count" in metrics and len(metrics) > 1: + count_idx = metrics.index("count") + other_cats = { + record["category"] + for m, r in zip(metrics, results, strict=True) + if m != "count" + for record in r.get("records", []) + } + aligned = [ + record + for record in results[count_idx].get("records", []) + if record["category"] in other_cats + ] + results[count_idx]["records"] = aligned + results[count_idx]["totals"]["count"] = sum(r["count"] for r in aligned) + result = _merge_results(results) - output_format = options["format"] if output_format == "json": print(json.dumps(result, indent=4)) return 0 - separator = gs.separator(options["separator"]) records = result["records"] if not records: return 0 columns = list(records[0].keys()) if output_format == "csv": - if not separator: - separator = "," - elif len(separator) > 1: - gs.fatal( - _( - "A standard CSV separator (delimiter) is only one character " - "long, got: {}" - ).format(separator) - ) # Force LF endings; csv.writer defaults to CRLF, which compounds # with text-mode stdout's newline translation on some platforms. writer = csv.writer(sys.stdout, delimiter=separator, lineterminator="\n") writer.writerow(columns) writer.writerows([record.get(c, "") for c in columns] for record in records) else: # plain - if not separator: - separator = "|" print(separator.join(columns)) for record in records: print(separator.join(str(record.get(c, "")) for c in columns))