From d1e8b91663b49926766ccc5af1f25c735561d16d Mon Sep 17 00:00:00 2001 From: Tim Sutton Date: Mon, 30 Mar 2026 13:38:33 +0100 Subject: [PATCH 01/55] WIP Vectorising grid results --- geest/core/algorithms/area_iterator.py | 15 +++--- .../core/tasks/study_area_processing_task.py | 26 +++++++--- geest/core/workflows/workflow_base.py | 51 ++++++++++++++----- 3 files changed, 63 insertions(+), 29 deletions(-) diff --git a/geest/core/algorithms/area_iterator.py b/geest/core/algorithms/area_iterator.py index b07cbb8c..18fb5d02 100644 --- a/geest/core/algorithms/area_iterator.py +++ b/geest/core/algorithms/area_iterator.py @@ -7,10 +7,9 @@ from typing import Iterator, Tuple +from geoe3.utilities import log_message from qgis.core import Qgis, QgsFeatureRequest, QgsGeometry, QgsVectorLayer -from geest.utilities import log_message - class AreaIterator: """ @@ -140,14 +139,14 @@ def area_count(self) -> int: """ return self.total_features - def __iter__(self) -> Iterator[Tuple[QgsGeometry, QgsGeometry, float]]: + def __iter__(self) -> Iterator[Tuple[QgsGeometry, QgsGeometry, QgsGeometry, float, str]]: """ Iterator that yields pairs of geometries from the polygon layer and the corresponding bbox layer, - along with a progress percentage. + along with a progress percentage and area name. Yields: - Iterator[Tuple[QgsGeometry, QgsGeometry, float]]: Yields a tuple of polygon and bbox geometries, - along with a progress value representing the percentage of the iteration completed. + Iterator[Tuple[QgsGeometry, QgsGeometry, QgsGeometry, float, str]]: Yields a tuple of + polygon geometry, clip geometry, bbox geometry, progress percentage, and area name. """ try: # Ensure all layers have the same CRS @@ -224,8 +223,8 @@ def __iter__(self) -> Iterator[Tuple[QgsGeometry, QgsGeometry, float]]: level=Qgis.Info, ) - # Yield a tuple with polygon geometry, clip geometry, bbox geometry, and progress percentage - yield polygon_feature.geometry(), clip_geom, bbox_feature.geometry(), progress_percent + # Yield a tuple with polygon geometry, clip geometry, bbox geometry, progress percentage, and area name + yield polygon_feature.geometry(), clip_geom, bbox_feature.geometry(), progress_percent, area_name else: log_message( diff --git a/geest/core/tasks/study_area_processing_task.py b/geest/core/tasks/study_area_processing_task.py index 1d82e959..f7589062 100644 --- a/geest/core/tasks/study_area_processing_task.py +++ b/geest/core/tasks/study_area_processing_task.py @@ -11,6 +11,12 @@ import time import traceback +from geoe3.core.algorithms import GHSLDownloader, GHSLProcessor +from geoe3.core.grid_column_utils import add_model_columns_to_grid +from geoe3.core.h3_utils import get_h3_resolution_for_scale +from geoe3.core.settings import setting +from geoe3.utilities import calculate_utm_zone, log_message + # GDAL / OGR / OSR imports from osgeo import gdal, ogr, osr from qgis.core import ( @@ -30,14 +36,9 @@ pyqtSignal, ) -from geest.core.algorithms import GHSLDownloader, GHSLProcessor -from geest.core.settings import setting -from geest.core.h3_utils import get_h3_resolution_for_scale -from geest.utilities import calculate_utm_zone, log_message - from .grid_chunker_task import GridChunkerTask -from .grid_from_bbox_task import GridFromBboxTask from .grid_from_bbox_h3_task import GridFromBboxH3Task +from .grid_from_bbox_task import GridFromBboxTask class QtQueue: @@ -1360,7 +1361,18 @@ def run(self): log_message(f"Areas that could not be processed due to errors: {self.error_count}") log_message(f"Total cells generated: {self.total_cells}") - # 4) Create a VRT of all generated raster masks + # 4) Add model columns to the grid layer + model_path = os.path.join(self.working_dir, "model.json") + if os.path.exists(model_path): + log_message("Adding model columns to study_area_grid layer...") + if add_model_columns_to_grid(self.gpkg_path, model_path): + log_message("Model columns added successfully") + else: + log_message("Failed to add model columns to grid", level="WARNING") + else: + log_message(f"Model file not found at {model_path}, skipping column addition", level="WARNING") + + # 5) Create a VRT of all generated raster masks self.create_raster_vrt() except Exception as e: diff --git a/geest/core/workflows/workflow_base.py b/geest/core/workflows/workflow_base.py index ac10aba5..fe8a88f5 100644 --- a/geest/core/workflows/workflow_base.py +++ b/geest/core/workflows/workflow_base.py @@ -10,6 +10,19 @@ from abc import abstractmethod from typing import Optional +from geoe3.core import JsonTreeItem, setting +from geoe3.core.algorithms import ( + AreaIterator, + GHSLDownloader, + GHSLProcessor, + check_and_reproject_layer, + combine_rasters_to_vrt, + geometry_to_memory_layer, + subset_vector_layer, +) +from geoe3.core.constants import GDAL_OUTPUT_DATA_TYPE +from geoe3.core.grid_column_utils import write_raster_values_to_grid +from geoe3.utilities import log_layer_count, log_message, resources_path from qgis import processing from qgis.core import ( Qgis, @@ -26,19 +39,6 @@ ) from qgis.PyQt.QtCore import QObject, QSettings, pyqtSignal -from geest.core import JsonTreeItem, setting -from geest.core.algorithms import ( - AreaIterator, - GHSLDownloader, - GHSLProcessor, - check_and_reproject_layer, - combine_rasters_to_vrt, - geometry_to_memory_layer, - subset_vector_layer, -) -from geest.core.constants import GDAL_OUTPUT_DATA_TYPE -from geest.utilities import log_layer_count, log_message, resources_path - class WorkflowBase(QObject): """ @@ -482,7 +482,7 @@ def execute(self) -> bool: try: total_areas = area_iterator.area_count() - for index, (current_area, clip_area, current_bbox, progress) in enumerate(area_iterator): + for index, (current_area, clip_area, current_bbox, progress, area_name) in enumerate(area_iterator): areas_processed += 1 message = f"{self.workflow_name} Processing area {index} with progress {progress:.2f}%" # noqa E231 self.updateStatus(f"Processing area {index + 1}/{total_areas}") @@ -548,6 +548,29 @@ def execute(self) -> bool: index=index, ) output_rasters.append(masked_layer) + + # Write raster values to grid for this area + if masked_layer and os.path.exists(masked_layer): + self.updateStatus(f"Writing grid values for area {index + 1}...") + updated_cells = write_raster_values_to_grid( + gpkg_path=self.gpkg_path, + raster_path=masked_layer, + column_name=self.layer_id, + area_name=area_name, + ) + if updated_cells >= 0: + log_message( + f"Updated {updated_cells} grid cells for {self.layer_id} in area {area_name}", + tag="GeoE3", + level=Qgis.Info, + ) + else: + log_message( + f"Failed to update grid cells for {self.layer_id} in area {area_name}", + tag="GeoE3", + level=Qgis.Warning, + ) + # Note: We don't emit area iterator progress here because it would # override the sub-task progress in the Task Progress bar. # The sub-task progress (0-100%) is more useful to the user. From 02895113689124593a82d30402f22adab5ec6135 Mon Sep 17 00:00:00 2001 From: Tim Sutton Date: Mon, 30 Mar 2026 13:45:03 +0100 Subject: [PATCH 02/55] WIP Vectorising grid results --- .cspell.json | 3 + docs/workflow_analysis.md | 286 +++++++++++++++++++++++++++++++ geest/core/grid_column_utils.py | 294 ++++++++++++++++++++++++++++++++ geest/resources/model.json | 8 +- 4 files changed, 587 insertions(+), 4 deletions(-) create mode 100644 docs/workflow_analysis.md create mode 100644 geest/core/grid_column_utils.py diff --git a/.cspell.json b/.cspell.json index 035b587f..ba51fa74 100644 --- a/.cspell.json +++ b/.cspell.json @@ -160,6 +160,9 @@ "hrsh", "pasky", "nvimrc", + "geoe", + "eplex", + "Queryability", "PYTHONPATH" ] } diff --git a/docs/workflow_analysis.md b/docs/workflow_analysis.md new file mode 100644 index 00000000..44b5001b --- /dev/null +++ b/docs/workflow_analysis.md @@ -0,0 +1,286 @@ +# GeoE3 Workflow Analysis: Raster vs Vector Processing + +This document analyzes all workflows in the GeoE3 plugin to identify which can be migrated from raster-based processing to pure vector/SQL operations on the study_area_grid layer. + +## Workflow Analysis Table + +| Type | ID | Workflow Option | Raster Only (Current) | Vector-Only Possible? | Notes | +| ------------------------------------ | ------------------------------------------------------- | ---------------------------------------- | --------------------- | --------------------- | ----------------------------------- | --- | +| **CONTEXTUAL DIMENSION** | | | | | | | +| Dimension | contextual | Dimension aggregation | Yes | **Yes** | `SUM(factor * weight)` SQL | +| Factor | eplex | Factor aggregation | Yes | **Yes** | `SUM(indicator * weight)` SQL | +| Factor | workplace_discrimination | Factor aggregation | Yes | **Yes** | SQL weighted average | +| Factor | regulatory_frameworks | Factor aggregation | Yes | **Yes** | SQL weighted average | +| Factor | financial_inclusion | Factor aggregation | Yes | **Yes** | SQL weighted average | +| Indicator | eplex_score_indicator | use_eplex_score | No | **Yes** | Uniform scalar → set attribute | +| Indicator | eplex_score_indicator | use_index_score | No | **Yes** | Uniform scalar | +| Indicator | eplex_score_indicator | use_contextual_index_score | No | **Yes** | Rescaled scalar | +| Indicator | Workplace_Index | use_contextual_index_score | No | **Yes** | Rescaled scalar | +| Indicator | Pay_Parenthood_Index | use_contextual_index_score | No | **Yes** | Rescaled scalar | +| Indicator | Entrepreneurship_Index | use_contextual_index_score | No | **Yes** | Rescaled scalar | +| **ACCESSIBILITY DIMENSION** | | | | | | | +| Dimension | accessibility | Dimension aggregation | Yes | **Yes** | `SUM(factor * weight)` SQL | +| Factor | women_s_travel_patterns | Factor aggregation | Yes | **Yes** | SQL weighted avg of 5 indicators | +| Factor | access_to_public_transport | Factor aggregation | Yes | **Yes** | SQL weighted average | +| Factor | access_to_health_facilities | Factor aggregation | Yes | **Yes** | SQL weighted average | +| Factor | access_to_education_and_training_facilities | Factor aggregation | Yes | **Yes** | SQL weighted average | +| Factor | access_to_financial_facilities | Factor aggregation | Yes | **Yes** | SQL weighted average | +| Indicator | Kindergartens_Location | use_multi_buffer_point | No | **Yes** | Spatial join: grid ∩ buffers | +| Indicator | Kindergartens_Location | use_single_buffer_point | No | **Yes** | Spatial join | +| Indicator | Kindergartens_Location | use_point_per_cell | No | **Yes** | Count points in cell | +| Indicator | Primary_School_Location | use_multi_buffer_point | No | **Yes** | Spatial join | +| Indicator | Primary_School_Location | use_point_per_cell | No | **Yes** | Count points in cell | +| Indicator | Groceries_Location | use_multi_buffer_point | No | **Yes** | Spatial join | +| Indicator | Groceries_Location | use_point_per_cell | No | **Yes** | Count points in cell | +| Indicator | Pharmacies_Location | use_multi_buffer_point | No | **Yes** | Spatial join | +| Indicator | Pharmacies_Location | use_point_per_cell | No | **Yes** | Count points in cell | +| Indicator | Green_Space_location | use_multi_buffer_point | No | **Yes** | Spatial join | +| Indicator | Green_Space_location | use_polygon_per_cell | No | **Yes** | Polygon intersection | +| Indicator | Public_Transport_location | use_multi_buffer_point | No | **Yes** | Spatial join | +| Indicator | Public_Transport_location | use_point_per_cell | No | **Yes** | Count points in cell | +| Indicator | Hospital_Location | use_multi_buffer_point | No | **Yes** | Spatial join | +| Indicator | Hospital_Location | use_point_per_cell | No | **Yes** | Count points in cell | +| Indicator | Universities_Location | use_multi_buffer_point | No | **Yes** | Spatial join | +| Indicator | Universities_Location | use_point_per_cell | No | **Yes** | Count points in cell | +| Indicator | Banks_Location | use_multi_buffer_point | No | **Yes** | Spatial join | +| Indicator | Banks_Location | use_point_per_cell | No | **Yes** | Count points in cell | +| **PLACE CHARACTERIZATION DIMENSION** | | | | | | | +| Dimension | place_characterization | Dimension aggregation | Yes | **Yes** | `SUM(factor * weight)` SQL | +| Factor | active_transport | Factor aggregation | Yes | **Yes** | SQL weighted average | +| Factor | safety_perception | Factor aggregation | Yes | **Yes** | SQL weighted average | +| Factor | fcv | Factor aggregation | Yes | **Yes** | SQL weighted average | +| Factor | education | Factor aggregation | Yes | **Yes** | SQL weighted average | +| Factor | digital_inclusion | Factor aggregation | Yes | **Yes** | SQL weighted average | +| Factor | environmental_hazards | Factor aggregation | Yes | **Yes** | SQL weighted avg of 5 hazards | +| Factor | water_sanitation | Factor aggregation | Yes | **Yes** | SQL weighted average | +| Indicator | Active_Transport_Network | use_polyline_per_cell | No | **Yes** | Polyline length per cell | +| Indicator | Active_Transport_Network | use_osm_transport_polyline_per_cell | No | **Yes** | OSM highway scoring per cell | +| Indicator | Street_Lights | use_nighttime_lights | **Yes** | No | GHSL nighttime lights raster | +| Indicator | Street_Lights | use_street_lights | Maybe | Maybe | Point buffers=vector, raster=raster | +| Indicator | Street_Lights | use_classify_safety_polygon_into_classes | No | **Yes** | Polygon classification | +| Indicator | FCV | use_csv_to_point_layer | No | **Yes** | CSV→points→buffer→intersect | +| Indicator | FCV | use_single_buffer_point | No | **Yes** | Spatial join with buffer | +| Indicator | FCV | use_point_per_cell | No | **Yes** | Count points in cell | +| Indicator | Education | use_index_score_with_ghsl | **Yes** | No | Requires GHSL raster mask | +| Indicator | Education | use_classify_polygon_into_classes | No | **Yes** | Polygon classification | +| Indicator | Digital_Inclusion | use_index_score_with_ookla | **Yes** | No | Requires Ookla raster | +| Indicator | Digital_Inclusion | use_classify_polygon_into_classes | No | **Yes** | Polygon classification | +| Indicator | Fire | use_environmental_hazards | **Yes** | No | Hazard raster input | +| Indicator | Flood | use_environmental_hazards | **Yes** | No | Hazard raster input | +| Indicator | Landslide | use_environmental_hazards | **Yes** | No | Hazard raster input | +| Indicator | Cyclone | use_environmental_hazards | **Yes** | No | Hazard raster input | +| Indicator | Drought | use_environmental_hazards | **Yes** | No | Hazard raster input | +| Indicator | Water_Sanitation | use_single_buffer_point | No | **Yes** | Spatial join with 3km buffer | +| Indicator | Water_Sanitation | use_point_per_cell | No | **Yes** | Count points in cell | +| **ANALYSIS RESULTS** | | | | | | | +| Analysis | geoe3_score | Analysis aggregation | Yes | **Yes** | `SUM(dimension * weight)` SQL | +| Analysis | geoe3_by_population | Population weighting | Yes | **Yes** | `geoe3_score * population` SQL | +| Analysis | geoe3_score_ghsl_masked | GHSL masking | **Yes** | No | Requires GHSL settlement raster | +| Analysis | geoe3_by_population_ghsl_masked | GHSL + population | **Yes** | No | Requires GHSL settlement raster | +| Analysis | geoe3_score_subnational_aggregation | Subnational stats | Yes | **Yes** | SQL GROUP BY subnational unit | +| Analysis | geoe3_by_population_subnational_aggregation | Subnational + pop | Yes | **Yes** | SQL GROUP BY with population | +| Analysis | geoe3_score_ghsl_masked_subnational_aggregation | GHSL + subnational | **Yes** | No | Requires GHSL raster first | +| Analysis | geoe3_by_population_ghsl_masked_subnational_aggregation | All combined | **Yes** | No | Requires GHSL raster first | +| Analysis | geoe3_by_population_by_opportunities_mask | Opportunities mask | **Yes** | No | Requires opportunities raster | +| Analysis | population | Population per cell | **Yes** | No | Sample population raster | + +--- + +## Summary by Type + +| Type | Total | Raster Required | Vector-Only Possible | +| --------- | --------------------- | ----------------- | ------------------------- | +| Dimension | 3 | 3 (current impl) | **3** (all migratable) | +| Factor | 16 | 16 (current impl) | **16** (all migratable) | +| Indicator | 21 IDs, ~40 workflows | 8 workflows | **32+ workflows** | +| Analysis | 10 | 5 | **5** (partial migration) | +| **TOTAL** | ~70 workflows | ~32 | **~56 migratable** | + +--- + +## Raster-Only Dependencies + +The following workflows genuinely require raster data and cannot be converted to pure vector operations: + +| Workflow | Raster Data Required | Reason | +| ------------------------------ | ---------------------------------------- | --------------------------------- | +| use_nighttime_lights | GHSL Nighttime Lights | Input is raster imagery | +| use_environmental_hazards (5x) | Fire, Flood, Landslide, Cyclone, Drought | Input hazard data is raster | +| use_index_score_with_ghsl | GHSL Settlement Layer | Mask requires raster intersection | +| use_index_score_with_ookla | Ookla Internet Coverage | Coverage data is raster | +| population | Population Raster | WorldPop/GHSL population grids | +| geoe3\_\*\_ghsl_masked | GHSL Settlement Layer | Masking requires raster | +| geoe3\_\*\_opportunities_mask | Opportunities Raster | Masking requires raster | + +--- + +## SQL Examples for Vector-Only Workflows + +### Indicator: Index Score (Uniform Value) + +```sql +-- Set a uniform score across all grid cells +UPDATE study_area_grid +SET eplex_score_indicator = 3.5 +WHERE area_name = 'Study Area 1'; +``` + +### Indicator: Multi-Buffer Point (Spatial Join) + +```sql +-- Score grid cells based on proximity to points (e.g., kindergartens) +-- Assumes buffers have been pre-computed with scores +UPDATE study_area_grid g +SET kindergartens_location = COALESCE( + (SELECT MAX(b.score) + FROM kindergarten_buffers b + WHERE ST_Intersects(g.geom, b.geom)), + 0 +) +WHERE area_name = 'Study Area 1'; +``` + +### Indicator: Point Per Cell (Count) + +```sql +-- Count features within each grid cell +UPDATE study_area_grid g +SET water_sanitation = ( + SELECT COUNT(*) + FROM water_points p + WHERE ST_Contains(g.geom, p.geom) +) +WHERE area_name = 'Study Area 1'; +``` + +### Indicator: Polyline Per Cell (Length/Score) + +```sql +-- Calculate walkability score based on road types in cell +UPDATE study_area_grid g +SET active_transport_network = COALESCE( + (SELECT MAX( + CASE + WHEN r.highway IN ('footway', 'pedestrian', 'cycleway') THEN 5 + WHEN r.highway IN ('residential', 'living_street') THEN 4 + WHEN r.highway IN ('tertiary', 'unclassified') THEN 3 + WHEN r.highway IN ('secondary') THEN 2 + WHEN r.highway IN ('primary', 'trunk') THEN 1 + ELSE 0 + END + ) + FROM osm_roads r + WHERE ST_Intersects(g.geom, r.geom)), + 0 +) +WHERE area_name = 'Study Area 1'; +``` + +### Factor Aggregation (Weighted Average) + +```sql +-- Aggregate indicators into factor score +UPDATE study_area_grid +SET women_s_travel_patterns = ( + kindergartens_location * 0.2 + + primary_school_location * 0.2 + + groceries_location * 0.2 + + pharmacies_location * 0.2 + + green_space_location * 0.2 +) +WHERE area_name = 'Study Area 1'; +``` + +### Dimension Aggregation (Weighted Average) + +```sql +-- Aggregate factors into dimension score +UPDATE study_area_grid +SET accessibility = ( + women_s_travel_patterns * 0.2 + + access_to_public_transport * 0.2 + + access_to_health_facilities * 0.2 + + access_to_education_and_training_facilities * 0.2 + + access_to_financial_facilities * 0.2 +) +WHERE area_name = 'Study Area 1'; +``` + +### Analysis: GeoE3 Score (Final Aggregation) + +```sql +-- Calculate final GeoE3 score from dimensions +UPDATE study_area_grid +SET geoe3_score = ( + contextual * 0.1 + + accessibility * 0.45 + + place_characterization * 0.45 +) +WHERE area_name = 'Study Area 1'; +``` + +### Analysis: Population Weighted Score + +```sql +-- Calculate population-weighted score +UPDATE study_area_grid +SET geoe3_by_population = geoe3_score * population +WHERE area_name = 'Study Area 1'; +``` + +### Analysis: Subnational Aggregation + +```sql +-- Aggregate scores by subnational unit +SELECT + subnational_unit, + AVG(geoe3_score) as avg_score, + SUM(geoe3_by_population) / SUM(population) as pop_weighted_avg, + MIN(geoe3_score) as min_score, + MAX(geoe3_score) as max_score, + COUNT(*) as cell_count +FROM study_area_grid +WHERE area_name = 'Study Area 1' +GROUP BY subnational_unit; +``` + +--- + +## Migration Benefits + +Converting from raster-based to vector-based processing provides: + +1. **Performance**: SQL operations on indexed vector tables are significantly faster than pixel-by-pixel raster sampling +2. **Simplicity**: No intermediate raster files to manage +3. **Accuracy**: Values stored directly in grid cells without resampling artifacts +4. **Storage**: Reduced disk usage (no duplicate raster outputs) +5. **Queryability**: Results immediately available for SQL analysis and reporting + +--- + +## Implementation Priority + +### Phase 1: Aggregation Workflows (Highest Impact) + +- Factor aggregation (16 workflows) +- Dimension aggregation (3 workflows) +- Analysis aggregation (geoe3_score, geoe3_by_population) + +### Phase 2: Vector-Based Indicators + +- Index score workflows (uniform values) +- Multi-buffer point workflows (spatial joins) +- Single-buffer point workflows +- Point/polyline/polygon per cell workflows + +### Phase 3: Hybrid Workflows + +- Workflows requiring both raster sampling AND vector output +- Environmental hazards (sample raster → write to grid) +- Population (sample raster → write to grid) + +--- + +_Document generated: 2026-03-30_ + +_Made with 💗 by [Kartoza](https://kartoza.com) | [Donate](https://github.com/sponsors/worldbank/GEOE3) | [GitHub](https://github.com/worldbank/GEOE3)_ diff --git a/geest/core/grid_column_utils.py b/geest/core/grid_column_utils.py new file mode 100644 index 00000000..75b6be6e --- /dev/null +++ b/geest/core/grid_column_utils.py @@ -0,0 +1,294 @@ +# -*- coding: utf-8 -*- +"""Grid column utilities for model-based columns. + +This module provides utilities for extracting IDs from the JSON model +and managing grid columns for indicators, factors, dimensions, and aggregate scores. +""" + +import json +import os +from typing import Dict, List, Optional + +from geoe3.utilities import log_message +from osgeo import ogr +from qgis.core import Qgis + + +def extract_model_ids(model_path: str) -> Dict[str, List[str]]: + """Extract all IDs from the model JSON file. + + Traverses the model structure and extracts IDs for dimensions, + factors, and indicators. + + Args: + model_path: Path to the model.json file. + + Returns: + Dictionary with keys 'dimensions', 'factors', 'indicators' containing + lists of IDs for each category. + """ + ids = { + "dimensions": [], + "factors": [], + "indicators": [], + } + + if not os.path.exists(model_path): + log_message(f"Model file not found: {model_path}", level=Qgis.Warning) + return ids + + try: + with open(model_path, "r", encoding="utf-8") as f: + model = json.load(f) + + for dimension in model.get("dimensions", []): + dim_id = dimension.get("id", "") + if dim_id: + ids["dimensions"].append(dim_id.lower()) + + for factor in dimension.get("factors", []): + factor_id = factor.get("id", "") + if factor_id: + ids["factors"].append(factor_id.lower()) + + for indicator in factor.get("indicators", []): + indicator_id = indicator.get("id", "") + if indicator_id: + ids["indicators"].append(indicator_id.lower()) + + except Exception as e: + log_message(f"Error extracting model IDs: {e}", level=Qgis.Critical) + + return ids + + +def get_aggregate_column_names() -> List[str]: + """Get the list of aggregate score column names. + + Returns: + List of column names for aggregate scores (WEE score, WEE by population, etc.) + """ + return [ + "wee_score", + "wee_by_population", + "contextual_score", + "accessibility_score", + "place_characterization_score", + ] + + +def get_all_column_names(model_path: str) -> List[str]: + """Get all column names to be added to the grid layer. + + Args: + model_path: Path to the model.json file. + + Returns: + List of all column names (indicators, factors, dimensions, and aggregates). + """ + ids = extract_model_ids(model_path) + columns = [] + + # Add indicator columns + columns.extend(ids["indicators"]) + + # Add factor columns + columns.extend(ids["factors"]) + + # Add dimension columns + columns.extend(ids["dimensions"]) + + # Add aggregate columns + columns.extend(get_aggregate_column_names()) + + return columns + + +def add_model_columns_to_grid(gpkg_path: str, model_path: str) -> bool: + """Add model-based columns to the study_area_grid layer. + + Adds one float column for each indicator, factor, dimension, and aggregate score + based on the IDs from the model.json file. + + Args: + gpkg_path: Path to the GeoPackage containing study_area_grid. + model_path: Path to the model.json file. + + Returns: + True if columns were added successfully, False otherwise. + """ + column_names = get_all_column_names(model_path) + + if not column_names: + log_message("No columns to add to grid layer", level=Qgis.Warning) + return False + + try: + ds = ogr.Open(gpkg_path, 1) + if not ds: + log_message(f"Could not open GeoPackage: {gpkg_path}", level=Qgis.Critical) + return False + + layer = ds.GetLayerByName("study_area_grid") + if not layer: + log_message("study_area_grid layer not found", level=Qgis.Critical) + ds = None + return False + + # Get existing field names to avoid duplicates + layer_defn = layer.GetLayerDefn() + existing_fields = set() + for i in range(layer_defn.GetFieldCount()): + existing_fields.add(layer_defn.GetFieldDefn(i).GetName().lower()) + + # Add new columns + added_count = 0 + for col_name in column_names: + # Sanitize column name (replace spaces with underscores, limit length) + sanitized_name = col_name.replace(" ", "_").replace("-", "_")[:63] + + if sanitized_name.lower() in existing_fields: + log_message(f"Column {sanitized_name} already exists, skipping") + continue + + field_defn = ogr.FieldDefn(sanitized_name, ogr.OFTReal) + if layer.CreateField(field_defn) != 0: + log_message(f"Failed to create field: {sanitized_name}", level=Qgis.Warning) + else: + added_count += 1 + + ds.FlushCache() + ds = None + + log_message(f"Added {added_count} model columns to study_area_grid") + return True + + except Exception as e: + log_message(f"Error adding model columns to grid: {e}", level=Qgis.Critical) + return False + + +def write_raster_values_to_grid( + gpkg_path: str, + raster_path: str, + column_name: str, + area_name: Optional[str] = None, +) -> int: + """Sample raster values at grid cell centroids and write to grid column. + + Args: + gpkg_path: Path to the GeoPackage containing study_area_grid. + raster_path: Path to the raster file to sample. + column_name: Name of the column to write values to. + area_name: Optional area name to filter grid cells. If None, processes all cells. + + Returns: + Number of cells updated, or -1 on error. + """ + from osgeo import gdal + + if not os.path.exists(raster_path): + log_message(f"Raster file not found: {raster_path}", level=Qgis.Warning) + return -1 + + try: + # Open the raster + raster_ds = gdal.Open(raster_path) + if not raster_ds: + log_message(f"Could not open raster: {raster_path}", level=Qgis.Critical) + return -1 + + band = raster_ds.GetRasterBand(1) + nodata = band.GetNoDataValue() + gt = raster_ds.GetGeoTransform() + + # Open the GeoPackage for updating + ds = ogr.Open(gpkg_path, 1) + if not ds: + log_message(f"Could not open GeoPackage: {gpkg_path}", level=Qgis.Critical) + raster_ds = None + return -1 + + layer = ds.GetLayerByName("study_area_grid") + if not layer: + log_message("study_area_grid layer not found", level=Qgis.Critical) + ds = None + raster_ds = None + return -1 + + # Sanitize column name + sanitized_column = column_name.replace(" ", "_").replace("-", "_")[:63].lower() + + # Check if column exists + layer_defn = layer.GetLayerDefn() + field_idx = layer_defn.GetFieldIndex(sanitized_column) + if field_idx < 0: + log_message(f"Column {sanitized_column} not found in grid layer", level=Qgis.Warning) + ds = None + raster_ds = None + return -1 + + # Set attribute filter if area_name is provided + if area_name: + layer.SetAttributeFilter(f"area_name = '{area_name}'") + + # Process each grid cell + updated_count = 0 + layer.StartTransaction() + + try: + for feature in layer: + geom = feature.GetGeometryRef() + if not geom: + continue + + # Get centroid + centroid = geom.Centroid() + x = centroid.GetX() + y = centroid.GetY() + + # Convert to pixel coordinates + px = int((x - gt[0]) / gt[1]) + py = int((y - gt[3]) / gt[5]) + + # Check bounds + if px < 0 or px >= raster_ds.RasterXSize or py < 0 or py >= raster_ds.RasterYSize: + continue + + # Read pixel value + try: + pixel_value = band.ReadAsArray(px, py, 1, 1) + if pixel_value is not None: + value = float(pixel_value[0, 0]) + # Skip nodata values + if nodata is not None and value == nodata: + continue + feature.SetField(sanitized_column, value) + layer.SetFeature(feature) + updated_count += 1 + except (RuntimeError, ValueError, IndexError): + # Skip cells where pixel read fails (out of bounds, invalid data) + continue + + layer.CommitTransaction() + + except Exception as e: + layer.RollbackTransaction() + log_message(f"Error writing values to grid: {e}", level=Qgis.Critical) + ds = None + raster_ds = None + return -1 + + # Reset filter + layer.SetAttributeFilter(None) + + ds.FlushCache() + ds = None + raster_ds = None + + log_message(f"Updated {updated_count} grid cells for column {sanitized_column}") + return updated_count + + except Exception as e: + log_message(f"Error in write_raster_values_to_grid: {e}", level=Qgis.Critical) + return -1 diff --git a/geest/resources/model.json b/geest/resources/model.json index db0b563c..6997e05d 100644 --- a/geest/resources/model.json +++ b/geest/resources/model.json @@ -133,8 +133,8 @@ "dimension_weighting": 0.333333, "indicators": [ { - "indicator": "WBL 2024 Entrepeneurship Index Score", - "id": "Entrepeneurship_Index", + "indicator": "WBL 2024 Entrepreneurship Index Score", + "id": "Entrepreneurship_Index", "output_filename": "FIN_output", "description": "", "default_factor_weighting": 1.0, @@ -333,7 +333,7 @@ "indicators": [ { "indicator": "Location of public transportation stops, including maritime", - "id": "Pulic_Transport_location", + "id": "Public_Transport_location", "output_filename": "PBT_output", "description": "", "default_factor_weighting": 1.0, @@ -868,4 +868,4 @@ ] } ] -} \ No newline at end of file +} From 17ae08978819976b3dfe09640887ac0b6d08d5ce Mon Sep 17 00:00:00 2001 From: Tim Sutton Date: Mon, 30 Mar 2026 21:42:54 +0100 Subject: [PATCH 03/55] Implement grid-first architecture for all workflows - Add write_buffer_values_to_grid() for spatial join with buffer polygons - Simplify write_aggregation_to_grid() to use single SQL UPDATE - Improve write_raster_values_to_grid() with spatial filtering and batch SQL - Add grid-first approach to polyline_per_cell and polygon_per_cell workflows - Add grid-first approach to single_point_buffer and multi_buffer workflows - Add grid-first approach to classified_polygon workflow - Fix button styles to use white text on blue backgrounds - Fix unused imports and f-string placeholders - Fix shellcheck warnings in start scripts --- geest/__init__.py | 6 +- geest/core/algorithms/area_iterator.py | 3 +- geest/core/grid_column_utils.py | 1121 ++++++++++++++++- .../core/tasks/study_area_processing_task.py | 12 +- geest/core/workflows/acled_impact_workflow.py | 46 +- .../workflows/aggregation_workflow_base.py | 321 ++++- .../workflows/classified_polygon_workflow.py | 117 +- .../contextual_index_score_workflow.py | 108 +- geest/core/workflows/dont_use_workflow.py | 2 + geest/core/workflows/eplex_workflow.py | 26 +- .../index_score_with_ghsl_workflow.py | 27 +- .../index_score_with_ookla_workflow.py | 31 +- geest/core/workflows/index_score_workflow.py | 126 +- .../multi_buffer_distances_native_workflow.py | 315 +++-- .../multi_buffer_distances_ors_workflow.py | 78 +- ...sm_transport_polyline_per_cell_workflow.py | 18 +- .../core/workflows/point_per_cell_workflow.py | 114 +- .../workflows/polygon_per_cell_workflow.py | 110 +- .../workflows/polyline_per_cell_workflow.py | 116 +- .../raster_reclassification_workflow.py | 25 +- .../core/workflows/safety_polygon_workflow.py | 14 +- .../core/workflows/safety_raster_workflow.py | 51 +- .../workflows/single_point_buffer_workflow.py | 174 ++- .../street_lights_buffer_workflow.py | 37 +- geest/core/workflows/workflow_base.py | 118 +- geest/gui/panels/create_project_panel.py | 4 +- geest/gui/panels/tree_panel.py | 15 +- geest/utilities.py | 12 +- scripts/start_qgis.sh | 22 +- scripts/start_qgis_ltr.sh | 28 +- 30 files changed, 2516 insertions(+), 681 deletions(-) diff --git a/geest/__init__.py b/geest/__init__.py index 5516b729..76686a7e 100644 --- a/geest/__init__.py +++ b/geest/__init__.py @@ -39,6 +39,7 @@ import os import pstats import subprocess # nosec B404 +import sys import tempfile import unittest from shutil import which @@ -161,8 +162,6 @@ def get_test_directory(self): Raises: ValueError: If GEOE3_TEST_DIR (or GEEST_TEST_DIR) is not set or points to invalid directory """ - import sys - # Get test directory from environment variable (with fallback for backward compatibility) env_test_dir = os.getenv("GEOE3_TEST_DIR") or os.getenv("GEEST_TEST_DIR") @@ -836,7 +835,8 @@ def debug(self): title="GeoE3", message=f"Visual Studio Code debugger is now attached on port {self.DEBUG_PORT}", ) - self.debug_action.setEnabled(False) # prevent user starting it twice + if self.debug_action: + self.debug_action.setEnabled(False) # prevent user starting it twice self.debug_running = True def run(self): diff --git a/geest/core/algorithms/area_iterator.py b/geest/core/algorithms/area_iterator.py index 18fb5d02..1ace5d22 100644 --- a/geest/core/algorithms/area_iterator.py +++ b/geest/core/algorithms/area_iterator.py @@ -7,9 +7,10 @@ from typing import Iterator, Tuple -from geoe3.utilities import log_message from qgis.core import Qgis, QgsFeatureRequest, QgsGeometry, QgsVectorLayer +from geest.utilities import log_message + class AreaIterator: """ diff --git a/geest/core/grid_column_utils.py b/geest/core/grid_column_utils.py index 75b6be6e..d9a1c94d 100644 --- a/geest/core/grid_column_utils.py +++ b/geest/core/grid_column_utils.py @@ -3,29 +3,36 @@ This module provides utilities for extracting IDs from the JSON model and managing grid columns for indicators, factors, dimensions, and aggregate scores. + +The module supports a grid-first architecture where workflow results are written +directly to study_area_grid columns, then optionally rasterized using gdal_rasterize. """ import json import os -from typing import Dict, List, Optional +from typing import Any, Callable, Dict, List, Optional, Tuple, Union + +from osgeo import gdal, ogr +from qgis.core import Qgis, QgsFeedback, QgsVectorLayer -from geoe3.utilities import log_message -from osgeo import ogr -from qgis.core import Qgis +from geest.utilities import log_message def extract_model_ids(model_path: str) -> Dict[str, List[str]]: """Extract all IDs from the model JSON file. Traverses the model structure and extracts IDs for dimensions, - factors, and indicators. + factors, and indicators. Prefixes are added to avoid namespace collisions: + - Dimensions: dim_ + - Factors: fac_ + - Indicators: (no prefix, most commonly referenced) Args: model_path: Path to the model.json file. Returns: Dictionary with keys 'dimensions', 'factors', 'indicators' containing - lists of IDs for each category. + lists of prefixed IDs for each category. """ ids = { "dimensions": [], @@ -44,12 +51,12 @@ def extract_model_ids(model_path: str) -> Dict[str, List[str]]: for dimension in model.get("dimensions", []): dim_id = dimension.get("id", "") if dim_id: - ids["dimensions"].append(dim_id.lower()) + ids["dimensions"].append(f"dim_{dim_id.lower()}") for factor in dimension.get("factors", []): factor_id = factor.get("id", "") if factor_id: - ids["factors"].append(factor_id.lower()) + ids["factors"].append(f"fac_{factor_id.lower()}") for indicator in factor.get("indicators", []): indicator_id = indicator.get("id", "") @@ -107,8 +114,8 @@ def get_all_column_names(model_path: str) -> List[str]: def add_model_columns_to_grid(gpkg_path: str, model_path: str) -> bool: """Add model-based columns to the study_area_grid layer. - Adds one float column for each indicator, factor, dimension, and aggregate score - based on the IDs from the model.json file. + Adds one Real/Float column for each indicator, factor, dimension, and aggregate + score based on the IDs from the model.json file. Args: gpkg_path: Path to the GeoPackage containing study_area_grid. @@ -141,14 +148,13 @@ def add_model_columns_to_grid(gpkg_path: str, model_path: str) -> bool: for i in range(layer_defn.GetFieldCount()): existing_fields.add(layer_defn.GetFieldDefn(i).GetName().lower()) - # Add new columns + # Add new columns as Real/Float type added_count = 0 for col_name in column_names: # Sanitize column name (replace spaces with underscores, limit length) sanitized_name = col_name.replace(" ", "_").replace("-", "_")[:63] if sanitized_name.lower() in existing_fields: - log_message(f"Column {sanitized_name} already exists, skipping") continue field_defn = ogr.FieldDefn(sanitized_name, ogr.OFTReal) @@ -176,6 +182,9 @@ def write_raster_values_to_grid( ) -> int: """Sample raster values at grid cell centroids and write to grid column. + Uses the raster's extent to spatially filter grid cells, then samples + only those cells that fall within the raster bounds. Skips nodata values. + Args: gpkg_path: Path to the GeoPackage containing study_area_grid. raster_path: Path to the raster file to sample. @@ -185,8 +194,6 @@ def write_raster_values_to_grid( Returns: Number of cells updated, or -1 on error. """ - from osgeo import gdal - if not os.path.exists(raster_path): log_message(f"Raster file not found: {raster_path}", level=Qgis.Warning) return -1 @@ -202,6 +209,12 @@ def write_raster_values_to_grid( nodata = band.GetNoDataValue() gt = raster_ds.GetGeoTransform() + # Calculate raster extent for spatial filtering + xmin = gt[0] + ymax = gt[3] + xmax = gt[0] + gt[1] * raster_ds.RasterXSize + ymin = gt[3] + gt[5] * raster_ds.RasterYSize + # Open the GeoPackage for updating ds = ogr.Open(gpkg_path, 1) if not ds: @@ -228,67 +241,1071 @@ def write_raster_values_to_grid( raster_ds = None return -1 + # Set spatial filter to raster extent (only process cells within raster bounds) + layer.SetSpatialFilterRect(xmin, ymin, xmax, ymax) + # Set attribute filter if area_name is provided if area_name: layer.SetAttributeFilter(f"area_name = '{area_name}'") - # Process each grid cell + # Collect FIDs and values first, then batch update + fid_values = {} + for feature in layer: + geom = feature.GetGeometryRef() + if not geom: + continue + + # Get centroid + centroid = geom.Centroid() + x = centroid.GetX() + y = centroid.GetY() + + # Convert to pixel coordinates + px = int((x - gt[0]) / gt[1]) + py = int((y - gt[3]) / gt[5]) + + # Check bounds (should be within due to spatial filter, but double-check) + if px < 0 or px >= raster_ds.RasterXSize or py < 0 or py >= raster_ds.RasterYSize: + continue + + # Read pixel value + try: + pixel_value = band.ReadAsArray(px, py, 1, 1) + if pixel_value is not None: + value = float(pixel_value[0, 0]) + # Skip nodata values + if nodata is not None and value == nodata: + continue + fid_values[feature.GetFID()] = value + except (RuntimeError, ValueError, IndexError): + # Skip cells where pixel read fails + continue + + log_message(f"Found {len(fid_values)} grid cells with valid raster values") + + # Reset filters before updating + layer.SetSpatialFilter(None) + layer.SetAttributeFilter(None) + layer.ResetReading() + + # Batch update using SQL for efficiency + updated_count = 0 + batch_size = 500 + fids = list(fid_values.keys()) + + for batch_start in range(0, len(fids), batch_size): + batch_fids = fids[batch_start : batch_start + batch_size] + + # Build CASE statement for this batch + case_parts = [] + for fid in batch_fids: + value = fid_values[fid] + case_parts.append(f"WHEN fid = {fid} THEN {value}") + + if case_parts: + fid_list = ",".join(str(f) for f in batch_fids) + sql = ( + f"UPDATE study_area_grid " # nosec B608 + f'SET "{sanitized_column}" = CASE {" ".join(case_parts)} END ' + f"WHERE fid IN ({fid_list})" + ) + ds.ExecuteSQL(sql) + updated_count += len(batch_fids) + + ds = None + raster_ds = None + + log_message(f"Updated {updated_count} grid cells for column {sanitized_column}") + return updated_count + + except Exception as e: + log_message(f"Error in write_raster_values_to_grid: {e}", level=Qgis.Critical) + return -1 + + +def _sanitize_column_name(column_name: str) -> str: + """Sanitize a column name for use in SQL and as a field name. + + Args: + column_name: The column name to sanitize. + + Returns: + Sanitized column name (lowercase, underscores, max 63 chars). + """ + return column_name.replace(" ", "_").replace("-", "_")[:63].lower() + + +def _get_grid_layer_and_field_index( + ds: ogr.DataSource, + column_name: str, +) -> Tuple[Optional[ogr.Layer], int]: + """Get the study_area_grid layer and field index for a column. + + Args: + ds: Open OGR DataSource for the GeoPackage. + column_name: The column name to look up. + + Returns: + Tuple of (layer, field_index) or (None, -1) if not found. + """ + layer = ds.GetLayerByName("study_area_grid") + if not layer: + log_message("study_area_grid layer not found", level=Qgis.Critical) + return None, -1 + + sanitized_column = _sanitize_column_name(column_name) + layer_defn = layer.GetLayerDefn() + field_idx = layer_defn.GetFieldIndex(sanitized_column) + + if field_idx < 0: + log_message(f"Column {sanitized_column} not found in grid layer", level=Qgis.Warning) + return layer, -1 + + return layer, field_idx + + +def write_uniform_value_to_grid( + gpkg_path: str, + column_name: str, + value: float, + area_name: Optional[str] = None, + clip_geometry: Optional[ogr.Geometry] = None, +) -> int: + """Write a constant value to all cells in an area using SQL UPDATE. + + This is useful for index_score workflows where a single value applies + to all grid cells in an area. + + Args: + gpkg_path: Path to the GeoPackage containing study_area_grid. + column_name: Name of the column to write values to. + value: The constant value to write to all matching cells. + area_name: Optional area name to filter grid cells. + clip_geometry: Optional geometry to spatially filter cells (not used in SQL mode). + + Returns: + Number of cells updated, or -1 on error. + """ + _ = clip_geometry # Not used in SQL mode + + if not os.path.exists(gpkg_path): + log_message(f"GeoPackage not found: {gpkg_path}", level=Qgis.Warning) + return -1 + + sanitized_column = _sanitize_column_name(column_name) + + try: + ds = ogr.Open(gpkg_path, 1) + if not ds: + log_message(f"Could not open GeoPackage: {gpkg_path}", level=Qgis.Critical) + return -1 + + # Verify column exists + layer = ds.GetLayerByName("study_area_grid") + if not layer: + log_message("study_area_grid layer not found", level=Qgis.Critical) + ds = None + return -1 + + layer_defn = layer.GetLayerDefn() + if layer_defn.GetFieldIndex(sanitized_column) < 0: + log_message(f"Column {sanitized_column} not found in grid layer", level=Qgis.Warning) + ds = None + return -1 + + # Simple SQL UPDATE - no area_name filter, update ALL cells + sql = f'UPDATE study_area_grid SET "{sanitized_column}" = {value}' # nosec B608 + log_message(f"Executing: {sql}") + ds.ExecuteSQL(sql) # nosec B608 + ds = None + + return 0 + + except Exception as e: + log_message(f"Error in write_uniform_value_to_grid: {e}", level=Qgis.Critical) + return -1 + + +def clear_grid_column(gpkg_path: str, column_name: str) -> bool: + """Set all values in a grid column to NULL. + + Should be called before populating a column to ensure clean state. + + Args: + gpkg_path: Path to the GeoPackage containing study_area_grid. + column_name: Name of the column to clear. + + Returns: + True if successful, False otherwise. + """ + if not os.path.exists(gpkg_path): + log_message(f"GeoPackage not found: {gpkg_path}", level=Qgis.Warning) + return False + + sanitized_column = _sanitize_column_name(column_name) + + try: + ds = ogr.Open(gpkg_path, 1) + if not ds: + log_message(f"Could not open GeoPackage: {gpkg_path}", level=Qgis.Critical) + return False + + sql = f'UPDATE study_area_grid SET "{sanitized_column}" = NULL' # nosec B608 + log_message(f"Clearing column: {sql}") + ds.ExecuteSQL(sql) # nosec B608 + ds = None + return True + + except Exception as e: + log_message(f"Error clearing grid column: {e}", level=Qgis.Critical) + return False + + +def count_features_per_grid_cell( + gpkg_path: str, + column_name: str, + features_layer: QgsVectorLayer, + feedback: QgsFeedback = None, +) -> int: + """Count features intersecting each grid cell and assign scores. + + Writes directly to study_area_grid without creating copies. + Score mapping: 0 features = NULL, 1 feature = 3, 2+ features = 5 + + Args: + gpkg_path: Path to the GeoPackage containing study_area_grid. + column_name: Name of the column to write values to. + features_layer: QgsVectorLayer containing features to count. + feedback: Optional feedback for progress reporting. + + Returns: + Number of cells updated, or -1 on error. + """ + from qgis.core import QgsFeatureRequest, QgsSpatialIndex + + if not os.path.exists(gpkg_path): + log_message(f"GeoPackage not found: {gpkg_path}", level=Qgis.Warning) + return -1 + + sanitized_column = _sanitize_column_name(column_name) + + try: + # Load grid layer + grid_layer = QgsVectorLayer(f"{gpkg_path}|layername=study_area_grid", "grid", "ogr") + if not grid_layer.isValid(): + log_message("Could not load study_area_grid layer", level=Qgis.Critical) + return -1 + + # Create spatial index for grid + grid_index = QgsSpatialIndex(grid_layer.getFeatures()) + + # Count features per cell + grid_feature_counts = {} + feature_count = features_layer.featureCount() + log_message(f"Counting {feature_count} features against grid cells") + + for i, feature in enumerate(features_layer.getFeatures()): + geom = feature.geometry() + if geom.isEmpty(): + continue + + # Find intersecting grid cells + intersecting_ids = grid_index.intersects(geom.boundingBox()) + + # Refine with actual intersection test for non-point geometries + if geom.type() != 0: # Not point + request = QgsFeatureRequest().setFilterFids(intersecting_ids) + intersecting_ids = [f.id() for f in grid_layer.getFeatures(request) if f.geometry().intersects(geom)] + + for grid_id in intersecting_ids: + grid_feature_counts[grid_id] = grid_feature_counts.get(grid_id, 0) + 1 + + if feedback and i % 1000 == 0: + feedback.setProgress((i / feature_count) * 50) + + log_message(f"Found {len(grid_feature_counts)} grid cells with features") + + # Build SQL CASE statement for batch update + ds = ogr.Open(gpkg_path, 1) + if not ds: + log_message(f"Could not open GeoPackage: {gpkg_path}", level=Qgis.Critical) + return -1 + + # Update in batches using SQL + # First, get the fid field name (usually 'fid' for GeoPackage) + updated_count = 0 + batch_size = 500 + fids = list(grid_feature_counts.keys()) + + for batch_start in range(0, len(fids), batch_size): + batch_fids = fids[batch_start : batch_start + batch_size] + + # Build CASE statement for this batch + case_parts = [] + for fid in batch_fids: + count = grid_feature_counts[fid] + score = 3 if count == 1 else 5 + case_parts.append(f"WHEN fid = {fid} THEN {score}") + + if case_parts: + fid_list = ",".join(str(f) for f in batch_fids) + sql = ( + f"UPDATE study_area_grid " # nosec B608 + f'SET "{sanitized_column}" = CASE {" ".join(case_parts)} END ' + f"WHERE fid IN ({fid_list})" + ) + ds.ExecuteSQL(sql) + updated_count += len(batch_fids) + + if feedback: + progress = 50 + (batch_start / len(fids)) * 50 + feedback.setProgress(progress) + + ds = None + log_message(f"Updated {updated_count} grid cells with feature counts") + return updated_count + + except Exception as e: + log_message(f"Error in count_features_per_grid_cell: {e}", level=Qgis.Critical) + return -1 + + +def write_spatial_join_to_grid( + gpkg_path: str, + column_name: str, + features_gpkg: str, + features_layer: str, + score_expression: Union[str, Callable[[ogr.Feature], float]], + area_name: Optional[str] = None, + aggregation_method: str = "MAX", + save_buffers: bool = True, + workflow_directory: Optional[str] = None, +) -> int: + """Write scores to grid cells based on spatial intersection with features. + + This function performs a spatial join between grid cells and input features, + applying an aggregation method (MAX, MIN, AVG, SUM) to determine the final + score for each cell. + + Args: + gpkg_path: Path to the GeoPackage containing study_area_grid. + column_name: Name of the column to write values to. + features_gpkg: Path to the GeoPackage containing the features to join. + features_layer: Name of the layer containing features (e.g., buffer polygons). + score_expression: Either a field name containing scores, or a callable + that takes a feature and returns a score. + area_name: Optional area name to filter grid cells. + aggregation_method: How to combine multiple intersecting features + (MAX, MIN, AVG, SUM, COUNT). Defaults to MAX. + save_buffers: If True, save intermediate buffer table for review. + workflow_directory: Directory to save intermediate files. + + Returns: + Number of cells updated, or -1 on error. + """ + if not os.path.exists(gpkg_path): + log_message(f"GeoPackage not found: {gpkg_path}", level=Qgis.Warning) + return -1 + + if not os.path.exists(features_gpkg): + log_message(f"Features GeoPackage not found: {features_gpkg}", level=Qgis.Warning) + return -1 + + try: + # Open the main GeoPackage for updating + ds = ogr.Open(gpkg_path, 1) + if not ds: + log_message(f"Could not open GeoPackage: {gpkg_path}", level=Qgis.Critical) + return -1 + + layer, field_idx = _get_grid_layer_and_field_index(ds, column_name) + if layer is None or field_idx < 0: + ds = None + return -1 + + sanitized_column = _sanitize_column_name(column_name) + + # Open the features GeoPackage + features_ds = ogr.Open(features_gpkg, 0) + if not features_ds: + log_message(f"Could not open features GeoPackage: {features_gpkg}", level=Qgis.Critical) + ds = None + return -1 + + features_lyr = features_ds.GetLayerByName(features_layer) + if not features_lyr: + log_message(f"Features layer not found: {features_layer}", level=Qgis.Critical) + features_ds = None + ds = None + return -1 + + # Build spatial index for features if not already indexed + # Note: GeoPackage layers should have spatial index by default + + # Set attribute filter on grid layer + if area_name: + layer.SetAttributeFilter(f"area_name = '{area_name}'") + + # First pass: collect FIDs and compute scores + fid_scores = {} + for grid_feature in layer: + grid_geom = grid_feature.GetGeometryRef() + if not grid_geom: + continue + + fid = grid_feature.GetFID() + + # Find intersecting features + features_lyr.SetSpatialFilter(grid_geom) + scores = [] + + for feat in features_lyr: + feat_geom = feat.GetGeometryRef() + if feat_geom and grid_geom.Intersects(feat_geom): + # Get score from expression + if callable(score_expression): + score = score_expression(feat) + else: + # It's a field name + score = feat.GetField(score_expression) + + if score is not None: + scores.append(float(score)) + + features_lyr.SetSpatialFilter(None) + + # Aggregate scores + if scores: + if aggregation_method == "MAX": + final_score = max(scores) + elif aggregation_method == "MIN": + final_score = min(scores) + elif aggregation_method == "AVG": + final_score = sum(scores) / len(scores) + elif aggregation_method == "SUM": + final_score = sum(scores) + elif aggregation_method == "COUNT": + final_score = float(len(scores)) + else: + final_score = max(scores) + + fid_scores[fid] = final_score + + log_message(f"Found {len(fid_scores)} grid cells with intersecting features for spatial join") + + # Reset filter before updating + layer.SetAttributeFilter(None) + layer.ResetReading() + + # Second pass: update features by FID updated_count = 0 layer.StartTransaction() try: - for feature in layer: - geom = feature.GetGeometryRef() - if not geom: - continue - - # Get centroid - centroid = geom.Centroid() - x = centroid.GetX() - y = centroid.GetY() - - # Convert to pixel coordinates - px = int((x - gt[0]) / gt[1]) - py = int((y - gt[3]) / gt[5]) - - # Check bounds - if px < 0 or px >= raster_ds.RasterXSize or py < 0 or py >= raster_ds.RasterYSize: - continue - - # Read pixel value - try: - pixel_value = band.ReadAsArray(px, py, 1, 1) - if pixel_value is not None: - value = float(pixel_value[0, 0]) - # Skip nodata values - if nodata is not None and value == nodata: - continue - feature.SetField(sanitized_column, value) - layer.SetFeature(feature) + for fid, score in fid_scores.items(): + feature = layer.GetFeature(fid) + if feature: + feature.SetField(sanitized_column, score) + if layer.SetFeature(feature) == 0: updated_count += 1 - except (RuntimeError, ValueError, IndexError): - # Skip cells where pixel read fails (out of bounds, invalid data) - continue layer.CommitTransaction() except Exception as e: layer.RollbackTransaction() - log_message(f"Error writing values to grid: {e}", level=Qgis.Critical) + log_message(f"Error in spatial join: {e}", level=Qgis.Critical) + features_ds = None ds = None - raster_ds = None return -1 - # Reset filter + # Save intermediate buffers if requested + if save_buffers and workflow_directory: + buffer_output = os.path.join(workflow_directory, f"{features_layer}_buffers.gpkg") + try: + driver = ogr.GetDriverByName("GPKG") + if os.path.exists(buffer_output): + driver.DeleteDataSource(buffer_output) + buffer_ds = driver.CreateDataSource(buffer_output) + buffer_ds.CopyLayer(features_lyr, features_layer) + buffer_ds = None + log_message(f"Saved intermediate buffers to {buffer_output}") + except Exception as e: + log_message(f"Could not save intermediate buffers: {e}", level=Qgis.Warning) + + features_ds = None + ds.FlushCache() + ds = None + + log_message(f"Updated {updated_count} grid cells via spatial join for column {sanitized_column}") + return updated_count + + except Exception as e: + log_message(f"Error in write_spatial_join_to_grid: {e}", level=Qgis.Critical) + return -1 + + +def write_point_count_to_grid( + gpkg_path: str, + column_name: str, + features_gpkg: str, + features_layer: str, + area_name: Optional[str] = None, + count_to_score_mapping: Optional[Dict[int, float]] = None, + max_count_score: float = 5.0, +) -> int: + """Count points per grid cell and map counts to scores. + + This function counts point features within each grid cell and converts + the count to a score using the provided mapping or a default linear scale. + + Args: + gpkg_path: Path to the GeoPackage containing study_area_grid. + column_name: Name of the column to write values to. + features_gpkg: Path to the GeoPackage containing the point features. + features_layer: Name of the layer containing point features. + area_name: Optional area name to filter grid cells. + count_to_score_mapping: Dict mapping count values to scores. + Example: {0: 0, 1: 3, 2: 5} means 0 points = score 0, + 1 point = score 3, 2+ points = score 5. + If None, uses default {0: 0, 1: 3} with max as 5. + max_count_score: Score to assign when count exceeds all mappings. + + Returns: + Number of cells updated, or -1 on error. + """ + # Default mapping based on typical GeoE3 point per cell scoring + if count_to_score_mapping is None: + count_to_score_mapping = {0: 0, 1: 3} + + if not os.path.exists(gpkg_path): + log_message(f"GeoPackage not found: {gpkg_path}", level=Qgis.Warning) + return -1 + + if not os.path.exists(features_gpkg): + log_message(f"Features GeoPackage not found: {features_gpkg}", level=Qgis.Warning) + return -1 + + try: + # Open the main GeoPackage for updating + ds = ogr.Open(gpkg_path, 1) + if not ds: + log_message(f"Could not open GeoPackage: {gpkg_path}", level=Qgis.Critical) + return -1 + + layer, field_idx = _get_grid_layer_and_field_index(ds, column_name) + if layer is None or field_idx < 0: + ds = None + return -1 + + sanitized_column = _sanitize_column_name(column_name) + + # Open the features GeoPackage + features_ds = ogr.Open(features_gpkg, 0) + if not features_ds: + log_message(f"Could not open features GeoPackage: {features_gpkg}", level=Qgis.Critical) + ds = None + return -1 + + features_lyr = features_ds.GetLayerByName(features_layer) + if not features_lyr: + log_message(f"Features layer not found: {features_layer}", level=Qgis.Critical) + features_ds = None + ds = None + return -1 + + # Set attribute filter on grid layer + if area_name: + layer.SetAttributeFilter(f"area_name = '{area_name}'") + + # Get sorted mapping keys for lookup + sorted_counts = sorted(count_to_score_mapping.keys()) + + # First pass: collect FIDs and compute scores + fid_scores = {} + for grid_feature in layer: + grid_geom = grid_feature.GetGeometryRef() + if not grid_geom: + continue + + fid = grid_feature.GetFID() + + # Count intersecting features + features_lyr.SetSpatialFilter(grid_geom) + point_count = 0 + + for feat in features_lyr: + feat_geom = feat.GetGeometryRef() + if feat_geom and grid_geom.Intersects(feat_geom): + point_count += 1 + + features_lyr.SetSpatialFilter(None) + + # Map count to score + score = max_count_score # Default to max if count exceeds all mappings + for count_threshold in sorted_counts: + if point_count <= count_threshold: + score = count_to_score_mapping[count_threshold] + break + + # If point_count exceeds all thresholds, use max_count_score + if point_count > max(sorted_counts): + score = max_count_score + + fid_scores[fid] = score + + log_message(f"Found {len(fid_scores)} grid cells to update with point counts") + + # Reset filter before updating layer.SetAttributeFilter(None) + layer.ResetReading() + + # Second pass: update features by FID + updated_count = 0 + layer.StartTransaction() + + try: + for fid, score in fid_scores.items(): + feature = layer.GetFeature(fid) + if feature: + feature.SetField(sanitized_column, score) + if layer.SetFeature(feature) == 0: + updated_count += 1 + + layer.CommitTransaction() + + except Exception as e: + layer.RollbackTransaction() + log_message(f"Error in point count: {e}", level=Qgis.Critical) + features_ds = None + ds = None + return -1 + features_ds = None ds.FlushCache() ds = None - raster_ds = None - log_message(f"Updated {updated_count} grid cells for column {sanitized_column}") + log_message(f"Updated {updated_count} grid cells with point counts for column {sanitized_column}") return updated_count except Exception as e: - log_message(f"Error in write_raster_values_to_grid: {e}", level=Qgis.Critical) + log_message(f"Error in write_point_count_to_grid: {e}", level=Qgis.Critical) return -1 + + +def write_aggregation_to_grid( + gpkg_path: str, + target_column: str, + source_columns_weights: Dict[str, float], + area_name: Optional[str] = None, + use_coalesce: bool = True, +) -> int: + """Perform weighted aggregation of grid columns using SQL UPDATE. + + This replaces the raster-based QgsRasterCalculator approach for + factor, dimension, and analysis aggregations. + + Uses a single SQL UPDATE statement: + UPDATE study_area_grid SET target = (w1*COALESCE(c1,0) + w2*COALESCE(c2,0) + ...) + + Args: + gpkg_path: Path to the GeoPackage containing study_area_grid. + target_column: Name of the column to write aggregated values to. + source_columns_weights: Dict mapping source column names to their weights. + Example: {"indicator1": 0.3, "indicator2": 0.3, "indicator3": 0.4} + area_name: Optional area name to filter grid cells (not used - aggregates all). + use_coalesce: If True, use COALESCE(col, 0) to handle NULL values. + Defaults to True. + + Returns: + 0 on success, or -1 on error. + """ + _ = area_name # Not used - we aggregate all cells + + if not source_columns_weights: + log_message("No source columns provided for aggregation", level=Qgis.Warning) + return -1 + + if not os.path.exists(gpkg_path): + log_message(f"GeoPackage not found: {gpkg_path}", level=Qgis.Warning) + return -1 + + try: + ds = ogr.Open(gpkg_path, 1) + if not ds: + log_message(f"Could not open GeoPackage: {gpkg_path}", level=Qgis.Critical) + return -1 + + layer, field_idx = _get_grid_layer_and_field_index(ds, target_column) + if layer is None or field_idx < 0: + ds = None + return -1 + + sanitized_target = _sanitize_column_name(target_column) + + # Verify all source columns exist + layer_defn = layer.GetLayerDefn() + for source_col in source_columns_weights.keys(): + sanitized_source = _sanitize_column_name(source_col) + if layer_defn.GetFieldIndex(sanitized_source) < 0: + log_message(f"Source column {sanitized_source} not found in grid layer", level=Qgis.Warning) + ds = None + return -1 + + # Build the weighted sum expression + # Example: (0.3 * COALESCE("indicator1", 0) + 0.4 * COALESCE("indicator2", 0)) + terms = [] + for source_col, weight in source_columns_weights.items(): + sanitized_source = _sanitize_column_name(source_col) + if use_coalesce: + terms.append(f'({weight} * COALESCE("{sanitized_source}", 0))') + else: + terms.append(f'({weight} * "{sanitized_source}")') + + expression = " + ".join(terms) + + # Build and execute SQL UPDATE + sql = f'UPDATE study_area_grid SET "{sanitized_target}" = ({expression})' # nosec B608 + log_message(f"Executing aggregation SQL: {sql[:200]}...") + ds.ExecuteSQL(sql) # nosec B608 + ds = None + + log_message(f"Aggregated {len(source_columns_weights)} columns into {sanitized_target}") + return 0 + + except Exception as e: + log_message(f"Error in write_aggregation_to_grid: {e}", level=Qgis.Critical) + return -1 + + +def rasterize_grid_column( + gpkg_path: str, + column_name: str, + output_raster_path: str, + cell_size: float, + extent: Optional[Tuple[float, float, float, float]] = None, + nodata: float = -9999.0, + area_name: Optional[str] = None, + output_type: int = gdal.GDT_Float32, +) -> bool: + """Convert a grid column to a raster using gdal_rasterize. + + This function creates a raster from the study_area_grid layer, + burning values from the specified column into the output raster. + + Args: + gpkg_path: Path to the GeoPackage containing study_area_grid. + column_name: Name of the column to rasterize. + output_raster_path: Path for the output raster file. + cell_size: Cell size in map units (meters for projected CRS). + extent: Optional tuple of (xmin, ymin, xmax, ymax). If None, + computed from the grid layer extent. + nodata: NoData value for the output raster. Defaults to -9999. + area_name: Optional area name to filter grid cells. + output_type: GDAL data type for output. Defaults to GDT_Float32. + + Returns: + True if rasterization succeeded, False otherwise. + """ + if not os.path.exists(gpkg_path): + log_message(f"GeoPackage not found: {gpkg_path}", level=Qgis.Warning) + return False + + sanitized_column = _sanitize_column_name(column_name) + + # Build the layer specification with optional attribute filter + if area_name: + layer_spec = "study_area_grid" + where_clause = f"area_name = '{area_name}'" + else: + layer_spec = "study_area_grid" + where_clause = None + + try: + # Open the GeoPackage to get extent and CRS info + ds = ogr.Open(gpkg_path, 0) + if not ds: + log_message(f"Could not open GeoPackage: {gpkg_path}", level=Qgis.Critical) + return False + + layer = ds.GetLayerByName("study_area_grid") + if not layer: + log_message("study_area_grid layer not found", level=Qgis.Critical) + ds = None + return False + + # Apply filter to get correct extent + if area_name: + layer.SetAttributeFilter(f"area_name = '{area_name}'") + + # Get extent if not provided + if extent is None: + layer_extent = layer.GetExtent() + extent = (layer_extent[0], layer_extent[2], layer_extent[1], layer_extent[3]) + # extent is (xmin, ymin, xmax, ymax) + + # Get spatial reference + srs = layer.GetSpatialRef() + srs_wkt = srs.ExportToWkt() if srs else None + + # Reset filter + layer.SetAttributeFilter(None) + ds = None + + # Calculate raster dimensions + xmin, ymin, xmax, ymax = extent + width = int((xmax - xmin) / cell_size) + height = int((ymax - ymin) / cell_size) + + if width <= 0 or height <= 0: + log_message(f"Invalid raster dimensions: {width}x{height}", level=Qgis.Critical) + return False + + # Build gdal_rasterize options + rasterize_options = gdal.RasterizeOptions( + format="GTiff", + outputType=output_type, + width=width, + height=height, + outputBounds=[xmin, ymin, xmax, ymax], + noData=nodata, + initValues=[nodata], + attribute=sanitized_column, + layers=[layer_spec], + where=where_clause, + creationOptions=["COMPRESS=LZW", "TILED=YES"], + ) + + # Run rasterization + result = gdal.Rasterize( + output_raster_path, + gpkg_path, + options=rasterize_options, + ) + + if result is None: + log_message(f"gdal_rasterize failed for column {sanitized_column}", level=Qgis.Critical) + return False + + # Set spatial reference on output + if srs_wkt: + result.SetProjection(srs_wkt) + + # Ensure data is written + result.FlushCache() + result = None + + log_message(f"Rasterized column {sanitized_column} to {output_raster_path}") + return True + + except Exception as e: + log_message(f"Error in rasterize_grid_column: {e}", level=Qgis.Critical) + return False + + +def write_buffer_values_to_grid( + gpkg_path: str, + column_name: str, + buffer_layer: QgsVectorLayer, + value_field: str = "value", + aggregation_method: str = "MAX", + feedback: QgsFeedback = None, +) -> int: + """Write buffer polygon scores to intersecting grid cells. + + For each grid cell, finds intersecting buffer polygons and aggregates + their scores (using MAX by default) to determine the cell's value. + + Args: + gpkg_path: Path to the GeoPackage containing study_area_grid. + column_name: Name of the column to write values to. + buffer_layer: QgsVectorLayer containing buffer polygons with scores. + value_field: Name of the field containing scores in buffer_layer. + aggregation_method: How to combine multiple intersecting buffers + (MAX, MIN, AVG). Defaults to MAX. + feedback: Optional feedback for progress reporting. + + Returns: + Number of cells updated, or -1 on error. + """ + from qgis.core import QgsFeatureRequest, QgsSpatialIndex + + if not os.path.exists(gpkg_path): + log_message(f"GeoPackage not found: {gpkg_path}", level=Qgis.Warning) + return -1 + + if not buffer_layer or not buffer_layer.isValid(): + log_message("Invalid buffer layer provided", level=Qgis.Warning) + return -1 + + sanitized_column = _sanitize_column_name(column_name) + + try: + # Load grid layer + grid_layer = QgsVectorLayer(f"{gpkg_path}|layername=study_area_grid", "grid", "ogr") + if not grid_layer.isValid(): + log_message("Could not load study_area_grid layer", level=Qgis.Critical) + return -1 + + # Create spatial index for buffer layer + buffer_index = QgsSpatialIndex(buffer_layer.getFeatures()) + + # Collect scores per grid cell + grid_scores = {} + total_features = grid_layer.featureCount() + log_message(f"Processing {total_features} grid cells against buffer layer") + + for i, grid_feature in enumerate(grid_layer.getFeatures()): + grid_geom = grid_feature.geometry() + if grid_geom.isEmpty(): + continue + + grid_fid = grid_feature.id() + + # Find intersecting buffer polygons + candidate_ids = buffer_index.intersects(grid_geom.boundingBox()) + if not candidate_ids: + continue + + # Get actual intersecting features and their scores + scores = [] + request = QgsFeatureRequest().setFilterFids(candidate_ids) + for buffer_feature in buffer_layer.getFeatures(request): + buffer_geom = buffer_feature.geometry() + if buffer_geom.intersects(grid_geom): + score = buffer_feature.attribute(value_field) + if score is not None: + scores.append(float(score)) + + # Aggregate scores + if scores: + if aggregation_method == "MAX": + final_score = max(scores) + elif aggregation_method == "MIN": + final_score = min(scores) + elif aggregation_method == "AVG": + final_score = sum(scores) / len(scores) + else: + final_score = max(scores) + + grid_scores[grid_fid] = final_score + + if feedback and i % 1000 == 0: + feedback.setProgress((i / total_features) * 50) + + log_message(f"Found {len(grid_scores)} grid cells with intersecting buffers") + + # Update grid using SQL batched updates + ds = ogr.Open(gpkg_path, 1) + if not ds: + log_message(f"Could not open GeoPackage: {gpkg_path}", level=Qgis.Critical) + return -1 + + updated_count = 0 + batch_size = 500 + fids = list(grid_scores.keys()) + + for batch_start in range(0, len(fids), batch_size): + batch_fids = fids[batch_start : batch_start + batch_size] + + # Build CASE statement for this batch + case_parts = [] + for fid in batch_fids: + score = grid_scores[fid] + case_parts.append(f"WHEN fid = {fid} THEN {score}") + + if case_parts: + fid_list = ",".join(str(f) for f in batch_fids) + sql = ( + f"UPDATE study_area_grid " # nosec B608 + f'SET "{sanitized_column}" = CASE {" ".join(case_parts)} END ' + f"WHERE fid IN ({fid_list})" + ) + ds.ExecuteSQL(sql) + updated_count += len(batch_fids) + + if feedback: + progress = 50 + (batch_start / max(len(fids), 1)) * 50 + feedback.setProgress(progress) + + ds = None + log_message(f"Updated {updated_count} grid cells with buffer scores") + return updated_count + + except Exception as e: + log_message(f"Error in write_buffer_values_to_grid: {e}", level=Qgis.Critical) + return -1 + + +def get_grid_column_statistics( + gpkg_path: str, + column_name: str, + area_name: Optional[str] = None, +) -> Dict[str, Any]: + """Calculate statistics for a grid column. + + Args: + gpkg_path: Path to the GeoPackage containing study_area_grid. + column_name: Name of the column to analyze. + area_name: Optional area name to filter grid cells. + + Returns: + Dict with keys: count, min, max, mean, sum, null_count. + Returns empty dict on error. + """ + if not os.path.exists(gpkg_path): + log_message(f"GeoPackage not found: {gpkg_path}", level=Qgis.Warning) + return {} + + try: + ds = ogr.Open(gpkg_path, 0) + if not ds: + log_message(f"Could not open GeoPackage: {gpkg_path}", level=Qgis.Critical) + return {} + + layer, field_idx = _get_grid_layer_and_field_index(ds, column_name) + if layer is None or field_idx < 0: + ds = None + return {} + + sanitized_column = _sanitize_column_name(column_name) + + # Set attribute filter + if area_name: + layer.SetAttributeFilter(f"area_name = '{area_name}'") + + # Calculate statistics + values = [] + null_count = 0 + + for feature in layer: + value = feature.GetField(sanitized_column) + if value is None: + null_count += 1 + else: + values.append(float(value)) + + layer.SetAttributeFilter(None) + ds = None + + if not values: + return { + "count": 0, + "min": None, + "max": None, + "mean": None, + "sum": None, + "null_count": null_count, + } + + return { + "count": len(values), + "min": min(values), + "max": max(values), + "mean": sum(values) / len(values), + "sum": sum(values), + "null_count": null_count, + } + + except Exception as e: + log_message(f"Error in get_grid_column_statistics: {e}", level=Qgis.Critical) + return {} diff --git a/geest/core/tasks/study_area_processing_task.py b/geest/core/tasks/study_area_processing_task.py index f7589062..75eac6d7 100644 --- a/geest/core/tasks/study_area_processing_task.py +++ b/geest/core/tasks/study_area_processing_task.py @@ -11,12 +11,6 @@ import time import traceback -from geoe3.core.algorithms import GHSLDownloader, GHSLProcessor -from geoe3.core.grid_column_utils import add_model_columns_to_grid -from geoe3.core.h3_utils import get_h3_resolution_for_scale -from geoe3.core.settings import setting -from geoe3.utilities import calculate_utm_zone, log_message - # GDAL / OGR / OSR imports from osgeo import gdal, ogr, osr from qgis.core import ( @@ -36,6 +30,12 @@ pyqtSignal, ) +from geest.core.algorithms import GHSLDownloader, GHSLProcessor +from geest.core.grid_column_utils import add_model_columns_to_grid +from geest.core.h3_utils import get_h3_resolution_for_scale +from geest.core.settings import setting +from geest.utilities import calculate_utm_zone, log_message + from .grid_chunker_task import GridChunkerTask from .grid_from_bbox_h3_task import GridFromBboxH3Task from .grid_from_bbox_task import GridFromBboxTask diff --git a/geest/core/workflows/acled_impact_workflow.py b/geest/core/workflows/acled_impact_workflow.py index 560afe20..111f496a 100644 --- a/geest/core/workflows/acled_impact_workflow.py +++ b/geest/core/workflows/acled_impact_workflow.py @@ -1,9 +1,7 @@ # -*- coding: utf-8 -*- """📦 Acled Impact Workflow module. - This module contains functionality for acled impact workflow. """ - import csv import os @@ -80,33 +78,28 @@ def _process_features_for_area( current_bbox: QgsGeometry, area_features: QgsVectorLayer, index: int, + area_name: str = None, ) -> str: """ Executes the actual workflow logic for a single area Must be implemented by subclasses. - :current_area: Current polygon from our study area. :current_bbox: Bounding box of the above area. :area_features: A vector layer of features to analyse that includes only features in the study area. :index: Iteration / number of area being processed. - :return: Raster file path of the output. """ - # Step 1: Buffer the selected features by relevant # distance for each event type and assign values # to the buffer layer buffered_layer = self._buffer_features(area_features) self.feedback.setProgress(10.0) - # Step 2: Assign values based on event_type # scored_layer = self._assign_scores(buffered_layer) self.feedback.setProgress(40.0) - # Step 3: Dissolve and remove overlapping areas, keeping areas with the lowest value dissolved_layer = self._overlay_analysis(buffered_layer) self.feedback.setProgress(60.0) - # Step 4: Rasterize the dissolved layer raster_output = self._rasterize( dissolved_layer, @@ -116,7 +109,6 @@ def _process_features_for_area( default_value=5, ) self.feedback.setProgress(80.0) - return raster_output def _load_csv_as_point_layer(self) -> QgsVectorLayer: @@ -124,7 +116,6 @@ def _load_csv_as_point_layer(self) -> QgsVectorLayer: Load the CSV file, extract relevant columns (latitude, longitude, event_type), create a point layer from the retained columns, reproject the points to match the CRS of the layers from the GeoPackage, and save the result as a shapefile. - Returns: QgsVectorLayer: The reprojected point layer created from the CSV. """ @@ -132,20 +123,17 @@ def _load_csv_as_point_layer(self) -> QgsVectorLayer: # Set up a coordinate transform from WGS84 to the target CRS transform_context = self.context.project().transformContext() coordinate_transform = QgsCoordinateTransform(source_crs, self.target_crs, transform_context) - # Define fields for the point layer fields = QgsFields() fields.append(QgsField("event_type", QVariant.String)) fields.append(QgsField("value", QVariant.Int)) fields.append(QgsField("buffer_m", QVariant.Int)) fields.append(QgsField("score", QVariant.Int)) - # Create an in-memory point layer in the target CRS point_layer = QgsVectorLayer(f"Point?crs={self.target_crs.authid()}", "acled_points", "memory") point_provider = point_layer.dataProvider() point_provider.addAttributes(fields) # type: ignore point_layer.updateFields() - # Read the CSV and add reprojected points to the layer with open(self.csv_file, newline="", encoding="utf-8") as csvfile: reader = csv.DictReader(csvfile) @@ -154,11 +142,9 @@ def _load_csv_as_point_layer(self) -> QgsVectorLayer: lat = float(row["latitude"]) lon = float(row["longitude"]) event_type = row["event_type"] - # Transform point to the target CRS point_wgs84 = QgsPointXY(lon, lat) point_transformed = coordinate_transform.transform(point_wgs84) - feature = QgsFeature() feature.setGeometry(QgsGeometry.fromPointXY(point_transformed)) value = self.event_scores.get(event_type, 5) @@ -166,7 +152,6 @@ def _load_csv_as_point_layer(self) -> QgsVectorLayer: score = 0 # this will be replaced later with the lowest overlapping score feature.setAttributes([event_type, value, buffer_m, score]) features.append(feature) - point_provider.addFeatures(features) # type: ignore log_message(f"Loaded {len(features)} points from CSV") # Save the layer to disk as a shapefile @@ -178,32 +163,26 @@ def _load_csv_as_point_layer(self) -> QgsVectorLayer: error = QgsVectorFileWriter.writeAsVectorFormat( point_layer, shapefile_path, "utf-8", self.target_crs, "ESRI Shapefile" ) - if error[0] != 0: raise QgsProcessingException(f"Error saving point layer to disk: {error[1]}") - log_message( f"Point layer created from CSV saved to {shapefile_path}", tag="GeoE3", level=Qgis.Info, ) - # Reload the saved shapefile as the final point layer to ensure consistency saved_layer = QgsVectorLayer(shapefile_path, "acled_points", "ogr") if not saved_layer.isValid(): raise QgsProcessingException(f"Failed to reload saved point layer from {shapefile_path}") - return saved_layer def _buffer_features(self, layer: QgsVectorLayer) -> QgsVectorLayer: """ Buffer the input features by 5 km. - Args: layer (QgsVectorLayer): The input feature layer. This layer should be a point layer with two columns: value and buffer_m representing the geoe3 score for the event and the distance to buffer in m. - Returns: QgsVectorLayer: The buffered features layer. """ @@ -245,7 +224,6 @@ def _buffer_features(self, layer: QgsVectorLayer) -> QgsVectorLayer: }, )["OUTPUT"] del subset_layer - output_name = f"{self.layer_id}_buffered" output_path = os.path.join(self.workflow_directory, f"{output_name}.shp") log_message(f"Writing buffered layer to {output_path}") @@ -264,58 +242,47 @@ def _overlay_analysis(self, input_layer): """ Perform an overlay analysis on a set of circular polygons, prioritizing areas with the lowest value in overlapping regions, and save the result as a shapefile. - This function processes an input shapefile containing circular polygons, each with a value between 1 and 4, representing different priority levels. The function performs an overlay analysis where the polygons overlap and ensures that for any overlapping areas, the polygon with the lowest value (i.e., highest priority) is retained, while polygons with higher values are removed from those regions. - The analysis is performed as follows: 1. The input layer is loaded from the provided shapefile path. 2. A dissolve operation is performed on the input layer to combine any adjacent polygons with the same value. 3. A union operation is performed on the input layer to break the polygons into distinct, non-overlapping areas. 4. For each distinct area, the value from the overlapping polygons is compared, and the minimum value (representing the highest priority) is assigned to that area. 5. The resulting dataset, which consists of non-overlapping polygons with the highest priority (smallest value), is saved to a new shapefile at the specified output path. - Parameters: ----------- input_layer : QgsVectorLayer The input shapefile containing the circular polygons with values between 1 and 4. - output_filepath : str The file path where the output shapefile with the results of the overlay analysis will be saved. The output will be saved in self.workflow_directory. - Returns: -------- None The function does not return a value but writes the result to the specified output shapefile. - Logging: -------- Messages related to the status of the operation (success or failure) are logged using QgsMessageLog with the tag 'GeoE3' and the log level set to Qgis.Info. - Raises: ------- IOError: If the input layer cannot be loaded or if an error occurs during the file writing process. - Example: -------- To perform an overlay analysis on a shapefile located at "path/to/input.shp" and save the result to "path/to/output.shp", use the following: - overlay_analysis(qgis_vector_layer) """ log_message("Overlay analysis started") # Step 1: Load the input layer from the provided shapefile path # layer = QgsVectorLayer(input_filepath, "circles_layer", "ogr") - if not input_layer.isValid(): log_message("Layer failed to load!") return - # Step 2: Perform the dissolve operation to separate disjoint polygons dissolve_output_path = os.path.join(self.workflow_directory, f"{self.layer_id}_dissolve.shp") dissolve = processing.run( # type: ignore[index] @@ -350,16 +317,13 @@ def _overlay_analysis(self, input_layer): log_message(f"Input layer field types: {[field.typeName() for field in union.fields()]}") # Step 4: Iterate through the unioned features to assign the minimum value in overlapping areas unique_geometries = {} - for feature in union.getFeatures(): geom = feature.geometry().asWkt() attrs = feature.attributes() # Use geometry as a key to identify unique areas value = attrs[union.fields().indexFromName("value")] - log_message( f"Processing feature with min value: {value}", ) - # Check if this geometry is already in the dictionary if geom in unique_geometries: # If it exists, update only if the new min_value is lower @@ -371,19 +335,16 @@ def _overlay_analysis(self, input_layer): new_feature.setGeometry(feature.geometry()) new_feature.setAttributes([value]) unique_geometries[geom] = new_feature - # Step 5: Create a memory layer to store the result result_layer = QgsVectorLayer("Polygon", "result_layer", "memory") result_layer.setCrs(self.target_crs) provider = result_layer.dataProvider() - # Step 6: Add a field to store the minimum value (lower number = higher rank) provider.addAttributes([QgsField("min_value", QVariant.Int)]) result_layer.updateFields() # Step 7: Add the filtered features to the result layer for unique_feature in unique_geometries.values(): provider.addFeature(unique_feature) - full_output_filepath = os.path.join(self.workflow_directory, f"{self.layer_id}_final.shp") # Step 8: Save the result layer to the specified output shapefile error = QgsVectorFileWriter.writeAsVectorFormat( @@ -393,7 +354,6 @@ def _overlay_analysis(self, input_layer): result_layer.crs(), "ESRI Shapefile", ) - if error[0] == 0: log_message( f"Overlay analysis complete, output saved to {full_output_filepath}", @@ -412,16 +372,15 @@ def _process_raster_for_area( current_bbox: QgsGeometry, area_raster: str, index: int, + area_name: str = None, ): """ Executes the actual workflow logic for a single area using a raster. - :current_area: Current polygon from our study area. :clip_area: Polygon to clip the raster to which is aligned to cell edges. :current_bbox: Bounding box of the above area. :area_raster: A raster layer of features to analyse that includes only bbox pixels in the study area. :index: Index of the current area. - :return: Path to the reclassified raster. """ pass @@ -432,6 +391,7 @@ def _process_aggregate_for_area( clip_area: QgsGeometry, current_bbox: QgsGeometry, index: int, + area_name: str = None, ): """ Executes the workflow, reporting progress through the feedback object and checking for cancellation. diff --git a/geest/core/workflows/aggregation_workflow_base.py b/geest/core/workflows/aggregation_workflow_base.py index c13666b3..1dc5a8b2 100644 --- a/geest/core/workflows/aggregation_workflow_base.py +++ b/geest/core/workflows/aggregation_workflow_base.py @@ -2,9 +2,14 @@ """📦 Aggregation Workflow Base module. This module contains functionality for aggregation workflow base. + +Supports both raster-first (legacy) and grid-first aggregation approaches. +The grid-first approach writes aggregated values directly to study_area_grid +columns, then optionally rasterizes from the grid. """ import os +from typing import Dict, Optional from qgis.analysis import QgsRasterCalculator, QgsRasterCalculatorEntry from qgis.core import ( @@ -16,6 +21,10 @@ ) from geest.core import JsonTreeItem +from geest.core.grid_column_utils import ( + rasterize_grid_column, + write_aggregation_to_grid, +) from geest.utilities import log_message from .workflow_base import WorkflowBase @@ -51,6 +60,9 @@ def __init__( self.id = None # This should be set by the child class self.weight_key = None # This should be set by the child class self.aggregation = True + # Grid-first mode: write results to grid columns first, then rasterize + # Set to True to use the new grid-first approach + self.use_grid_first = True self.feedback.setProgress(10.0) def aggregate(self, input_files: list, index: int) -> str: @@ -236,26 +248,253 @@ def get_raster_dict(self, index) -> list: ) return raster_files + def get_grid_columns_and_weights(self) -> Dict[str, float]: + """Get the list of grid columns and weights for aggregation. + + This is the grid-first alternative to get_raster_dict(). Instead of + returning raster file paths, it returns column names from study_area_grid. + + Returns: + Dict mapping column names to their weights. + Example: {"indicator1": 0.3, "indicator2": 0.3, "indicator3": 0.4} + """ + columns_weights = {} + if self.guids is None: + raise ValueError("No GUIDs provided for aggregation") + + for guid in self.guids: + item = self.item.getItemByGuid(guid) + status = item.getStatus() == "Completed successfully" + mode = item.attributes().get("analysis_mode", "Do Not Use") == "Do Not Use" + excluded = item.getStatus() == "Excluded from analysis" + disabled = not item.is_enabled() + raw_id = item.attribute("id").lower().replace(" ", "_").replace("-", "_") + # Add prefix based on item role to match column naming + item_role = item.role if hasattr(item, "role") else "" + if item_role == "dimension": + item_id = f"dim_{raw_id}" + elif item_role == "factor": + item_id = f"fac_{raw_id}" + else: + item_id = raw_id # indicators keep raw ID + + if not status and not mode and not excluded and not disabled: + raise ValueError( + f"{item_id} is not completed successfully and is not set to 'Do Not Use' or 'Excluded from analysis'" + ) + + if mode: + log_message( + f"Skipping {item.attribute('id')} as it is set to 'Do Not Use'", + tag="GeoE3", + level=Qgis.Info, + ) + continue + if excluded: + log_message( + f"Skipping {item.attribute('id')} as it is excluded from analysis", + tag="GeoE3", + level=Qgis.Info, + ) + continue + if disabled: + log_message( + f"Skipping {item.attribute('id')} as it is disabled (women considerations)", + tag="GeoE3", + level=Qgis.Info, + ) + continue + + # Get weight for this item + weight = item.attribute(self.weight_key, "") + try: + weight = float(weight) + except (ValueError, TypeError): + weight = 1.0 # Default fallback to 1.0 if weight is invalid + + # Column name is the sanitized item ID + column_name = item_id[:63] # Match sanitization in grid_column_utils + columns_weights[column_name] = weight + + log_message(f"Adding column: {column_name} with weight: {weight}") + + log_message( + f"Total columns found for aggregation: {len(columns_weights)}", + tag="GeoE3", + level=Qgis.Info, + ) + return columns_weights + + def aggregate_grid(self, area_name: str) -> int: + """Perform weighted aggregation directly on grid columns. + + This is the grid-first alternative to aggregate(). Instead of using + QgsRasterCalculator on raster files, it uses SQL to aggregate values + directly in the study_area_grid table. + + Args: + area_name: The name of the area being processed. + + Returns: + Number of cells updated, or -1 on error. + """ + columns_weights = self.get_grid_columns_and_weights() + + if not columns_weights: + log_message( + "Error: Found no columns to aggregate.", + tag="GeoE3", + level=Qgis.Warning, + ) + return -1 + + log_message( + f"Aggregating {len(columns_weights)} columns into {self.layer_id} for area {area_name}", + tag="GeoE3", + level=Qgis.Info, + ) + + # Use the grid-first aggregation function + updated_count = write_aggregation_to_grid( + gpkg_path=self.gpkg_path, + target_column=self.layer_id, + source_columns_weights=columns_weights, + area_name=area_name, + use_coalesce=True, + ) + + if updated_count >= 0: + log_message( + f"Grid aggregation completed: updated {updated_count} cells for {self.layer_id}", + tag="GeoE3", + level=Qgis.Info, + ) + else: + log_message( + f"Grid aggregation failed for {self.layer_id}", + tag="GeoE3", + level=Qgis.Warning, + ) + + return updated_count + + def rasterize_from_grid( + self, + area_name: str, + bbox: QgsGeometry, + index: int, + ) -> Optional[str]: + """Rasterize the grid column to create a raster output. + + This creates a raster from the aggregated grid column using gdal_rasterize. + + Args: + area_name: The name of the area being processed. + bbox: Bounding box geometry for the output raster extent. + index: The index of the area being processed. + + Returns: + Path to the output raster, or None on error. + """ + output_path = os.path.join( + self.workflow_directory, + f"{self.layer_id}_aggregated_{index}.tif", + ) + + # Get extent from bbox + rect = bbox.boundingBox() + extent = (rect.xMinimum(), rect.yMinimum(), rect.xMaximum(), rect.yMaximum()) + + success = rasterize_grid_column( + gpkg_path=self.gpkg_path, + column_name=self.layer_id, + output_raster_path=output_path, + cell_size=self.cell_size_m, + extent=extent, + nodata=-9999.0, + area_name=area_name, + ) + + if success: + log_message( + f"Rasterized grid column {self.layer_id} to {output_path}", + tag="GeoE3", + level=Qgis.Info, + ) + # Write the output path to attributes + self.attributes[self.result_file_key] = output_path + return output_path + else: + log_message( + f"Failed to rasterize grid column {self.layer_id}", + tag="GeoE3", + level=Qgis.Warning, + ) + return None + def _process_aggregate_for_area( self, current_area: QgsGeometry, clip_area: QgsGeometry, current_bbox: QgsGeometry, index: int, + area_name: Optional[str] = None, ): - """ - Executes the workflow, reporting progress through the feedback object and checking for cancellation. - """ - _ = current_area # Unused in this analysis - _ = clip_area # Unused in this analysis - _ = current_bbox # Unused in this analysis + """Execute aggregation workflow for a single area. + + Supports both raster-first (legacy) and grid-first aggregation modes. + The mode is controlled by self.use_grid_first flag. + + Args: + current_area: Current polygon from our study area. + clip_area: Polygon to clip the raster to which is aligned to cell edges. + current_bbox: Bounding box of the above area. + index: Index of the current area. + area_name: Name of the area being processed (for grid-first mode). + Returns: + Path to the aggregated raster file, or None on error. + """ # Log the execution log_message( - f"Executing {self.analysis_mode} Aggregation Workflow", + f"Executing {self.analysis_mode} Aggregation Workflow (grid_first={self.use_grid_first})", tag="GeoE3", level=Qgis.Info, ) + + if self.use_grid_first: + # Grid-first mode: aggregate directly in grid columns + return self._process_aggregate_grid_first( + current_area=current_area, + clip_area=clip_area, + current_bbox=current_bbox, + index=index, + area_name=area_name, + ) + else: + # Legacy raster-first mode + return self._process_aggregate_raster_first( + current_area=current_area, + clip_area=clip_area, + current_bbox=current_bbox, + index=index, + ) + + def _process_aggregate_raster_first( + self, + current_area: QgsGeometry, + clip_area: QgsGeometry, + current_bbox: QgsGeometry, + index: int, + ) -> Optional[str]: + """Legacy raster-first aggregation. + + Uses QgsRasterCalculator to aggregate raster files. + """ + _ = current_area # Unused + _ = clip_area # Unused + _ = current_bbox # Unused + raster_files = self.get_raster_dict(index) if not raster_files or not isinstance(raster_files, dict): @@ -270,13 +509,77 @@ def _process_aggregate_for_area( return None log_message( - f"Found {len(raster_files)} raster files in 'Result File'. Proceeding with aggregation.", + f"Found {len(raster_files)} raster files in 'Result File'. Proceeding with raster aggregation.", tag="GeoE3", level=Qgis.Info, ) - # Perform aggregation only if raster files are provided + # Perform aggregation using raster calculator result_file = self.aggregate(raster_files, index) + return result_file + + def _process_aggregate_grid_first( + self, + current_area: QgsGeometry, + clip_area: QgsGeometry, + current_bbox: QgsGeometry, + index: int, + area_name: Optional[str] = None, + ) -> Optional[str]: + """Grid-first aggregation. + + Aggregates values directly in grid columns using SQL, then + optionally rasterizes from the grid. + """ + _ = current_area # Unused + + # Step 1: Aggregate grid columns + try: + columns_weights = self.get_grid_columns_and_weights() + except ValueError as e: + error = str(e) + log_message( + error, + tag="GeoE3", + level=Qgis.Warning, + ) + self.attributes[self.result_key] = f"{self.analysis_mode} Aggregation Workflow Skipped" + self.attributes["error"] = error + return None + + if not columns_weights: + error = "No valid columns found for aggregation. Cannot proceed (likely all factors disabled or excluded)." + log_message( + error, + tag="GeoE3", + level=Qgis.Warning, + ) + self.attributes[self.result_key] = f"{self.analysis_mode} Aggregation Workflow Skipped" + self.attributes["error"] = error + return None + + log_message( + f"Found {len(columns_weights)} columns for grid aggregation: {list(columns_weights.keys())}", + tag="GeoE3", + level=Qgis.Info, + ) + + # Perform SQL aggregation on grid + updated_count = self.aggregate_grid(area_name) + if updated_count < 0: + log_message( + f"Grid aggregation failed for area {area_name}", + tag="GeoE3", + level=Qgis.Warning, + ) + return None + + # Step 2: Rasterize from grid column for VRT generation + result_file = self.rasterize_from_grid( + area_name=area_name, + bbox=current_bbox, + index=index, + ) return result_file diff --git a/geest/core/workflows/classified_polygon_workflow.py b/geest/core/workflows/classified_polygon_workflow.py index 5aaf26c0..62fa1f91 100644 --- a/geest/core/workflows/classified_polygon_workflow.py +++ b/geest/core/workflows/classified_polygon_workflow.py @@ -1,10 +1,16 @@ # -*- coding: utf-8 -*- -from urllib.parse import unquote - """📦 Classified Polygon Workflow module. This module contains functionality for classified polygon workflow. + +Supports grid-first mode where polygon classification scores are written +directly to the study_area_grid column, then rasterized. """ + +import os +from typing import Optional +from urllib.parse import unquote + from qgis.core import ( Qgis, QgsFeedback, @@ -17,6 +23,11 @@ from qgis.PyQt.QtCore import QVariant from geest.core import JsonTreeItem +from geest.core.grid_column_utils import ( + clear_grid_column, + rasterize_grid_column, + write_buffer_values_to_grid, +) from geest.utilities import log_message from .workflow_base import WorkflowBase @@ -38,6 +49,7 @@ def __init__( ): """ Initialize the workflow with attributes and feedback. + :param attributes: Item containing workflow parameters. :param feedback: QgsFeedback object for progress reporting and cancellation. :context: QgsProcessingContext object for processing. This can be used to pass objects to the thread. e.g. the QgsProject Instance @@ -50,7 +62,6 @@ def __init__( layer_path = self.attributes.get("classify_polygon_into_classes_shapefile", None) if layer_path: layer_path = unquote(layer_path) - if not layer_path: log_message( "Invalid layer found in use_classify_polygon_into_classes_shapefile, trying use_classify_polygon_into_classes_source.", @@ -65,10 +76,13 @@ def __init__( level=Qgis.Warning, ) return False - self.features_layer = QgsVectorLayer(layer_path, "features_layer", "ogr") - self.selected_field = self.attributes.get("classify_polygon_into_classes_selected_field", "") + self.workflow_name = "classified_polygon" + # Grid-first mode: write results to grid columns first, then rasterize + self.use_grid_first = True + # Track if we've cleared the column (only do once, not per area) + self._column_cleared = False def _process_features_for_area( self, @@ -77,17 +91,21 @@ def _process_features_for_area( current_bbox: QgsGeometry, area_features: QgsVectorLayer, index: int, + area_name: Optional[str] = None, ) -> str: """ - Executes the actual workflow logic for a single area - Must be implemented by subclasses. + Executes the actual workflow logic for a single area. + + Supports grid-first mode where classification scores are written + directly to study_area_grid. :current_area: Current polygon from our study area. :current_bbox: Bounding box of the above area. :area_features: A vector layer of features to analyse that includes only features in the study area. :index: Iteration / number of area being processed. + :area_name: Name of the area being processed. - :return: A raster layer file path if processing completes successfully, False if canceled or failed. + :return: A raster layer file path if processing completes successfully. """ area_features_count = area_features.featureCount() log_message( @@ -95,9 +113,30 @@ def _process_features_for_area( tag="GeoE3", level=Qgis.Info, ) + + if self.use_grid_first: + return self._process_grid_first( + current_bbox=current_bbox, + area_features=area_features, + index=index, + area_name=area_name, + ) + else: + return self._process_raster_first( + current_bbox=current_bbox, + area_features=area_features, + index=index, + ) + + def _process_raster_first( + self, + current_bbox: QgsGeometry, + area_features: QgsVectorLayer, + index: int, + ) -> str: + """Legacy raster-first processing.""" # Step 1: Assign reclassification values based on perceived safety reclassified_layer = self._assign_reclassification_to_safety(area_features) - # Step 2: Rasterize the data raster_output = self._rasterize( reclassified_layer, @@ -108,6 +147,64 @@ def _process_features_for_area( ) return raster_output + def _process_grid_first( + self, + current_bbox: QgsGeometry, + area_features: QgsVectorLayer, + index: int, + area_name: str, + ) -> str: + """Grid-first processing - writes classification scores directly to study_area_grid.""" + # Clear column once at the start (not per area) + if not self._column_cleared: + log_message(f"Clearing column {self.layer_id} before processing") + clear_grid_column(self.gpkg_path, self.layer_id) + self._column_cleared = True + + self.progressChanged.emit(10.0) + + # Step 1: Assign reclassification values (scale 0-100 to 0-5) + log_message(f"Scaling classification values for {area_features.featureCount()} polygons") + reclassified_layer = self._assign_reclassification_to_safety(area_features) + + self.progressChanged.emit(40.0) + + # Step 2: Write polygon scores to grid cells + log_message(f"Writing classification scores to grid column {self.layer_id}") + write_buffer_values_to_grid( + gpkg_path=self.gpkg_path, + column_name=self.layer_id, + buffer_layer=reclassified_layer, + value_field="value", + aggregation_method="MAX", + feedback=self.feedback, + ) + + self.progressChanged.emit(70.0) + + # Step 3: Rasterize from grid column + output_path = os.path.join( + self.workflow_directory, + f"{self.layer_id}_{index}.tif", + ) + + rect = current_bbox.boundingBox() + extent = (rect.xMinimum(), rect.yMinimum(), rect.xMaximum(), rect.yMaximum()) + + rasterize_grid_column( + gpkg_path=self.gpkg_path, + column_name=self.layer_id, + output_raster_path=output_path, + cell_size=self.cell_size_m, + extent=extent, + nodata=-9999.0, + area_name=area_name, + ) + + self.progressChanged.emit(100.0) + log_message(f"Rasterized grid column to {output_path}") + return output_path + def _assign_reclassification_to_safety(self, layer: QgsVectorLayer) -> QgsVectorLayer: """ Assign reclassification values to polygons based on thresholds. @@ -157,6 +254,7 @@ def _process_raster_for_area( current_bbox: QgsGeometry, area_raster: str, index: int, + area_name: str = None, ): """ Executes the actual workflow logic for a single area using a raster. @@ -177,6 +275,7 @@ def _process_aggregate_for_area( clip_area: QgsGeometry, current_bbox: QgsGeometry, index: int, + area_name: str = None, ): """ Executes the workflow, reporting progress through the feedback object and checking for cancellation. diff --git a/geest/core/workflows/contextual_index_score_workflow.py b/geest/core/workflows/contextual_index_score_workflow.py index 6292611d..96b733e1 100644 --- a/geest/core/workflows/contextual_index_score_workflow.py +++ b/geest/core/workflows/contextual_index_score_workflow.py @@ -1,10 +1,12 @@ # -*- coding: utf-8 -*- - """ Specialised index score workflow for use in contextual dimensions. -""" +Supports grid-first mode where the index score is written directly to +the study_area_grid column, then optionally rasterized. +""" import os +from typing import Optional from qgis import processing # noqa: F401 # QGIS processing toolbox from qgis.core import ( # noqa: F401 @@ -20,6 +22,10 @@ from qgis.PyQt.QtCore import QVariant from geest.core import JsonTreeItem # noqa: unused F401 +from geest.core.grid_column_utils import ( + rasterize_grid_column, + write_uniform_value_to_grid, +) from geest.utilities import log_message from .contextual_index_score_mappings import score_mapping @@ -52,11 +58,9 @@ def __init__( super().__init__( item, cell_size_m, analysis_scale, feedback, context, working_directory ) # ⭐️ Item is a reference - whatever you change in this item will directly update the tree - index_score = self.attributes.get("index_score", 0) log_message(f"Index score before rescaling to contextual scale: {index_score}") # Define mapping rules as (min_score, output_score) pairs - # Find the highest threshold less than or equal to index_score for threshold in sorted(score_mapping.keys(), reverse=True): if index_score >= threshold: @@ -68,6 +72,8 @@ def __init__( True # Normally we would set this to a QgsVectorLayer but in this workflow it is not needed ) self.workflow_name = "contextual_index_score" + # Grid-first mode: write results to grid columns first, then rasterize + self.use_grid_first = True def _process_features_for_area( self, @@ -76,31 +82,50 @@ def _process_features_for_area( current_bbox: QgsGeometry, area_features: QgsVectorLayer, index: int, + area_name: Optional[str] = None, ) -> str: """ - Executes the actual workflow logic for a single area - Must be implemented by subclasses. + Executes the actual workflow logic for a single area. + + Supports both raster-first (legacy) and grid-first modes. :current_area: Current polygon from our study area. :current_bbox: Bounding box of the above area. - :area_features: A vector layer of features to analyse that includes only features in the study area. + :area_features: A vector layer of features to analyse (unused for index_score). :index: Iteration / number of area being processed. - + :area_name: Name of the area being processed (for grid-first mode). :return: Raster file path of the output. """ _ = area_features # unused - log_message(f"Processing area {index} score workflow") - + log_message(f"Processing area {index} contextual score workflow (grid_first={self.use_grid_first})") log_message(f"Index score: {self.index_score}") - self.progressChanged.emit(10.0) # We just use nominal intervals for progress updates - - # Create a scored boundary layer filtered by current_area + self.progressChanged.emit(10.0) + + if self.use_grid_first and area_name: + return self._process_grid_first( + current_bbox=current_bbox, + index=index, + area_name=area_name, + ) + else: + return self._process_raster_first( + clip_area=clip_area, + current_bbox=current_bbox, + index=index, + ) + + def _process_raster_first( + self, + clip_area: QgsGeometry, + current_bbox: QgsGeometry, + index: int, + ) -> str: + """Legacy raster-first processing.""" scored_layer = self.create_scored_boundary_layer( clip_area=clip_area, index=index, ) - self.progressChanged.emit(30.0) # We just use nominal intervals for progress updates - # Create a scored boundary layer + self.progressChanged.emit(30.0) raster_output = self._rasterize( scored_layer, current_bbox, @@ -108,21 +133,59 @@ def _process_features_for_area( value_field="score", default_value=255, ) - self.progressChanged.emit(100.0) # We just use nominal intervals for progress updates - + self.progressChanged.emit(100.0) log_message(f"Raster output: {raster_output}") log_message(f"Workflow completed for area {index}") return raster_output + def _process_grid_first( + self, + current_bbox: QgsGeometry, + index: int, + area_name: str, + ) -> str: + """Grid-first processing - writes to grid column, then rasterizes.""" + self.progressChanged.emit(20.0) + log_message(f"Writing contextual index score {self.index_score} to grid column {self.layer_id}") + + write_uniform_value_to_grid( + gpkg_path=self.gpkg_path, + column_name=self.layer_id, + value=self.index_score, + ) + + self.progressChanged.emit(50.0) + + # Rasterize from grid column + output_path = os.path.join( + self.workflow_directory, + f"{self.layer_id}_{index}.tif", + ) + + rect = current_bbox.boundingBox() + extent = (rect.xMinimum(), rect.yMinimum(), rect.xMaximum(), rect.yMaximum()) + + rasterize_grid_column( + gpkg_path=self.gpkg_path, + column_name=self.layer_id, + output_raster_path=output_path, + cell_size=self.cell_size_m, + extent=extent, + nodata=-9999.0, + area_name=area_name, + ) + + self.progressChanged.emit(100.0) + log_message(f"Rasterized grid column to {output_path}") + return output_path + def create_scored_boundary_layer(self, clip_area: QgsGeometry, index: int) -> QgsVectorLayer: """ Create a scored boundary layer, filtering features by the current_area. - :param index: The index of the current processing area. :return: A vector layer with a 'score' attribute. """ output_prefix = f"{self.layer_id}_area_{index}" - self.progressChanged.emit(20.0) # We just use nominal intervals for progress updates # Create a new memory layer with the target CRS (EPSG:4326) subset_layer = QgsVectorLayer("Polygon", "subset", "memory") @@ -134,7 +197,6 @@ def create_scored_boundary_layer(self, clip_area: QgsGeometry, index: int) -> Qg subset_layer_data.addAttributes(fields) subset_layer.updateFields() self.progressChanged.emit(40.0) # We just use nominal intervals for progress updates - feature = QgsFeature(subset_layer.fields()) feature.setGeometry(clip_area) score_field_index = subset_layer.fields().indexFromName("score") @@ -144,7 +206,6 @@ def create_scored_boundary_layer(self, clip_area: QgsGeometry, index: int) -> Qg subset_layer_data.addFeatures(features) subset_layer.commitChanges() self.progressChanged.emit(60.0) # We just use nominal intervals for progress updates - shapefile_path = os.path.join(self.workflow_directory, f"{output_prefix}.shp") # Use QgsVectorFileWriter to save the layer to a shapefile QgsVectorFileWriter.writeAsVectorFormat( @@ -156,7 +217,6 @@ def create_scored_boundary_layer(self, clip_area: QgsGeometry, index: int) -> Qg ) layer = QgsVectorLayer(shapefile_path, "area_layer", "ogr") self.progressChanged.emit(80.0) # We just use nominal intervals for progress updates - return layer # Default implementation of the abstract method - not used in this workflow @@ -167,16 +227,15 @@ def _process_raster_for_area( current_bbox: QgsGeometry, area_raster: str, index: int, + area_name: str = None, ): """ Executes the actual workflow logic for a single area using a raster. - :current_area: Current polygon from our study area. :clip_area: Polygon to clip the raster to which is aligned to cell edges. :current_bbox: Bounding box of the above area. :area_raster: A raster layer of features to analyse that includes only bbox pixels in the study area. :index: Index of the current area. - :return: Path to the reclassified raster. """ pass @@ -187,6 +246,7 @@ def _process_aggregate_for_area( clip_area: QgsGeometry, current_bbox: QgsGeometry, index: int, + area_name: str = None, ): """ Executes the workflow, reporting progress through the feedback object and checking for cancellation. diff --git a/geest/core/workflows/dont_use_workflow.py b/geest/core/workflows/dont_use_workflow.py index f7f13dad..ae030226 100644 --- a/geest/core/workflows/dont_use_workflow.py +++ b/geest/core/workflows/dont_use_workflow.py @@ -53,6 +53,7 @@ def _process_raster_for_area( current_bbox: QgsGeometry, area_raster: str, index: int, + area_name: str = None, ): """ Executes the actual workflow logic for a single area using a raster. @@ -73,6 +74,7 @@ def _process_aggregate_for_area( clip_area: QgsGeometry, current_bbox: QgsGeometry, index: int, + area_name: str = None, ): """ Executes the workflow, reporting progress through the feedback object and checking for cancellation. diff --git a/geest/core/workflows/eplex_workflow.py b/geest/core/workflows/eplex_workflow.py index 0a2b13eb..26733ff9 100644 --- a/geest/core/workflows/eplex_workflow.py +++ b/geest/core/workflows/eplex_workflow.py @@ -1,9 +1,7 @@ # -*- coding: utf-8 -*- """📦 EPLEX Workflow module. - This module contains functionality for EPLEX score workflow. """ - import os from qgis.core import ( @@ -27,7 +25,6 @@ class EPLEXWorkflow(WorkflowBase): """ Concrete implementation of 'use_eplex_score' workflow. - Creates a raster filled with the EPLEX score value for the study area. This is used when women considerations are disabled, providing a single contextual score based on Employment Protection Legislation Index. @@ -43,7 +40,6 @@ def __init__( working_directory: str = None, ): """Initialize the EPLEX workflow with attributes and feedback. - Args: item: JsonTreeItem representing the indicator to process. cell_size_m: Cell size in meters for rasterization. @@ -53,7 +49,6 @@ def __init__( working_directory: Folder containing study_area.gpkg and outputs. """ super().__init__(item, cell_size_m, analysis_scale, feedback, context, working_directory) - # Get EPLEX score from attributes self.eplex_score = self.attributes.get("eplex_score", 0.0) log_message( @@ -61,7 +56,6 @@ def __init__( tag="GeoE3", level=Qgis.Info, ) - self.features_layer = True # Not needed for this workflow self.workflow_name = "eplex_score" @@ -72,44 +66,34 @@ def _process_features_for_area( current_bbox: QgsGeometry, area_features, index: int, + area_name: str = None, ) -> str: """Create a raster filled with EPLEX score for the study area. - Uses the grid layer directly and rasterizes it with the EPLEX score value. - Args: current_area: Current polygon from our study area. clip_area: Polygon to clip the raster to, aligned to cell edges. current_bbox: Bounding box of the above area. area_features: Not used in this workflow. index: Iteration / number of area being processed. - Returns: Raster file path of the output. """ log_message(f"Processing area {index} for EPLEX score workflow", tag="GeoE3", level=Qgis.Info) - self.progressChanged.emit(10.0) - # Create a memory layer with a single feature covering the clip area fields = QgsFields() fields.append(QgsField("value", QVariant.Double)) - eplex_layer = QgsVectorLayer(f"Polygon?crs={self.target_crs.authid()}", "eplex_temp", "memory") eplex_layer.dataProvider().addAttributes(fields) eplex_layer.updateFields() - self.progressChanged.emit(30.0) - # Create a single feature with the clip_area geometry and EPLEX score feature = QgsFeature(fields) feature.setGeometry(clip_area) feature.setAttribute("value", self.eplex_score) - eplex_layer.dataProvider().addFeatures([feature]) - self.progressChanged.emit(50.0) - # Rasterize this layer output_path = self._rasterize( eplex_layer, @@ -118,9 +102,7 @@ def _process_features_for_area( value_field="value", default_value=0, ) - self.progressChanged.emit(90.0) - if output_path and os.path.exists(output_path): log_message( f"EPLEX raster created successfully: {output_path}", @@ -134,9 +116,7 @@ def _process_features_for_area( level=Qgis.Critical, ) return None - self.progressChanged.emit(100.0) - return output_path # Default implementations of abstract methods - not used in this workflow @@ -147,9 +127,9 @@ def _process_raster_for_area( current_bbox: QgsGeometry, area_raster: str, index: int, + area_name: str = None, ): """Not used in EPLEX workflow. - Args: current_area: Current polygon from study area. clip_area: Polygon to clip the raster to. @@ -165,9 +145,9 @@ def _process_aggregate_for_area( clip_area: QgsGeometry, current_bbox: QgsGeometry, index: int, + area_name: str = None, ): """Not used in EPLEX workflow. - Args: current_area: Current polygon from study area. clip_area: Polygon to clip the raster to. diff --git a/geest/core/workflows/index_score_with_ghsl_workflow.py b/geest/core/workflows/index_score_with_ghsl_workflow.py index 949aa7fc..56fd2d72 100644 --- a/geest/core/workflows/index_score_with_ghsl_workflow.py +++ b/geest/core/workflows/index_score_with_ghsl_workflow.py @@ -1,9 +1,7 @@ # -*- coding: utf-8 -*- """📦 Index Score With Ghsl Workflow module. - This module contains functionality for index score with ghsl workflow. """ - import os from typing import Optional @@ -36,7 +34,6 @@ class IndexScoreWithGHSLException(Exception): class IndexScoreWithGHSLWorkflow(WorkflowBase): """ Concrete implementation of a 'use_index_score_with_ghsl' workflow. - This workflow scores areas using an index value, masked to GHSL settlement boundaries. Study area clip polygons are pre-filtered during study area creation to only include areas that intersect GHSL, so this workflow intersects with GHSL to get the precise @@ -54,7 +51,6 @@ def __init__( ): """ Initialize the workflow with attributes and feedback. - Args: item: JsonTreeItem representing the analysis, dimension, or factor to process. cell_size_m: Cell size in meters for rasterization. @@ -80,9 +76,7 @@ def __init__( self.workflow_name = "index_score" # Get the analysis extents self.study_area_bbox = self._study_area_bbox_4326() - self.ghsl_layer_path = f"{self.gpkg_path}|layername=ghsl_settlements" - # Check if GHSL layer exists, try to download if not if not self.ensure_ghsl_data(): log_message( @@ -105,25 +99,23 @@ def _process_features_for_area( current_bbox: QgsGeometry, area_features: QgsVectorLayer, index: int, + area_name: str = None, ) -> str: """ Executes the actual workflow logic for a single area Must be implemented by sub classes. - Args: current_area: Current polygon from our study area. clip_area: Current area but expanded to coincide with grid cell boundaries. current_bbox: Bounding box of the above area. area_features: A vector layer of features to analyse that includes only features in the study area. index: Iteration / number of area being processed. - Returns: Raster file path of the output. """ _ = area_features # unused log_message(f"Processing area {index} with index score {self.index_score}") self.progressChanged.emit(10.0) - # Load GHSL layer and get features intersecting this area # Clip polygons are pre-filtered during study area creation, so we just need # to intersect with GHSL to get precise settlement boundaries for scoring @@ -138,7 +130,6 @@ def _process_features_for_area( for feat in ghsl_layer.getFeatures(request): if feat.geometry().intersects(current_area): ghsl_geometries.append(feat.geometry()) - if ghsl_geometries: ghsl_union = QgsGeometry.unaryUnion(ghsl_geometries) masked_geom = clip_area.intersection(ghsl_union) @@ -148,13 +139,10 @@ def _process_features_for_area( else: log_message(f"No GHSL features found for area {index}, using full clip area") masked_geom = clip_area - self.progressChanged.emit(40.0) - # Create scored layer with GHSL-masked geometry scored_layer = self.create_scored_boundary_layer(clip_area=masked_geom, index=index) self.progressChanged.emit(60.0) - # Rasterize raster_output = self._rasterize( scored_layer, @@ -164,23 +152,19 @@ def _process_features_for_area( default_value=255, ) self.progressChanged.emit(100.0) - log_message(f"Raster output: {raster_output}") return raster_output def create_scored_boundary_layer(self, clip_area: QgsGeometry, index: int) -> QgsVectorLayer: """ Create a scored boundary layer, filtering features by the current_area. - Args: clip_area: The clipping area geometry. index: The index of the current processing area. - Returns: A vector layer with a 'score' attribute. """ output_prefix = f"{self.layer_id}_area_{index}" - self.progressChanged.emit(20.0) # We just use nominal intervals for progress updates # Create a new memory layer with the target CRS (EPSG:4326) subset_layer = QgsVectorLayer("Polygon", "subset", "memory") @@ -192,7 +176,6 @@ def create_scored_boundary_layer(self, clip_area: QgsGeometry, index: int) -> Qg subset_layer_data.addAttributes(fields) subset_layer.updateFields() self.progressChanged.emit(40.0) # We just use nominal intervals for progress updates - feature = QgsFeature(subset_layer.fields()) feature.setGeometry(clip_area) score_field_index = subset_layer.fields().indexFromName("score") @@ -202,7 +185,6 @@ def create_scored_boundary_layer(self, clip_area: QgsGeometry, index: int) -> Qg subset_layer_data.addFeatures(features) subset_layer.commitChanges() self.progressChanged.emit(60.0) # We just use nominal intervals for progress updates - shapefile_path = os.path.join(self.workflow_directory, f"{output_prefix}.shp") # Use QgsVectorFileWriter to save the layer to a shapefile QgsVectorFileWriter.writeAsVectorFormat( @@ -214,7 +196,6 @@ def create_scored_boundary_layer(self, clip_area: QgsGeometry, index: int) -> Qg ) layer = QgsVectorLayer(shapefile_path, "area_layer", "ogr") self.progressChanged.emit(80.0) # We just use nominal intervals for progress updates - return layer # Default implementation of the abstract method - not used in this workflow @@ -225,17 +206,16 @@ def _process_raster_for_area( current_bbox: QgsGeometry, area_raster: str, index: int, + area_name: str = None, ): """ Executes the actual workflow logic for a single area using a raster. - Args: current_area: Current polygon from our study area. clip_area: Polygon to clip the raster to which is aligned to cell edges. current_bbox: Bounding box of the above area. area_raster: A raster layer of features to analyse that includes only bbox pixels in the study area. index: Index of the current area. - Returns: Path to the reclassified raster. """ @@ -247,16 +227,15 @@ def _process_aggregate_for_area( clip_area: QgsGeometry, current_bbox: QgsGeometry, index: int, + area_name: str = None, ): """ Executes the actual workflow logic for a single area using an aggregate. - Args: current_area: Current polygon from our study area. clip_area: Polygon to clip the raster to which is aligned to cell edges. current_bbox: Bounding box of the above area. index: Index of the current area. - Returns: Path to the reclassified raster. """ diff --git a/geest/core/workflows/index_score_with_ookla_workflow.py b/geest/core/workflows/index_score_with_ookla_workflow.py index b1409af7..8849ff42 100644 --- a/geest/core/workflows/index_score_with_ookla_workflow.py +++ b/geest/core/workflows/index_score_with_ookla_workflow.py @@ -1,9 +1,7 @@ # -*- coding: utf-8 -*- """📦 Index Score With Ookla Workflow module. - This module contains functionality for index score with ookla workflow. """ - import os from typing import Optional @@ -58,7 +56,6 @@ def isCanceled(self): class IndexScoreWithOoklaWorkflow(WorkflowBase): """ Concrete implementation of a 'use_index_score_with_ookla' workflow. - This follows the same logic as the index score workflow but additionally masks the result using the Ookla coverage layer to ensure that only areas that have Ookla data are included in the final output. @@ -99,7 +96,6 @@ def __init__( self.workflow_name = "index_score" # Get the analysis extents self.study_area_bbox = self._study_area_bbox_4326() - # Lazy load OOKLA data during execute to avoid blocking __init__ self.ookla_layer_path = None self.ookla_downloaded = False @@ -111,14 +107,11 @@ def _download_ookla_data(self): """ if self.ookla_downloaded: return - log_message("Downloading Ookla data (this may take several minutes)...") self.updateStatus("Downloading Ookla data — this may take several minutes...") self.progressChanged.emit(1.0) - # Bridge feedback to workflow progress signals bridge_feedback = ProgressBridgeFeedback(self, self.feedback) - # Prepare Ookla coverage layer - adds a minute or two to the workflow # and requires internet access ookla_layer_path = os.path.join(self.working_directory, "study_area") @@ -151,27 +144,23 @@ def _process_features_for_area( current_bbox: QgsGeometry, area_features: QgsVectorLayer, index: int, + area_name: str = None, ) -> str: """ Executes the actual workflow logic for a single area Must be implemented by sub classes. - :current_area: Current polygon from our study area. :current_bbox: Bounding box of the above area. :area_features: A vector layer of features to analyse that includes only features in the study area. :index: Iteration / number of area being processed. - :return: Raster file path of the output. """ _ = area_features # unused - # Download OOKLA data on first area if index == 0: self._download_ookla_data() - log_message(f"Index score: {self.index_score}") self.progressChanged.emit(10.0) # We just use nominal intervals for progress updates - # Mask with OOKLA coverage ookla_layer = QgsVectorLayer(self.ookla_layer_path, "ookla_layer", "ogr") expr = f"intersects($geometry, geom_from_wkt('{current_area.asWkt()}'))" @@ -183,18 +172,15 @@ def _process_features_for_area( final_geom = clip_area.intersection(ookla_union_geom) else: log_message(f"No Ookla coverage in area {index}, skipping ookla masking.") - if not final_geom or final_geom.isEmpty(): log_message(f"No Ookla coverage in area {index} after intersection, skipping rasterization.") return None - # Create scored layer only if we have valid geometry scored_layer = self.create_scored_boundary_layer( clip_area=final_geom, index=index, ) self.progressChanged.emit(60.0) # We just use nominal intervals for progress - # Rasterize raster_output = self._rasterize( scored_layer, @@ -204,7 +190,6 @@ def _process_features_for_area( default_value=255, ) self.progressChanged.emit(100.0) # We just use nominal intervals for progress updates - log_message(f"Raster output: {raster_output}") log_message(f"Workflow completed for area {index}") return raster_output @@ -212,12 +197,10 @@ def _process_features_for_area( def create_scored_boundary_layer(self, clip_area: QgsGeometry, index: int) -> QgsVectorLayer: """ Create a scored boundary layer, filtering features by the current_area. - :param index: The index of the current processing area. :return: A vector layer with a 'score' attribute. """ output_prefix = f"{self.layer_id}_area_{index}" - self.progressChanged.emit(20.0) # We just use nominal intervals for progress updates # Create memory layer subset_layer = QgsVectorLayer("Polygon", "subset", "memory") @@ -228,7 +211,6 @@ def create_scored_boundary_layer(self, clip_area: QgsGeometry, index: int) -> Qg subset_layer_data.addAttributes(fields) subset_layer.updateFields() self.progressChanged.emit(40.0) # We just use nominal intervals for progress updates - feature = QgsFeature(subset_layer.fields()) feature.setGeometry(clip_area) score_field_index = subset_layer.fields().indexFromName("score") @@ -237,10 +219,8 @@ def create_scored_boundary_layer(self, clip_area: QgsGeometry, index: int) -> Qg subset_layer_data.addFeatures(features) subset_layer.commitChanges() self.progressChanged.emit(60.0) # We just use nominal intervals for progress updates - shapefile_path = os.path.join(self.workflow_directory, f"{output_prefix}.shp") os.makedirs(self.workflow_directory, exist_ok=True) - # Write to shapefile error, error_string = QgsVectorFileWriter.writeAsVectorFormat( subset_layer, @@ -249,22 +229,17 @@ def create_scored_boundary_layer(self, clip_area: QgsGeometry, index: int) -> Qg subset_layer.crs(), "ESRI Shapefile", ) - if error != QgsVectorFileWriter.NoError: log_message(f"Error writing shapefile: {error_string} (code: {error})") return None - if not os.path.exists(shapefile_path): log_message(f"Error: Shapefile not created at {shapefile_path}") return None - layer = QgsVectorLayer(shapefile_path, "area_layer", "ogr") - if not layer.isValid(): log_message(f"Error loading layer: {layer.error().message()}") return None self.progressChanged.emit(80.0) # We just use nominal intervals for progress updates - return layer # Default implementation of the abstract method - not used in this workflow @@ -275,16 +250,15 @@ def _process_raster_for_area( current_bbox: QgsGeometry, area_raster: str, index: int, + area_name: str = None, ): """ Executes the actual workflow logic for a single area using a raster. - :current_area: Current polygon from our study area. :clip_area: Polygon to clip the raster to which is aligned to cell edges. :current_bbox: Bounding box of the above area. :area_raster: A raster layer of features to analyse that includes only bbox pixels in the study area. :index: Index of the current area. - :return: Path to the reclassified raster. """ pass @@ -295,6 +269,7 @@ def _process_aggregate_for_area( clip_area: QgsGeometry, current_bbox: QgsGeometry, index: int, + area_name: str = None, ): """ Executes the workflow, reporting progress through the feedback object and checking for cancellation. diff --git a/geest/core/workflows/index_score_workflow.py b/geest/core/workflows/index_score_workflow.py index 89936ed2..d7e3f5e6 100644 --- a/geest/core/workflows/index_score_workflow.py +++ b/geest/core/workflows/index_score_workflow.py @@ -2,9 +2,13 @@ """📦 Index Score Workflow module. This module contains functionality for index score workflow. + +Supports grid-first mode where the index score is written directly to +the study_area_grid column, then optionally rasterized. """ import os +from typing import Optional from qgis import processing # noqa: F401 # QGIS processing toolbox from qgis.core import ( # noqa: F401 @@ -20,6 +24,10 @@ from qgis.PyQt.QtCore import QVariant from geest.core import JsonTreeItem +from geest.core.grid_column_utils import ( + rasterize_grid_column, + write_uniform_value_to_grid, +) from geest.utilities import log_message from .workflow_base import WorkflowBase @@ -61,6 +69,8 @@ def __init__( True # Normally we would set this to a QgsVectorLayer but in this workflow it is not needed ) self.workflow_name = "index_score" + # Grid-first mode: write results to grid columns first, then rasterize + self.use_grid_first = True def _process_features_for_area( self, @@ -69,31 +79,61 @@ def _process_features_for_area( current_bbox: QgsGeometry, area_features: QgsVectorLayer, index: int, + area_name: Optional[str] = None, ) -> str: """ - Executes the actual workflow logic for a single area - Must be implemented by subclasses. + Executes the actual workflow logic for a single area. - :current_area: Current polygon from our study area. - :current_bbox: Bounding box of the above area. - :area_features: A vector layer of features to analyse that includes only features in the study area. - :index: Iteration / number of area being processed. + Supports both raster-first (legacy) and grid-first modes. - :return: Raster file path of the output. + Args: + current_area: Current polygon from our study area. + clip_area: Clipping polygon aligned to grid cells. + current_bbox: Bounding box of the above area. + area_features: A vector layer of features to analyse (unused for index_score). + index: Iteration / number of area being processed. + area_name: Name of the area being processed (for grid-first mode). + + Returns: + Raster file path of the output. """ _ = area_features # unused - log_message(f"Processing area {index} score workflow") + log_message(f"Processing area {index} score workflow (grid_first={self.use_grid_first})") log_message(f"Index score: {self.index_score}") - self.progressChanged.emit(10.0) # We just use nominal intervals for progress updates + self.progressChanged.emit(10.0) + + if self.use_grid_first and area_name: + return self._process_grid_first( + current_bbox=current_bbox, + index=index, + area_name=area_name, + ) + else: + return self._process_raster_first( + clip_area=clip_area, + current_bbox=current_bbox, + index=index, + ) + + def _process_raster_first( + self, + clip_area: QgsGeometry, + current_bbox: QgsGeometry, + index: int, + ) -> str: + """Legacy raster-first processing. + Creates a polygon layer with the score and rasterizes it. + """ # Create a scored boundary layer filtered by current_area scored_layer = self.create_scored_boundary_layer( clip_area=clip_area, index=index, ) - self.progressChanged.emit(30.0) # We just use nominal intervals for progress updates - # Create a scored boundary layer + self.progressChanged.emit(30.0) + + # Rasterize the scored layer raster_output = self._rasterize( scored_layer, current_bbox, @@ -101,12 +141,73 @@ def _process_features_for_area( value_field="score", default_value=255, ) - self.progressChanged.emit(100.0) # We just use nominal intervals for progress updates + self.progressChanged.emit(100.0) log_message(f"Raster output: {raster_output}") log_message(f"Workflow completed for area {index}") return raster_output + def _process_grid_first( + self, + current_bbox: QgsGeometry, + index: int, + area_name: str, + ) -> str: + """Grid-first processing. + + Writes the index score directly to the grid column, then rasterizes. + """ + # Step 1: Write uniform value to grid + self.progressChanged.emit(20.0) + log_message(f"Writing index score {self.index_score} to grid column {self.layer_id} for area {area_name}") + + updated_count = write_uniform_value_to_grid( + gpkg_path=self.gpkg_path, + column_name=self.layer_id, + value=self.index_score, + area_name=area_name, + ) + + if updated_count < 0: + log_message(f"Failed to write index score to grid for area {area_name}", level=Qgis.Warning) + # Fall back to raster-first method + return self._process_raster_first( + clip_area=None, # Not available in this path + current_bbox=current_bbox, + index=index, + ) + + log_message(f"Updated {updated_count} grid cells with index score {self.index_score}") + self.progressChanged.emit(50.0) + + # Step 2: Rasterize from grid column + output_path = os.path.join( + self.workflow_directory, + f"{self.layer_id}_{index}.tif", + ) + + rect = current_bbox.boundingBox() + extent = (rect.xMinimum(), rect.yMinimum(), rect.xMaximum(), rect.yMaximum()) + + success = rasterize_grid_column( + gpkg_path=self.gpkg_path, + column_name=self.layer_id, + output_raster_path=output_path, + cell_size=self.cell_size_m, + extent=extent, + nodata=-9999.0, + area_name=area_name, + ) + + self.progressChanged.emit(100.0) + + if success: + log_message(f"Rasterized grid column to {output_path}") + return output_path + else: + log_message(f"Failed to rasterize grid column for area {area_name}", level=Qgis.Warning) + return None + def create_scored_boundary_layer(self, clip_area: QgsGeometry, index: int) -> QgsVectorLayer: """ Create a scored boundary layer, filtering features by the current_area. @@ -180,6 +281,7 @@ def _process_aggregate_for_area( clip_area: QgsGeometry, current_bbox: QgsGeometry, index: int, + area_name: str = None, ): """ Executes the workflow, reporting progress through the feedback object and checking for cancellation. diff --git a/geest/core/workflows/multi_buffer_distances_native_workflow.py b/geest/core/workflows/multi_buffer_distances_native_workflow.py index dac150d4..a2fee6a0 100644 --- a/geest/core/workflows/multi_buffer_distances_native_workflow.py +++ b/geest/core/workflows/multi_buffer_distances_native_workflow.py @@ -1,6 +1,12 @@ # -*- coding: utf-8 -*- -"""Multi-buffer distances workflow using native QGIS network analysis.""" +"""Multi-buffer distances workflow using native QGIS network analysis. + +Supports grid-first mode where buffer scores are written directly to +the study_area_grid column, then rasterized. +""" + import os +from typing import Optional from urllib.parse import unquote from qgis import processing @@ -20,6 +26,11 @@ from geest.core import JsonTreeItem from geest.core.algorithms import NativeNetworkAnalysisProcessingTask +from geest.core.grid_column_utils import ( + clear_grid_column, + rasterize_grid_column, + write_buffer_values_to_grid, +) from geest.core.workflows.mappings import MAPPING_REGISTRY from geest.utilities import log_message @@ -30,7 +41,7 @@ class MultiBufferDistancesNativeWorkflow(WorkflowBase): """Multi-buffer workflow using native QGIS network analysis. Creates concentric isochrones around points using road network distances. - Results are rasterized and combined into a VRT. + Results are written to grid columns and rasterized. """ def __init__( @@ -108,7 +119,6 @@ def __init__( level=Qgis.Warning, ) raise Exception("Invalid travel distances provided.") - layer_path = self.attributes.get("multi_buffer_point_shapefile", None) if layer_path: layer_path = unquote(layer_path) @@ -135,7 +145,6 @@ def __init__( level=Qgis.Warning, ) raise Exception("Invalid points layer found.") - mode = self.attributes.get("multi_buffer_travel_mode", "Walking") self.mode = None if mode == "Walking": @@ -154,14 +163,20 @@ def __init__( ) raise Exception("Invalid network layer found.") log_message("Multi Buffer Distances Native Workflow initialized") + self.workflow_name = "multi_buffer_point" + # Grid-first mode: write results to grid columns first, then rasterize + self.use_grid_first = True + # Track if we've cleared the column (only do once, not per area) + self._column_cleared = False def _process_features_for_area( self, - current_area: "QgsGeometry", - clip_area: "QgsGeometry", - current_bbox: "QgsGeometry", - area_features: "QgsVectorLayer", + current_area: QgsGeometry, + clip_area: QgsGeometry, + current_bbox: QgsGeometry, + area_features: QgsVectorLayer, index: int, + area_name: Optional[str] = None, ) -> str: """Process a single area. @@ -171,30 +186,56 @@ def _process_features_for_area( current_bbox: Bounding box. area_features: Features to analyze. index: Area number being processed. + area_name: Name of the area being processed. Returns: Raster file path, or False if failed. """ + if self.use_grid_first: + return self._process_grid_first( + current_area=current_area, + clip_area=clip_area, + current_bbox=current_bbox, + area_features=area_features, + index=index, + area_name=area_name, + ) + else: + return self._process_raster_first( + current_area=current_area, + clip_area=clip_area, + current_bbox=current_bbox, + area_features=area_features, + index=index, + ) + + def _process_raster_first( + self, + current_area: QgsGeometry, + clip_area: QgsGeometry, + current_bbox: QgsGeometry, + area_features: QgsVectorLayer, + index: int, + ) -> str: + """Legacy raster-first processing.""" # Check if we should use simple buffer (Regional scale) instead of network analysis if self.use_simple_buffer and self.buffer_distance: log_message( f"Using simple buffer for Regional scale: {self.buffer_distance}m", level=Qgis.Info, ) - return self._process_features_with_simple_buffer( + return self._process_features_with_simple_buffer_legacy( current_area=current_area, clip_area=clip_area, current_bbox=current_bbox, area_features=area_features, index=index, ) - # Original network analysis approach for National/Local scale log_message( f"Starting network analysis for area {index + 1}", level=Qgis.Info, ) - isochrones_gpkg = self.create_isochrones( point_layer=area_features, clip_geometry=current_area, @@ -207,23 +248,165 @@ def _process_features_for_area( level=Qgis.Warning, ) return False - bands = self._create_bands(isochrones_gpkg_path=isochrones_gpkg, index=index) scored_buffers = self._assign_scores(bands) - if scored_buffers is False: log_message("No scored buffers were created.", level=Qgis.Warning) return False - raster_output = self._rasterize( input_layer=scored_buffers, bbox=current_bbox, index=index, value_field="value", ) - return raster_output + def _process_grid_first( + self, + current_area: QgsGeometry, + clip_area: QgsGeometry, + current_bbox: QgsGeometry, + area_features: QgsVectorLayer, + index: int, + area_name: str, + ) -> str: + """Grid-first processing - writes buffer scores directly to study_area_grid.""" + # Clear column once at the start (not per area) + if not self._column_cleared: + log_message(f"Clearing column {self.layer_id} before processing") + clear_grid_column(self.gpkg_path, self.layer_id) + self._column_cleared = True + + self.progressChanged.emit(5.0) + + # Check if we should use simple buffer (Regional scale) instead of network analysis + if self.use_simple_buffer and self.buffer_distance: + log_message( + f"Using simple buffer for Regional scale: {self.buffer_distance}m", + level=Qgis.Info, + ) + scored_buffers = self._create_simple_buffer_scored(area_features, index) + else: + # Original network analysis approach for National/Local scale + log_message( + f"Starting network analysis for area {index + 1}", + level=Qgis.Info, + ) + isochrones_gpkg = self.create_isochrones( + point_layer=area_features, + clip_geometry=current_area, + area_index=index, + ) + if not isochrones_gpkg: + log_message( + f"No isochrones created for area {index}.", + tag="GeoE3", + level=Qgis.Warning, + ) + return False + + self.progressChanged.emit(40.0) + + bands = self._create_bands(isochrones_gpkg_path=isochrones_gpkg, index=index) + scored_buffers = self._assign_scores(bands) + + if scored_buffers is False: + log_message("No scored buffers were created.", level=Qgis.Warning) + return False + + self.progressChanged.emit(50.0) + + # Write buffer scores to grid cells + log_message(f"Writing buffer scores to grid column {self.layer_id}") + write_buffer_values_to_grid( + gpkg_path=self.gpkg_path, + column_name=self.layer_id, + buffer_layer=scored_buffers, + value_field="value", + aggregation_method="MAX", + feedback=self.feedback, + ) + + self.progressChanged.emit(80.0) + + # Rasterize from grid column + output_path = os.path.join( + self.workflow_directory, + f"{self.layer_id}_{index}.tif", + ) + + rect = current_bbox.boundingBox() + extent = (rect.xMinimum(), rect.yMinimum(), rect.xMaximum(), rect.yMaximum()) + + rasterize_grid_column( + gpkg_path=self.gpkg_path, + column_name=self.layer_id, + output_raster_path=output_path, + cell_size=self.cell_size_m, + extent=extent, + nodata=-9999.0, + area_name=area_name, + ) + + self.progressChanged.emit(100.0) + log_message(f"Rasterized grid column to {output_path}") + return output_path + + def _create_simple_buffer_scored( + self, + area_features: QgsVectorLayer, + index: int, + ) -> QgsVectorLayer: + """Create simple buffer and assign scores for Regional scale. + + Args: + area_features: Features to buffer. + index: Area index. + + Returns: + Scored buffer layer with "value" field. + """ + buffer_output = os.path.join(self.workflow_directory, f"simple_buffer_{index}.gpkg") + if os.path.exists(buffer_output): + os.remove(buffer_output) + + buffer_params = { + "INPUT": area_features, + "DISTANCE": self.buffer_distance, + "OUTPUT": buffer_output, + } + result = processing.run("native:buffer", buffer_params, feedback=QgsProcessingFeedback()) + buffered_layer_path = result["OUTPUT"] + + if not buffered_layer_path or not os.path.exists(buffered_layer_path): + log_message( + f"Failed to create buffer for area {index}", + level=Qgis.Warning, + ) + return False + + buffered_layer = QgsVectorLayer(buffered_layer_path, "buffered", "ogr") + if not buffered_layer.isValid(): + log_message( + f"Failed to load buffered layer for area {index}", + level=Qgis.Warning, + ) + return False + + # Add value field with score 5 (highest accessibility) + field_names = [field.name() for field in buffered_layer.fields()] + if "value" not in field_names: + buffered_layer.dataProvider().addAttributes([QgsField("value", QVariant.Int)]) + buffered_layer.updateFields() + + buffered_layer.startEditing() + for feature in buffered_layer.getFeatures(): + feature.setAttribute("value", 5) + buffered_layer.updateFeature(feature) + buffered_layer.commitChanges() + + return buffered_layer + def _clip_network_to_area( self, clip_geometry: QgsGeometry, @@ -240,13 +423,9 @@ def _clip_network_to_area( """ buffer_distance = max(self.distances) if self.distances else 5000 buffered_geometry = clip_geometry.buffer(buffer_distance, 5) - bbox = buffered_geometry.boundingBox() - clipped_network_path = os.path.join(self.workflow_directory, f"clipped_network_area_{area_index}.gpkg") - if os.path.exists(clipped_network_path): os.remove(clipped_network_path) - try: road_network_layer = QgsVectorLayer(self.road_network_layer_path, "network", "ogr") if not road_network_layer.isValid(): @@ -255,13 +434,11 @@ def _clip_network_to_area( level=Qgis.Critical, ) return None - road_crs = road_network_layer.crs() log_message( f"Area {area_index}: Road network CRS: {road_crs.authid()}, Target CRS: {self.target_crs.authid()}", level=Qgis.Info, ) - # Auto-reproject road network if CRS mismatch detected if road_crs != self.target_crs: log_message( @@ -269,7 +446,6 @@ def _clip_network_to_area( f"{road_crs.authid()} to {self.target_crs.authid()}", level=Qgis.Info, ) - # Reproject to target CRS in memory (consistent with check_and_reproject_layer behavior) try: reproject_result = processing.run( @@ -282,14 +458,12 @@ def _clip_network_to_area( context=self.context, ) road_network_layer = reproject_result["OUTPUT"] - if not road_network_layer.isValid(): log_message( f"ERROR: Failed to reproject road network for area {area_index}", level=Qgis.Critical, ) return None - log_message( f"Successfully reprojected road network to {self.target_crs.authid()}", level=Qgis.Info, @@ -300,20 +474,16 @@ def _clip_network_to_area( level=Qgis.Critical, ) return None - log_message( f"Clipping road network to area {area_index} with {buffer_distance}m buffer", level=Qgis.Info, ) - temp_layer = QgsVectorLayer(f"Polygon?crs={self.target_crs.authid()}", "clip_geometry", "memory") temp_provider = temp_layer.dataProvider() - temp_feature = QgsFeature() temp_feature.setGeometry(buffered_geometry) temp_provider.addFeatures([temp_feature]) temp_layer.updateExtents() - # Use road_network_layer (potentially reprojected) instead of path result = processing.run( "native:clip", @@ -324,15 +494,12 @@ def _clip_network_to_area( }, context=self.context, ) - clipped_layer = result["OUTPUT"] - if isinstance(clipped_layer, str): check_layer = QgsVectorLayer(clipped_layer, "check", "ogr") feature_count = check_layer.featureCount() else: feature_count = clipped_layer.featureCount() - if feature_count == 0: log_message( f"Warning: Clipped network for area {area_index} has no features. " @@ -340,14 +507,11 @@ def _clip_network_to_area( level=Qgis.Warning, ) return None - log_message( f"Successfully clipped network to area {area_index}: {feature_count} road segments", level=Qgis.Info, ) - return clipped_network_path - except Exception as e: log_message( f"Error clipping network for area {area_index}: {e}", @@ -375,13 +539,11 @@ def create_isochrones( if total_features == 0: log_message(f"No features to process for area {area_index}.") return False - point_crs = point_layer.crs() log_message( f"Area {area_index}: Point layer CRS: {point_crs.authid()}, Target CRS: {self.target_crs.authid()}", level=Qgis.Info, ) - if point_crs != self.target_crs: log_message( f"ERROR: CRS mismatch for area {area_index}! " @@ -390,21 +552,17 @@ def create_isochrones( level=Qgis.Critical, ) return False - isochrone_layer_path = os.path.join(self.workflow_directory, f"isochrones_area_{area_index}.gpkg") - clipped_network_path = self._clip_network_to_area( clip_geometry=clip_geometry, area_index=area_index, ) - if not clipped_network_path: log_message( f"No road network available for area {area_index}. Skipping network analysis.", level=Qgis.Warning, ) return False - task = NativeNetworkAnalysisProcessingTask( point_layer=point_layer, distances=self.distances, @@ -412,10 +570,8 @@ def create_isochrones( output_gpkg_path=isochrone_layer_path, target_crs=self.target_crs, ) - task.progressChanged.connect(lambda progress: self.feedback.setProgress(progress)) success = task.run() - if os.path.exists(clipped_network_path): try: os.remove(clipped_network_path) @@ -428,7 +584,6 @@ def create_isochrones( f"Warning: Failed to clean up clipped network {clipped_network_path}: {e}", level=Qgis.Warning, ) - if not success: error_msg = task.error_message or "Unknown error" log_message( @@ -436,23 +591,25 @@ def create_isochrones( level=Qgis.Warning, ) return False - # Return the path to the created GeoPackage return task.result_path - def _process_features_with_simple_buffer( + def _process_features_with_simple_buffer_legacy( self, - current_area: "QgsGeometry", - clip_area: "QgsGeometry", - current_bbox: "QgsGeometry", - area_features: "QgsVectorLayer", + current_area: QgsGeometry, + clip_area: QgsGeometry, + current_bbox: QgsGeometry, + area_features: QgsVectorLayer, index: int, + area_name: str = None, ) -> str: """Process a single area using simple buffer (no network analysis). Used for Regional scale - creates a single buffer around POIs and scores grid cells based on percentage intersection. + This is the legacy raster-first version. + Args: current_area: Polygon from study area. clip_area: Polygon to clip features to. @@ -463,18 +620,15 @@ def _process_features_with_simple_buffer( Returns: Raster file path, or False if failed. """ - from qgis import processing from geest.core.algorithms.utilities import subset_vector_layer log_message( f"Creating simple buffer for area {index + 1} (buffer: {self.buffer_distance}m)", level=Qgis.Info, ) - buffer_output = os.path.join(self.workflow_directory, f"simple_buffer_{index}.gpkg") if os.path.exists(buffer_output): os.remove(buffer_output) - buffer_params = { "INPUT": area_features, "DISTANCE": self.buffer_distance, @@ -482,14 +636,12 @@ def _process_features_with_simple_buffer( } result = processing.run("native:buffer", buffer_params, feedback=QgsProcessingFeedback()) buffered_layer_path = result["OUTPUT"] - if not buffered_layer_path or not os.path.exists(buffered_layer_path): log_message( f"Failed to create buffer for area {index}", level=Qgis.Warning, ) return False - buffered_layer = QgsVectorLayer(buffered_layer_path, "buffered", "ogr") if not buffered_layer.isValid(): log_message( @@ -497,11 +649,9 @@ def _process_features_with_simple_buffer( level=Qgis.Warning, ) return False - grid_output = os.path.join(self.workflow_directory, f"grid_area_{index}.gpkg") if os.path.exists(grid_output): os.remove(grid_output) - area_grid = subset_vector_layer( self.workflow_directory, self.grid_layer, @@ -514,33 +664,29 @@ def _process_features_with_simple_buffer( level=Qgis.Warning, ) return False - scored_grid = self._score_grid_for_percentage( grid_layer=area_grid, buffered_layer=buffered_layer, ) - if scored_grid is False: log_message( "No scored grid cells were created.", level=Qgis.Warning, ) return False - raster_output = self._rasterize( input_layer=scored_grid, bbox=current_bbox, index=index, value_field="value", ) - return raster_output def _score_grid_for_percentage( self, - grid_layer: "QgsVectorLayer", - buffered_layer: "QgsVectorLayer", - ) -> "QgsVectorLayer": + grid_layer: QgsVectorLayer, + buffered_layer: QgsVectorLayer, + ) -> QgsVectorLayer: """Score grid cells based on percentage intersection with buffered features. For Regional scale: calculates what percentage of each grid cell @@ -554,44 +700,35 @@ def _score_grid_for_percentage( The grid layer with "value" field containing assigned scores. """ log_message("Scoring grid cells based on percentage intersection") - field_names = [field.name() for field in grid_layer.fields()] if "value" not in field_names: grid_layer.dataProvider().addAttributes([QgsField("value", QVariant.Int)]) grid_layer.updateFields() - grid_layer.startEditing() for grid_feature in grid_layer.getFeatures(): grid_geom = grid_feature.geometry() if grid_geom.isNull(): continue - grid_area = grid_geom.area() if grid_area == 0: continue - max_score = 0 max_overlap_percent = 0 - for buffered_feature in buffered_layer.getFeatures(): buffered_geom = buffered_feature.geometry() if buffered_geom.isNull(): continue - intersection = grid_geom.intersection(buffered_geom) if intersection.isNull() or intersection.area() == 0: continue - # Calculate % of buffer within hexagon (not % of hexagon covered) buffer_area = buffered_geom.area() if buffer_area > 0: overlap_percent = (intersection.area() / buffer_area) * 100 else: overlap_percent = 0 - if overlap_percent > max_overlap_percent: max_overlap_percent = overlap_percent - # Calculate score based on final max_overlap_percent after all buffers checked # Table ranges: Score 0: 0%, Score 1: 0.01-6%, Score 2: 6.01-12%, etc. sorted_items = sorted(self.percentage_scores.items()) @@ -628,15 +765,12 @@ def _score_grid_for_percentage( ) if in_range: max_score = score - log_message( f"DEBUG: Feature {grid_feature.id()} - FINAL: max_overlap={max_overlap_percent:.4f}%, assigned_score={max_score}", level=Qgis.Info, ) - grid_feature.setAttribute("value", max_score) grid_layer.updateFeature(grid_feature) - grid_layer.commitChanges() return grid_layer @@ -658,21 +792,17 @@ def _create_bands(self, isochrones_gpkg_path, index): KeyError: If the value field does not exist in the isochrone layer. """ isochrone_layer_path = f"{isochrones_gpkg_path}|layername=isochrones" - layer = QgsVectorLayer(isochrone_layer_path, "isochrones", "ogr") if not layer.isValid(): raise ValueError(f"Failed to load isochrone layer from {isochrone_layer_path}") output_path = os.path.join(self.workflow_directory, f"final_isochrones_{index}.shp") - ranges_field = "value" field_index = layer.fields().indexFromName(ranges_field) if field_index == -1: raise KeyError( f"Field '{ranges_field}' does not exist in isochrones layer: {isochrone_layer_path}" # noqa E713 ) - unique_ranges = sorted(self.distances, reverse=False) - range_layers = {} for value in unique_ranges: expression = f'"value" = {value}' @@ -685,7 +815,6 @@ def _create_bands(self, isochrones_gpkg_path, index): data_provider.addAttributes(layer.fields()) range_layer.updateFields() data_provider.addFeatures(features) - dissolve_params = { "INPUT": range_layer, "FIELD": [], @@ -694,7 +823,6 @@ def _create_bands(self, isochrones_gpkg_path, index): dissolve_result = processing.run("native:dissolve", dissolve_params) dissolved_layer = dissolve_result["OUTPUT"] range_layers[value] = dissolved_layer - band_layers = [] sorted_ranges = sorted(range_layers.keys(), reverse=True) for i in range(len(sorted_ranges) - 1): @@ -702,7 +830,6 @@ def _create_bands(self, isochrones_gpkg_path, index): next_range = sorted_ranges[i + 1] current_layer = range_layers[current_range] next_layer = range_layers[next_range] - difference_params = { "INPUT": current_layer, "OVERLAY": next_layer, @@ -710,7 +837,6 @@ def _create_bands(self, isochrones_gpkg_path, index): } diff_result = processing.run("native:difference", difference_params) diff_layer = diff_result["OUTPUT"] - diff_layer.dataProvider().addAttributes( [ QgsField("distance", QVariant.Int), @@ -721,14 +847,11 @@ def _create_bands(self, isochrones_gpkg_path, index): for feat in diff_layer.getFeatures(): feat["distance"] = current_range diff_layer.updateFeature(feat) - band_layers.append(diff_layer) - try: smallest_range = sorted_ranges[-1] except IndexError: return None - smallest_layer = range_layers[smallest_range] smallest_layer.dataProvider().addAttributes([QgsField("distance", QVariant.Int)]) smallest_layer.updateFields() @@ -737,7 +860,6 @@ def _create_bands(self, isochrones_gpkg_path, index): feat["distance"] = smallest_range smallest_layer.updateFeature(feat) band_layers.append(smallest_layer) - merge_bands_params = { "LAYERS": band_layers, "CRS": self.target_crs, @@ -759,7 +881,6 @@ def _assign_scores(self, layer: QgsVectorLayer) -> QgsVectorLayer: """ if not layer or not layer.isValid(): return False - # Check if the "value" field already exists field_names = [field.name() for field in layer.fields()] log_message(f"Field names: {field_names}") @@ -768,17 +889,15 @@ def _assign_scores(self, layer: QgsVectorLayer) -> QgsVectorLayer: layer.dataProvider().addAttributes([QgsField("value", QVariant.Int)]) layer.updateFields() log_message('Added "value" field to input layer') - # Check if we should use percentage-based scoring (Regional scale) if self.scoring_method == "percentage_intersection" and self.percentage_scores: layer = self._assign_percentage_scores(layer) else: # Original distance-based scoring layer = self._assign_distance_scores(layer) - return layer - def _assign_percentage_scores(self, layer: "QgsVectorLayer") -> "QgsVectorLayer": + def _assign_percentage_scores(self, layer: QgsVectorLayer) -> QgsVectorLayer: """Assign scores based on percentage intersection with buffer. For Regional scale: calculates what percentage of each grid cell @@ -791,7 +910,6 @@ def _assign_percentage_scores(self, layer: "QgsVectorLayer") -> "QgsVectorLayer" The same layer with a "value" field containing the assigned scores. """ log_message("Using percentage-based scoring for Regional scale") - buffer_distance = 0 if hasattr(self, "buffer_distances") and self.buffer_distances: buffer_distance = ( @@ -799,46 +917,37 @@ def _assign_percentage_scores(self, layer: "QgsVectorLayer") -> "QgsVectorLayer" ) elif self.distances: buffer_distance = max(self.distances) if isinstance(self.distances, list) else self.distances - log_message(f"Buffer distance for percentage scoring: {buffer_distance}m") - layer.startEditing() for feature in layer.getFeatures(): grid_geom = feature.geometry() if grid_geom.isNull(): continue - grid_area = grid_geom.area() - buffer_geom = feature.geometry() if buffer_geom.isNull(): continue - intersection = grid_geom.intersection(buffer_geom) if intersection.isNull() or intersection.area() == 0: feature.setAttribute("value", 0) else: overlap_percent = (intersection.area() / grid_area) * 100 - score = 0 for min_pct, score_value in sorted(self.percentage_scores.items(), reverse=True): if overlap_percent >= min_pct: score = score_value break - feature.setAttribute("value", score) log_message( f"Grid cell overlap: {overlap_percent:.2f}%, score: {score}", tag="GeoE3", level=Qgis.Info, ) - layer.updateFeature(feature) - layer.commitChanges() return layer - def _assign_distance_scores(self, layer: "QgsVectorLayer") -> "QgsVectorLayer": + def _assign_distance_scores(self, layer: QgsVectorLayer) -> QgsVectorLayer: """Assign scores based on distance band (original method). Args: @@ -876,6 +985,7 @@ def _process_raster_for_area( current_bbox: QgsGeometry, area_raster: str, index: int, + area_name: str = None, ): """Execute the actual workflow logic for a single area using a raster. @@ -896,6 +1006,7 @@ def _process_aggregate_for_area( clip_area: QgsGeometry, current_bbox: QgsGeometry, index: int, + area_name: str = None, ): """Execute the workflow, reporting progress and checking for cancellation. diff --git a/geest/core/workflows/multi_buffer_distances_ors_workflow.py b/geest/core/workflows/multi_buffer_distances_ors_workflow.py index 66705fae..7290ad44 100644 --- a/geest/core/workflows/multi_buffer_distances_ors_workflow.py +++ b/geest/core/workflows/multi_buffer_distances_ors_workflow.py @@ -1,9 +1,7 @@ # -*- coding: utf-8 -*- """📦 Multi Buffer Distances Ors Workflow module. - This module contains functionality for multi buffer distances ors workflow. """ - import os import traceback from urllib.parse import unquote @@ -36,19 +34,13 @@ class MultiBufferDistancesORSWorkflow(WorkflowBase): """ Concrete implementation of a 'multi_buffer_distances' workflow. - This uses ORS (OpenRouteService) to calculate the distances between the study area and the selected points of interest. - It will create concentric buffers (isochrones) around the study area and calculate the distances to the points of interest. - The buffers will be calcuated either using travel time or travel distance. - The results will be stored as a collection of tif files scaled to the likert scale. - These results will be be combined into a VRT file and added to the QGIS map. - """ def __init__( @@ -113,7 +105,6 @@ def __init__( level=Qgis.Warning, ) raise Exception("Invalid travel distances provided.") - layer_path = self.attributes.get("multi_buffer_point_shapefile", None) if layer_path: layer_path = unquote(layer_path) @@ -140,7 +131,6 @@ def __init__( level=Qgis.Warning, ) raise Exception("Invalid points layer found.") - mode = self.attributes.get("multi_buffer_travel_mode", "Walking") self.mode = None if mode == "Walking": @@ -153,13 +143,11 @@ def __init__( self.measurement = "distance" else: self.measurement = "time" - # How many features to pass with each ORS API call # Managed in the settings panel self.subset_size = int(setting(key="ors_request_size", default=5)) if self.subset_size > 5: self.subset_size = 5 # Maxiumum of 5 features per request allowed by ORS - self.ors_client = ORSClient("https://api.openrouteservice.org/v2/isochrones") self.api_key = self.ors_client.check_api_key() # Create the masked API key for logging @@ -172,16 +160,13 @@ def __init__( def _mask_api_key(self, api_key: str) -> str: """ Safely mask an API key for logging purposes. - Args: api_key (str): The API key to mask - Returns: str: The masked API key """ if not api_key: return "****" - key_len = len(api_key) if key_len <= 8: # For short keys, show only asterisks @@ -197,38 +182,32 @@ def _process_features_for_area( current_bbox: QgsGeometry, area_features: QgsVectorLayer, index: int, + area_name: str = None, ) -> str: """ Executes the actual workflow logic for a single area. Must be implemented by subclasses. - :current_area: Current polygon from our study area. :current_bbox: Bounding box of the above area. :area_features: A vector layer of features to analyse that includes only features in the study area. :index: Iteration / number of area being processed. - :return: A raster layer file path if processing completes successfully, False if canceled or failed. """ - # Step 2: Process these areas in batches and create buffers buffers = self.create_multibuffers( point_layer=area_features, index=index, ) - scored_buffers = self._assign_scores(buffers) - if scored_buffers is False: log_message("No scored buffers were created.", level=Qgis.Warning) return False - raster_output = self._rasterize( input_layer=scored_buffers, bbox=current_bbox, index=index, value_field="value", ) - return raster_output def create_multibuffers( @@ -238,28 +217,23 @@ def create_multibuffers( ): """ Create multiple buffers (isochrones) for each point in the input point layer using ORSClient. - This method processes the point features in subsets (to handle large datasets), makes API calls to the OpenRouteService to fetch the isochrones (buffers) for each subset, and merges the results into a final output layer. - :param point_layer: QgsVectorLayer containing point features to process. :param index: Index of the current area being processed. :return: QgsVectorLayer containing the buffers as polygons. """ # codeql[python/clear-text-logging-sensitive-data] - API key is properly masked before logging log_message(f"Using ORS API key: {self.masked_api_key}") - # Collect intermediate layers from ORS API features = list(point_layer.getFeatures()) log_message(f"Creating buffers for {len(features)} points") total_features = len(features) - # Process features in subsets to handle large datasets for i in range(0, total_features, self.subset_size): subset_features = features[i : i + self.subset_size] # noqa E203 subset_layer = self._create_subset_layer(subset_features, point_layer) - # Make API calls using ORSClient for the subset json = self._fetch_isochrones(subset_layer) layer = self._create_isochrone_layer(json) @@ -270,7 +244,6 @@ def create_multibuffers( tag="GeoE3", level=Qgis.Info, ) - # Merge all isochrone layers into one final output if self.temp_layers: log_message( @@ -299,53 +272,41 @@ def _create_subset_layer(self, subset_features, point_layer): """ Create a subset layer for processing, with reprojection of points from the point_layer CRS to EPSG:4326 (WGS 84). - :param subset_features: List of QgsFeature objects to add to the subset layer. :param point_layer: The original point layer (QgsVectorLayer) to reproject from. :return: A QgsVectorLayer (subset layer) with reprojected features. """ target_crs = QgsCoordinateReferenceSystem("EPSG:4326") - # Create a new memory layer with the target CRS (EPSG:4326) subset_layer = QgsVectorLayer(f"Point?crs={target_crs.authid()}", "subset", "memory") subset_layer_data = subset_layer.dataProvider() - # Add attributes (fields) from the point_layer subset_layer_data.addAttributes(point_layer.fields()) subset_layer.updateFields() - # Create coordinate transformation from point_layer CRS to the target CRS (EPSG:4326) source_crs = point_layer.crs() transform_context = self.context.project().transformContext() transform = QgsCoordinateTransform(source_crs, target_crs, transform_context) - # Reproject and add features to the subset layer reprojected_features = [] for feature in subset_features: reprojected_feature = QgsFeature(feature) geom = reprojected_feature.geometry() - # Transform the geometry to the target CRS geom.transform(transform) reprojected_feature.setGeometry(geom) - reprojected_features.append(reprojected_feature) - # Add reprojected features to the new subset layer subset_layer_data.addFeatures(reprojected_features) - return subset_layer def _fetch_isochrones(self, layer: QgsVectorLayer) -> dict: """ Fetch isochrones for the given subset of features using ORSClient. - Args: layer (QgsVectorLayer): A QgsVectorLayer containing the subset of features. - Returns: dict: A dict representing the JSON response from the ORS API. - Raises: ValueError: If no valid coordinates are found in the layer. Any exceptions raised by ORSClient.make_request will propagate. @@ -357,17 +318,14 @@ def _fetch_isochrones(self, layer: QgsVectorLayer) -> dict: if geom and not geom.isMultipart(): # Single point geometry coords = geom.asPoint() coordinates.append([coords.x(), coords.y()]) - if not coordinates: raise ValueError("No valid coordinates found in the layer") - # Prepare parameters for ORS API params = { "locations": coordinates, "range": self.distances, # Distances or times in the list "range_type": self.measurement, } - # Make the request to ORS API using ORSClient # Any exceptions will be propogated try: @@ -381,7 +339,6 @@ def _fetch_isochrones(self, layer: QgsVectorLayer) -> dict: with open(error_path, "w") as f: f.write(f"Failed to process {self.workflow_name}: {e}\n") f.write(traceback.format_exc()) - log_message( f"Failed to fetch isochrones layer for {self.workflow_name}: {e}", tag="GeoE3", @@ -402,18 +359,15 @@ def _fetch_isochrones(self, layer: QgsVectorLayer) -> dict: def _create_isochrone_layer(self, isochrone_data): """ Create a QgsVectorLayer from the ORS isochrone data. - :param isochrone_data: JSON data returned from ORS. :return: A QgsVectorLayer containing the isochrones as polygons. """ isochrone_layer = QgsVectorLayer("Polygon?crs=EPSG:4326", "isochrones", "memory") provider = isochrone_layer.dataProvider() - # Add the 'value' field to the layer's attribute table isochrone_layer.startEditing() isochrone_layer.addAttribute(QgsField("value", QVariant.Int)) isochrone_layer.commitChanges() - # Parse the features from ORS response verbose_mode = int(setting(key="verbose_mode", default=0)) if isochrone_data and "features" in isochrone_data: @@ -447,18 +401,15 @@ def _create_isochrone_layer(self, isochrone_data): feat.setGeometry(qgs_geometry) feat.setAttributes([feature_data["properties"].get("value", 0)]) # Add attributes as needed features.append(feat) - provider.addFeatures(features) return isochrone_layer def _merge_layers(self, layers=None, index=None): """ Merge all temporary isochrone layers into a single layer. - :param layers: List of temporary QgsVectorLayers to merge. :param crs: The CRS to use for the merged layer. :param index: The index of the current area being processed. - :return: A QgsVectorLayer representing the merged isochrone layers. """ merge_output = os.path.join(self.workflow_directory, f"{self.layer_id}_merged_isochrones_{index}.shp") @@ -474,26 +425,20 @@ def _merge_layers(self, layers=None, index=None): def _create_bands(self, layer, index): """ Create bands by computing differences between isochrone ranges. - This method computes the differences between isochrone ranges to create bands of non overlapping polygons. The bands are then merged into a final output layer. - :param layer: The merged isochrone layer. :param crs: Coordinate reference system for the output. :param index: The index of the current area being processed. - Returns: QgsVectoryLayer: The final output QgsVectorLayer layer path containing the bands. """ output_path = os.path.join(self.workflow_directory, f"final_isochrones_{index}.shp") - ranges_field = "value" field_index = layer.fields().indexFromName(ranges_field) if field_index == -1: raise KeyError(f"Field '{ranges_field}' does not exist in the merged layer.") # noqa E713 - unique_ranges = sorted({feat[ranges_field] for feat in layer.getFeatures()}) - range_layers = {} for r in unique_ranges: expr = f'"{ranges_field}" = {r}' @@ -506,7 +451,6 @@ def _create_bands(self, layer, index): dp.addAttributes(layer.fields()) range_layer.updateFields() dp.addFeatures(features) - dissolve_params = { "INPUT": range_layer, "FIELD": [], @@ -515,7 +459,6 @@ def _create_bands(self, layer, index): dissolve_result = processing.run("native:dissolve", dissolve_params) dissolved_layer = dissolve_result["OUTPUT"] range_layers[r] = dissolved_layer - band_layers = [] sorted_ranges = sorted(range_layers.keys(), reverse=True) for i in range(len(sorted_ranges) - 1): @@ -523,7 +466,6 @@ def _create_bands(self, layer, index): next_range = sorted_ranges[i + 1] current_layer = range_layers[current_range] next_layer = range_layers[next_range] - difference_params = { "INPUT": current_layer, "OVERLAY": next_layer, @@ -531,7 +473,6 @@ def _create_bands(self, layer, index): } diff_result = processing.run("native:difference", difference_params) diff_layer = diff_result["OUTPUT"] - diff_layer.dataProvider().addAttributes( [ QgsField("distance", QVariant.Int), @@ -542,9 +483,7 @@ def _create_bands(self, layer, index): for feat in diff_layer.getFeatures(): feat["distance"] = current_range diff_layer.updateFeature(feat) - band_layers.append(diff_layer) - smallest_range = sorted_ranges[-1] smallest_layer = range_layers[smallest_range] smallest_layer.dataProvider().addAttributes([QgsField("distance", QVariant.Int)]) @@ -554,7 +493,6 @@ def _create_bands(self, layer, index): feat["distance"] = smallest_range smallest_layer.updateFeature(feat) band_layers.append(smallest_layer) - merge_bands_params = { "LAYERS": band_layers, "CRS": self.target_crs, @@ -569,16 +507,13 @@ def _create_bands(self, layer, index): def _assign_scores(self, layer: QgsVectorLayer) -> QgsVectorLayer: """ Assign values to buffered polygons based 5 for presence of a polygon. - Args: layer QgsVectorLayer: The buffered features layer. - Returns: QgsVectorLayer: The same layer with a "value" field containing the assigned scores. """ if not layer or not layer.isValid(): return False - # Check if the "value" field already exists field_names = [field.name() for field in layer.fields()] log_message(f"Field names: {field_names}") @@ -587,10 +522,8 @@ def _assign_scores(self, layer: QgsVectorLayer) -> QgsVectorLayer: # Add the burn field to the input layer if it doesn't exist layer.dataProvider().addAttributes([QgsField("value", QVariant.Int)]) layer.updateFields() - # Log message when the field is added log_message('Added "value" field to input layer') - # Calculate the burn field value based on the item number in the distance list layer.startEditing() for i, feature in enumerate(layer.getFeatures()): @@ -616,16 +549,12 @@ def _assign_scores(self, layer: QgsVectorLayer) -> QgsVectorLayer: def reproject_isochrones(self, layer: QgsVectorLayer): """ Reproject the isochrone layer to target crs. - The resulting layer will be saved in the working directory too. - Parameters: layer (QgsVectorLayer): The input isochrone layer to reproject. - Returns: QgsVectorLayer: The reprojected isochrone layer. """ - # reproject the later to self.target_crs input_path = layer.source() reprojected_layer_path = input_path.replace(".shp", f"_epsg{self.target_crs.postgisSrid()}.shp") @@ -641,7 +570,6 @@ def reproject_isochrones(self, layer: QgsVectorLayer): ) reprojected_layer_result = processing.run("native:reprojectlayer", transform_params) reprojected_layer = QgsVectorLayer(reprojected_layer_result["OUTPUT"], "reprojected_layer", "ogr") - if not reprojected_layer.isValid(): raise ValueError(f"Failed to reproject input layer to {self.target_crs.authid()}") return reprojected_layer @@ -654,16 +582,15 @@ def _process_raster_for_area( current_bbox: QgsGeometry, area_raster: str, index: int, + area_name: str = None, ): """ Executes the actual workflow logic for a single area using a raster. - :current_area: Current polygon from our study area. :clip_area: Polygon to clip the raster to which is aligned to cell edges. :current_bbox: Bounding box of the above area. :area_raster: A raster layer of features to analyse that includes only bbox pixels in the study area. :index: Index of the current area. - :return: Path to the reclassified raster. """ pass @@ -674,6 +601,7 @@ def _process_aggregate_for_area( clip_area: QgsGeometry, current_bbox: QgsGeometry, index: int, + area_name: str = None, ): """ Executes the workflow, reporting progress through the feedback object and checking for cancellation. diff --git a/geest/core/workflows/osm_transport_polyline_per_cell_workflow.py b/geest/core/workflows/osm_transport_polyline_per_cell_workflow.py index cdf46b76..62ac9c01 100644 --- a/geest/core/workflows/osm_transport_polyline_per_cell_workflow.py +++ b/geest/core/workflows/osm_transport_polyline_per_cell_workflow.py @@ -1,9 +1,7 @@ # -*- coding: utf-8 -*- """📦 Osm Transport Polyline Per Cell Workflow module. - This module contains functionality for osm transport polyline per cell workflow. """ - import os from typing import Optional from urllib.parse import unquote @@ -40,7 +38,6 @@ def __init__( ): """ Initialize the workflow with attributes and feedback. - Args: :param item: JsonTreeItem representing the analysis, dimension, or factor to process. :param cell_size_m: Cell size in meters @@ -53,14 +50,11 @@ def __init__( item, cell_size_m, analysis_scale, feedback, context, working_directory ) # ⭐️ Item is a reference - whatever you change in this item will directly update the tree self.workflow_name = "use_osm_transport_polyline_per_cell" - # Use unified active transport - combines both highway and cycleway with best score logic self.osm_processing_type = OSMDownloadType.ACTIVE_TRANSPORT - layer_path = self.attributes.get("osm_transport_polyline_per_cell_shapefile", None) if layer_path: layer_path = unquote(layer_path) - if not layer_path: log_message( "Nothing found in osm_transport_polyline_per_cell_shapefile, trying osm_transport_polyline_per_cell_layer_source.", @@ -68,7 +62,6 @@ def __init__( level=Qgis.Warning, ) layer_path = self.attributes.get("osm_transport_polyline_per_cell_layer_source", None) - if not layer_path: log_message( "No osm_transport_polyline_per_cell_layer_source found, trying road_network_layer_path.", @@ -76,12 +69,10 @@ def __init__( level=Qgis.Warning, ) layer_path = self.attributes.get("road_network_layer_path", None) - if not layer_path: error_msg = "No transport layer found. Please configure a data source or download the active transport network." log_message(error_msg, tag="GeoE3", level=Qgis.Critical) raise ValueError(error_msg) - self.features_layer = QgsVectorLayer(layer_path, "OSM Transport Layer", "ogr") def _process_features_for_area( @@ -91,16 +82,15 @@ def _process_features_for_area( current_bbox: QgsGeometry, area_features: QgsVectorLayer, index: int, + area_name: str = None, ) -> str: """ Executes the actual workflow logic for a single area Must be implemented by subclasses. - :current_area: Current polygon from our study area. :current_bbox: Bounding box of the above area. :area_features: A vector layer of features to analyse that includes only features in the study area. :index: Iteration / number of area being processed. - :return: A raster layer file path if processing completes successfully, False if canceled or failed. """ area_features_count = area_features.featureCount() @@ -120,14 +110,12 @@ def _process_features_for_area( self.feedback, analysis_scale=self.analysis_scale, ) - log_message( "OSM Transport Polyline per Cell - Selected grid cells and assigned transport scores.", tag="GeoE3", level=Qgis.Info, ) log_message(f"Grid cells with transport scores saved to: {output_path}", tag="GeoE3", level=Qgis.Info) - # Step 2: Rasterize the grid layer using the assigned values # Create a scored boundary layer self.updateStatus("Rasterizing grid cells...") @@ -148,16 +136,15 @@ def _process_raster_for_area( current_bbox: QgsGeometry, area_raster: str, index: int, + area_name: str = None, ): """ Executes the actual workflow logic for a single area using a raster. - :current_area: Current polygon from our study area. :clip_area: Polygon to clip the raster to which is aligned to cell edges. :current_bbox: Bounding box of the above area. :area_raster: A raster layer of features to analyse that includes only bbox pixels in the study area. :index: Index of the current area. - :return: Path to the reclassified raster. """ pass @@ -168,6 +155,7 @@ def _process_aggregate_for_area( clip_area: QgsGeometry, current_bbox: QgsGeometry, index: int, + area_name: str = None, ): """ Executes the workflow, reporting progress through the feedback object and checking for cancellation. diff --git a/geest/core/workflows/point_per_cell_workflow.py b/geest/core/workflows/point_per_cell_workflow.py index c47702c4..55e6c0ea 100644 --- a/geest/core/workflows/point_per_cell_workflow.py +++ b/geest/core/workflows/point_per_cell_workflow.py @@ -2,9 +2,13 @@ """📦 Point Per Cell Workflow module. This module contains functionality for point per cell workflow. + +Supports grid-first mode where feature counts are written directly to +the study_area_grid column, then rasterized. """ import os +from typing import Optional from urllib.parse import unquote from qgis.core import ( @@ -16,9 +20,10 @@ ) from geest.core import JsonTreeItem -from geest.core.algorithms.features_per_cell_processor import ( - assign_values_to_grid, - select_grid_cells_and_count_features, +from geest.core.grid_column_utils import ( + clear_grid_column, + count_features_per_grid_cell, + rasterize_grid_column, ) from geest.utilities import log_message @@ -86,7 +91,12 @@ def __init__( self.attributes["result"] = f"{self.workflow_name} Workflow Failed" raise Exception(error) - self.feedback.setProgress(1.0) # We just use nominal intervals for progress updates + self.feedback.setProgress(1.0) + self.workflow_name = "point_per_cell" + # Grid-first mode: write results to grid columns first, then rasterize + self.use_grid_first = True + # Track if we've cleared the column (only do once, not per area) + self._column_cleared = False def _process_features_for_area( self, @@ -95,17 +105,20 @@ def _process_features_for_area( current_bbox: QgsGeometry, area_features: QgsVectorLayer, index: int, + area_name: Optional[str] = None, ) -> str: """ - Executes the actual workflow logic for a single area - Must be implemented by subclasses. + Executes the actual workflow logic for a single area. + + Supports grid-first mode where counts are written directly to study_area_grid. :current_area: Current polygon from our study area. :current_bbox: Bounding box of the above area. - :area_features: A vector layer of features to analyse that includes only features in the study area. + :area_features: A vector layer of features to analyse. :index: Iteration / number of area being processed. + :area_name: Name of the area being processed. - :return: A raster layer file path if processing completes successfully, False if canceled or failed. + :return: A raster layer file path if processing completes successfully. """ area_features_count = area_features.featureCount() log_message( @@ -113,15 +126,36 @@ def _process_features_for_area( tag="GeoE3", level=Qgis.Info, ) - # Step 1: Select grid cells that intersect with features + + if self.use_grid_first: + return self._process_grid_first( + current_bbox=current_bbox, + area_features=area_features, + index=index, + area_name=area_name, + ) + else: + return self._process_raster_first( + current_bbox=current_bbox, + area_features=area_features, + index=index, + ) + + def _process_raster_first( + self, + current_bbox: QgsGeometry, + area_features: QgsVectorLayer, + index: int, + ) -> str: + """Legacy raster-first processing using copied grid.""" + from geest.core.algorithms.features_per_cell_processor import ( + assign_values_to_grid, + select_grid_cells_and_count_features, + ) + output_path = os.path.join(self.workflow_directory, f"{self.layer_id}_grid_cells.gpkg") area_grid = select_grid_cells_and_count_features(self.grid_layer, area_features, output_path, self.feedback) - - # Step 2: Assign values to grid cells grid = assign_values_to_grid(area_grid, self.feedback) - - # Step 3: Rasterize the grid layer using the assigned values - # Create a scored boundary layer raster_output = self._rasterize( grid, current_bbox, @@ -131,6 +165,56 @@ def _process_features_for_area( ) return raster_output + def _process_grid_first( + self, + current_bbox: QgsGeometry, + area_features: QgsVectorLayer, + index: int, + area_name: str, + ) -> str: + """Grid-first processing - writes directly to study_area_grid.""" + # Clear column once at the start (not per area) + if not self._column_cleared: + log_message(f"Clearing column {self.layer_id} before processing") + clear_grid_column(self.gpkg_path, self.layer_id) + self._column_cleared = True + + self.progressChanged.emit(10.0) + + # Count features and write to grid + log_message(f"Counting features for column {self.layer_id}") + count_features_per_grid_cell( + gpkg_path=self.gpkg_path, + column_name=self.layer_id, + features_layer=area_features, + feedback=self.feedback, + ) + + self.progressChanged.emit(50.0) + + # Rasterize from grid column + output_path = os.path.join( + self.workflow_directory, + f"{self.layer_id}_{index}.tif", + ) + + rect = current_bbox.boundingBox() + extent = (rect.xMinimum(), rect.yMinimum(), rect.xMaximum(), rect.yMaximum()) + + rasterize_grid_column( + gpkg_path=self.gpkg_path, + column_name=self.layer_id, + output_raster_path=output_path, + cell_size=self.cell_size_m, + extent=extent, + nodata=-9999.0, + area_name=area_name, + ) + + self.progressChanged.emit(100.0) + log_message(f"Rasterized grid column to {output_path}") + return output_path + # Default implementation of the abstract method - not used in this workflow def _process_raster_for_area( self, @@ -139,6 +223,7 @@ def _process_raster_for_area( current_bbox: QgsGeometry, area_raster: str, index: int, + area_name: str = None, ): """ Executes the actual workflow logic for a single area using a raster. @@ -159,6 +244,7 @@ def _process_aggregate_for_area( clip_area: QgsGeometry, current_bbox: QgsGeometry, index: int, + area_name: str = None, ): """ Executes the workflow, reporting progress through the feedback object and checking for cancellation. diff --git a/geest/core/workflows/polygon_per_cell_workflow.py b/geest/core/workflows/polygon_per_cell_workflow.py index f7711748..06e5f65c 100644 --- a/geest/core/workflows/polygon_per_cell_workflow.py +++ b/geest/core/workflows/polygon_per_cell_workflow.py @@ -2,9 +2,13 @@ """📦 Polygon Per Cell Workflow module. This module contains functionality for polygon per cell workflow. + +Supports grid-first mode where feature counts are written directly to +the study_area_grid column, then rasterized. """ import os +from typing import Optional from urllib.parse import unquote from qgis.core import ( @@ -16,8 +20,10 @@ ) from geest.core import JsonTreeItem -from geest.core.algorithms.polygon_per_cell_processor import ( - assign_reclassification_to_polygons, +from geest.core.grid_column_utils import ( + clear_grid_column, + count_features_per_grid_cell, + rasterize_grid_column, ) from geest.utilities import log_message @@ -50,13 +56,10 @@ def __init__( super().__init__( item, cell_size_m, analysis_scale, feedback, context, working_directory ) # ⭐️ Item is a reference - whatever you change in this item will directly update the tree - # TODO fix inconsistent abbreviation below for Poly self.workflow_name = "use_polygon_per_cell" - layer_path = self.attributes.get("polygon_per_cell_shapefile", None) if layer_path: layer_path = unquote(layer_path) - if not layer_path: log_message( "Invalid raster found in polygon_per_cell_shapefile, trying polygon_per_cell_layer_source.", @@ -71,8 +74,12 @@ def __init__( level=Qgis.Warning, ) return False - self.features_layer = QgsVectorLayer(layer_path, "polygon_per_cell_layer", "ogr") + self.workflow_name = "polygon_per_cell" + # Grid-first mode: write results to grid columns first, then rasterize + self.use_grid_first = True + # Track if we've cleared the column (only do once, not per area) + self._column_cleared = False def _process_features_for_area( self, @@ -81,17 +88,20 @@ def _process_features_for_area( current_bbox: QgsGeometry, area_features: QgsVectorLayer, index: int, + area_name: Optional[str] = None, ) -> str: """ - Executes the actual workflow logic for a single area - Must be implemented by subclasses. + Executes the actual workflow logic for a single area. + + Supports grid-first mode where counts are written directly to study_area_grid. :current_area: Current polygon from our study area. :current_bbox: Bounding box of the above area. :area_features: A vector layer of features to analyse that includes only features in the study area. :index: Iteration / number of area being processed. + :area_name: Name of the area being processed. - :return: A raster layer file path if processing completes successfully, False if canceled or failed. + :return: A raster layer file path if processing completes successfully. """ area_features_count = area_features.featureCount() log_message( @@ -99,10 +109,32 @@ def _process_features_for_area( tag="GeoE3", level=Qgis.Info, ) - # Step 1: Select grid cells that intersect with features - output_path = os.path.join(self.workflow_directory, f"{self.layer_id}_grid_cells.gpkg") - del output_path - # Step 2: Assign reclassification values to polygons based on their perimeter + + if self.use_grid_first: + return self._process_grid_first( + current_bbox=current_bbox, + area_features=area_features, + index=index, + area_name=area_name, + ) + else: + return self._process_raster_first( + current_bbox=current_bbox, + area_features=area_features, + index=index, + ) + + def _process_raster_first( + self, + current_bbox: QgsGeometry, + area_features: QgsVectorLayer, + index: int, + ) -> str: + """Legacy raster-first processing using polygon perimeter classification.""" + from geest.core.algorithms.polygon_per_cell_processor import ( + assign_reclassification_to_polygons, + ) + polygon_areas = assign_reclassification_to_polygons(area_features) raster_output = self._rasterize( polygon_areas, @@ -113,6 +145,56 @@ def _process_features_for_area( ) return raster_output + def _process_grid_first( + self, + current_bbox: QgsGeometry, + area_features: QgsVectorLayer, + index: int, + area_name: str, + ) -> str: + """Grid-first processing - writes directly to study_area_grid.""" + # Clear column once at the start (not per area) + if not self._column_cleared: + log_message(f"Clearing column {self.layer_id} before processing") + clear_grid_column(self.gpkg_path, self.layer_id) + self._column_cleared = True + + self.progressChanged.emit(10.0) + + # Count features and write to grid + log_message(f"Counting features for column {self.layer_id}") + count_features_per_grid_cell( + gpkg_path=self.gpkg_path, + column_name=self.layer_id, + features_layer=area_features, + feedback=self.feedback, + ) + + self.progressChanged.emit(50.0) + + # Rasterize from grid column + output_path = os.path.join( + self.workflow_directory, + f"{self.layer_id}_{index}.tif", + ) + + rect = current_bbox.boundingBox() + extent = (rect.xMinimum(), rect.yMinimum(), rect.xMaximum(), rect.yMaximum()) + + rasterize_grid_column( + gpkg_path=self.gpkg_path, + column_name=self.layer_id, + output_raster_path=output_path, + cell_size=self.cell_size_m, + extent=extent, + nodata=-9999.0, + area_name=area_name, + ) + + self.progressChanged.emit(100.0) + log_message(f"Rasterized grid column to {output_path}") + return output_path + # Default implementation of the abstract method - not used in this workflow def _process_raster_for_area( self, @@ -121,6 +203,7 @@ def _process_raster_for_area( current_bbox: QgsGeometry, area_raster: str, index: int, + area_name: str = None, ): """ Executes the actual workflow logic for a single area using a raster. @@ -141,6 +224,7 @@ def _process_aggregate_for_area( clip_area: QgsGeometry, current_bbox: QgsGeometry, index: int, + area_name: str = None, ): """ Executes the workflow, reporting progress through the feedback object and checking for cancellation. diff --git a/geest/core/workflows/polyline_per_cell_workflow.py b/geest/core/workflows/polyline_per_cell_workflow.py index 89cdc831..bb126503 100644 --- a/geest/core/workflows/polyline_per_cell_workflow.py +++ b/geest/core/workflows/polyline_per_cell_workflow.py @@ -2,9 +2,13 @@ """📦 Polyline Per Cell Workflow module. This module contains functionality for polyline per cell workflow. + +Supports grid-first mode where feature counts are written directly to +the study_area_grid column, then rasterized. """ import os +from typing import Optional from urllib.parse import unquote from qgis.core import ( @@ -16,9 +20,10 @@ ) from geest.core import JsonTreeItem -from geest.core.algorithms.features_per_cell_processor import ( - assign_values_to_grid, - select_grid_cells_and_count_features, +from geest.core.grid_column_utils import ( + clear_grid_column, + count_features_per_grid_cell, + rasterize_grid_column, ) from geest.utilities import log_message @@ -52,11 +57,9 @@ def __init__( item, cell_size_m, analysis_scale, feedback, context, working_directory ) # ⭐️ Item is a reference - whatever you change in this item will directly update the tree self.workflow_name = "use_polyline_per_cell" - layer_path = self.attributes.get("polyline_per_cell_shapefile", None) if layer_path: layer_path = unquote(layer_path) - if not layer_path: log_message( "Nothing found in polyline_per_cell_shapefile, trying polygline_per_cell_layer_source.", @@ -71,8 +74,12 @@ def __init__( level=Qgis.Warning, ) return False - self.features_layer = QgsVectorLayer(layer_path, "polyline_per_cell Layer", "ogr") + self.workflow_name = "polyline_per_cell" + # Grid-first mode: write results to grid columns first, then rasterize + self.use_grid_first = True + # Track if we've cleared the column (only do once, not per area) + self._column_cleared = False def _process_features_for_area( self, @@ -81,17 +88,20 @@ def _process_features_for_area( current_bbox: QgsGeometry, area_features: QgsVectorLayer, index: int, + area_name: Optional[str] = None, ) -> str: """ - Executes the actual workflow logic for a single area - Must be implemented by subclasses. + Executes the actual workflow logic for a single area. + + Supports grid-first mode where counts are written directly to study_area_grid. :current_area: Current polygon from our study area. :current_bbox: Bounding box of the above area. :area_features: A vector layer of features to analyse that includes only features in the study area. :index: Iteration / number of area being processed. + :area_name: Name of the area being processed. - :return: A raster layer file path if processing completes successfully, False if canceled or failed. + :return: A raster layer file path if processing completes successfully. """ area_features_count = area_features.featureCount() log_message( @@ -100,17 +110,35 @@ def _process_features_for_area( level=Qgis.Info, ) - # Step 1: Select grid cells that intersect with features - self.updateStatus(f"Counting intersections ({area_features_count} features)...") - output_path = os.path.join(self.workflow_directory, f"{self.layer_id}_grid_cells.gpkg") - area_grid = select_grid_cells_and_count_features(self.grid_layer, area_features, output_path, self.feedback) + if self.use_grid_first: + return self._process_grid_first( + current_bbox=current_bbox, + area_features=area_features, + index=index, + area_name=area_name, + ) + else: + return self._process_raster_first( + current_bbox=current_bbox, + area_features=area_features, + index=index, + ) - # Step 2: Assign values to grid cells - self.updateStatus("Assigning scores to grid cells...") - grid = assign_values_to_grid(area_grid, feedback=self.feedback) + def _process_raster_first( + self, + current_bbox: QgsGeometry, + area_features: QgsVectorLayer, + index: int, + ) -> str: + """Legacy raster-first processing using copied grid.""" + from geest.core.algorithms.features_per_cell_processor import ( + assign_values_to_grid, + select_grid_cells_and_count_features, + ) - # Step 3: Rasterize the grid layer using the assigned values - self.updateStatus("Rasterizing grid...") + output_path = os.path.join(self.workflow_directory, f"{self.layer_id}_grid_cells.gpkg") + area_grid = select_grid_cells_and_count_features(self.grid_layer, area_features, output_path, self.feedback) + grid = assign_values_to_grid(area_grid, self.feedback) raster_output = self._rasterize( grid, current_bbox, @@ -120,6 +148,56 @@ def _process_features_for_area( ) return raster_output + def _process_grid_first( + self, + current_bbox: QgsGeometry, + area_features: QgsVectorLayer, + index: int, + area_name: str, + ) -> str: + """Grid-first processing - writes directly to study_area_grid.""" + # Clear column once at the start (not per area) + if not self._column_cleared: + log_message(f"Clearing column {self.layer_id} before processing") + clear_grid_column(self.gpkg_path, self.layer_id) + self._column_cleared = True + + self.progressChanged.emit(10.0) + + # Count features and write to grid + log_message(f"Counting features for column {self.layer_id}") + count_features_per_grid_cell( + gpkg_path=self.gpkg_path, + column_name=self.layer_id, + features_layer=area_features, + feedback=self.feedback, + ) + + self.progressChanged.emit(50.0) + + # Rasterize from grid column + output_path = os.path.join( + self.workflow_directory, + f"{self.layer_id}_{index}.tif", + ) + + rect = current_bbox.boundingBox() + extent = (rect.xMinimum(), rect.yMinimum(), rect.xMaximum(), rect.yMaximum()) + + rasterize_grid_column( + gpkg_path=self.gpkg_path, + column_name=self.layer_id, + output_raster_path=output_path, + cell_size=self.cell_size_m, + extent=extent, + nodata=-9999.0, + area_name=area_name, + ) + + self.progressChanged.emit(100.0) + log_message(f"Rasterized grid column to {output_path}") + return output_path + # Default implementation of the abstract method - not used in this workflow def _process_raster_for_area( self, @@ -128,6 +206,7 @@ def _process_raster_for_area( current_bbox: QgsGeometry, area_raster: str, index: int, + area_name: str = None, ): """ Executes the actual workflow logic for a single area using a raster. @@ -148,6 +227,7 @@ def _process_aggregate_for_area( clip_area: QgsGeometry, current_bbox: QgsGeometry, index: int, + area_name: str = None, ): """ Executes the workflow, reporting progress through the feedback object and checking for cancellation. diff --git a/geest/core/workflows/raster_reclassification_workflow.py b/geest/core/workflows/raster_reclassification_workflow.py index 4e67ffe2..0c1504fe 100644 --- a/geest/core/workflows/raster_reclassification_workflow.py +++ b/geest/core/workflows/raster_reclassification_workflow.py @@ -1,9 +1,7 @@ # -*- coding: utf-8 -*- """📦 Raster Reclassification Workflow module. - This module contains functionality for raster reclassification workflow. """ - import os from urllib.parse import unquote @@ -52,16 +50,13 @@ def __init__( item, cell_size_m, analysis_scale, feedback, context, working_directory ) # ⭐️ Item is a reference - whatever you change in this item will directly update the tree self.workflow_name = "use_environmental_hazards" - if self.layer_id == "landslide": self.range_boundaries = 2 # min and max values are included else: self.range_boundaries = 0 # default value for range boundaries - layer_name = self.attributes.get("environmental_hazards_raster", None) if layer_name: layer_name = unquote(layer_name) - if not layer_name: log_message( "Invalid layer found in environmental_hazards_raster, trying environmental_hazards_layer_source.", @@ -76,9 +71,7 @@ def __init__( level=Qgis.Warning, ) return - self.raster_layer = QgsRasterLayer(layer_name, "Environmental Hazards Raster", "gdal") - if self.layer_id == "fire": self.reclassification_rules = [ "-inf", @@ -184,7 +177,6 @@ def __init__( 5, 0, # new value = 0 ] - log_message( f"Reclassification Rules for {self.layer_id}: {self.reclassification_rules}", tag="GeoE3", @@ -198,28 +190,25 @@ def _process_raster_for_area( current_bbox: QgsGeometry, area_raster: str, index: int, + area_name: str = None, ): """ Executes the actual workflow logic for a single area using a raster. - :current_area: Current polygon from our study area. :clip_area: Polygon to clip the raster to which is aligned to cell edges. :current_bbox: Bounding box of the above area. :area_raster: A raster layer of features to analyse that includes only bbox pixels in the study area. :index: Index of the current area. - :return: Path to the reclassified raster. """ del current_area # Unused in this analysis noqa F841 del clip_area # Unused in this analysis noqa F841 - # Apply the reclassification rules reclassified_raster = self._apply_reclassification( area_raster, index, bbox=current_bbox, ) - return reclassified_raster def _apply_reclassification( @@ -232,9 +221,7 @@ def _apply_reclassification( Apply the reclassification using the raster calculator and save the output. """ bbox = bbox.boundingBox() - reclassified_raster_path = os.path.join(self.workflow_directory, f"{self.layer_id}_reclassified_{index}.tif") - # Set up the reclassification using reclassifybytable params = { "INPUT_RASTER": input_raster, @@ -244,10 +231,8 @@ def _apply_reclassification( "OUTPUT": "TEMPORARY_OUTPUT", "PROGRESS": self.feedback, } - # Perform the reclassification using the raster calculator reclass = processing.run("native:reclassifybytable", params, feedback=QgsProcessingFeedback())["OUTPUT"] - clip_params = { "INPUT": reclass, "MASK": self.clip_areas_layer, @@ -258,14 +243,12 @@ def _apply_reclassification( "OUTPUT": reclassified_raster_path, "PROGRESS": self.feedback, } - processing.run("gdal:cliprasterbymasklayer", clip_params, feedback=QgsProcessingFeedback()) log_message( f"Reclassification for area {index} complete. Saved to {reclassified_raster_path}", tag="GeoE3", level=Qgis.Info, ) - return reclassified_raster_path # Not used in this workflow since we work with rasters @@ -276,17 +259,16 @@ def _process_features_for_area( current_bbox: QgsGeometry, area_features: QgsVectorLayer, index: int, + area_name: str = None, ) -> str: """ Executes the actual workflow logic for a single area Must be implemented by subclasses. - :current_area: Current polygon from our study area. :clip_area: Extended grid matched polygon for the study area. :current_bbox: Bounding box of the above area. :area_features: A vector layer of features to analyse that includes only features in the study area. :index: Iteration / number of area being processed. - :return: A raster layer file path if processing completes successfully, False if canceled or failed. """ pass @@ -297,15 +279,14 @@ def _process_aggregate_for_area( clip_area: QgsGeometry, current_bbox: QgsGeometry, index: int, + area_name: str = None, ): """ Executes the actual workflow logic for a single area using an aggregate. - :current_area: Current polygon from our study area. :clip_area: Extended grid matched polygon for the study area. :current_bbox: Bounding box of the above area. :index: Index of the current area. - :return: Path to the reclassified raster. """ pass diff --git a/geest/core/workflows/safety_polygon_workflow.py b/geest/core/workflows/safety_polygon_workflow.py index 871780e1..c2fd7edd 100644 --- a/geest/core/workflows/safety_polygon_workflow.py +++ b/geest/core/workflows/safety_polygon_workflow.py @@ -1,9 +1,7 @@ # -*- coding: utf-8 -*- """📦 Safety Polygon Workflow module. - This module contains functionality for safety polygon workflow. """ - from urllib.parse import unquote from qgis.core import ( @@ -51,7 +49,6 @@ def __init__( ) # ⭐️ Item is a reference - whatever you change in this item will directly update the tree self.workflow_name = "use_classify_safety_polygon_into_classes" layer_path = unquote(self.attributes.get("classify_safety_polygon_into_classes_shapefile", "")) - if not layer_path: log_message( "Invalid layer found in classify_safety_polygon_into_classes_shapefile, trying classify_safety_polygon_into_classes_layer_source.", @@ -66,9 +63,7 @@ def __init__( level=Qgis.Warning, ) return False - self.features_layer = QgsVectorLayer(layer_path, "features_layer", "ogr") - self.selected_field = self.attributes.get("classify_safety_polygon_into_classes_selected_field", "") # This is a dict with keys being unique values from the selected field # and values from the aggregation dialog configuration table @@ -83,16 +78,15 @@ def _process_features_for_area( current_bbox: QgsGeometry, area_features: QgsVectorLayer, index: int, + area_name: str = None, ) -> str: """ Executes the actual workflow logic for a single area Must be implemented by subclasses. - :current_area: Current polygon from our study area. :current_bbox: Bounding box of the above area. :area_features: A vector layer of features to analyse that includes only features in the study area. :index: Iteration / number of area being processed. - :return: A raster layer file path if processing completes successfully, False if canceled or failed. """ area_features_count = area_features.featureCount() @@ -103,7 +97,6 @@ def _process_features_for_area( ) # Step 1: Assign reclassification values based on perceived safety reclassified_layer = self._assign_reclassification_to_safety(area_features) - # Step 2: Rasterize the safety data raster_output = self._rasterize( reclassified_layer, @@ -122,7 +115,6 @@ def _assign_reclassification_to_safety(self, layer: QgsVectorLayer) -> QgsVector if layer.fields().indexFromName("value") == -1: layer.dataProvider().addAttributes([QgsField("value", QVariant.Int)]) layer.updateFields() - feature_count = layer.featureCount() counter = 0 for feature in layer.getFeatures(): @@ -159,16 +151,15 @@ def _process_raster_for_area( current_bbox: QgsGeometry, area_raster: str, index: int, + area_name: str = None, ): """ Executes the actual workflow logic for a single area using a raster. - :current_area: Current polygon from our study area. :clip_area: Polygon to clip the raster to which is aligned to cell edges. :current_bbox: Bounding box of the above area. :area_raster: A raster layer of features to analyse that includes only bbox pixels in the study area. :index: Index of the current area. - :return: Path to the reclassified raster. """ pass @@ -179,6 +170,7 @@ def _process_aggregate_for_area( clip_area: QgsGeometry, current_bbox: QgsGeometry, index: int, + area_name: str = None, ): """ Executes the workflow, reporting progress through the feedback object and checking for cancellation. diff --git a/geest/core/workflows/safety_raster_workflow.py b/geest/core/workflows/safety_raster_workflow.py index 2f18c631..eec5e314 100644 --- a/geest/core/workflows/safety_raster_workflow.py +++ b/geest/core/workflows/safety_raster_workflow.py @@ -1,9 +1,7 @@ # -*- coding: utf-8 -*- """📦 Safety Raster Workflow module. - This module contains functionality for safety raster workflow. """ - import os from urllib.parse import unquote @@ -57,7 +55,6 @@ def __init__( ) # ⭐️ Item is a reference - whatever you change in this item will directly update the tree self.workflow_name = "use_nighttime_lights" layer_name = unquote(self.attributes.get("nighttime_lights_raster", None)) - if not layer_name: log_message( "Invalid raster found in nighttime_lights_raster, trying nighttime_lights_layer_source.", @@ -81,22 +78,19 @@ def _process_raster_for_area( current_bbox: QgsGeometry, area_raster: str, index: int, + area_name: str = None, ): """ Executes the actual workflow logic for a single area using a raster. - :current_area: Current polygon from our study area. :clip_area: Polygon to clip the raster to which is aligned to cell edges. :current_bbox: Bounding box of the above area. :area_raster: A raster layer of features to analyse that includes only bbox pixels in the study area. :index: Index of the current area. - :return: Path to the reclassified raster. """ _ = current_area # Unused in this analysis - max_val, median, percentile_75, valid_data = self.calculate_raster_stats(area_raster) - # Check if we got valid statistics if valid_data is None or len(valid_data) == 0: log_message( @@ -105,7 +99,6 @@ def _process_raster_for_area( level=1, ) return None - # Dynamically build the reclassification table using Jenks Natural Breaks reclass_table = self._build_reclassification_table(max_val, median, valid_data) log_message( @@ -113,7 +106,6 @@ def _process_raster_for_area( tag="GeoE3", level=0, ) - # Apply the reclassification rules reclassified_raster = self._apply_reclassification( area_raster, @@ -134,9 +126,7 @@ def _apply_reclassification( Apply the reclassification using the raster calculator and save the output. """ bbox = bbox.boundingBox() - reclassified_raster = os.path.join(self.workflow_directory, f"{self.layer_id}_reclassified_{index}.tif") - # Set up the reclassification using reclassifybytable params = { "INPUT_RASTER": input_raster, @@ -148,46 +138,38 @@ def _apply_reclassification( "OUTPUT": reclassified_raster, "PROGRESS": self.feedback, } - # Perform the reclassification using the raster calculator processing.run( "native:reclassifybytable", # noqa F841 params, # noqa F841 feedback=QgsProcessingFeedback(), # noqa F841 )["OUTPUT"] - log_message( f"Reclassification for area {index} complete. Saved to {reclassified_raster}", tag="GeoE3", level=Qgis.Info, ) - return reclassified_raster def calculate_raster_stats(self, raster_path): """ Calculate statistics from a QGIS raster layer using NumPy. - Returns: Tuple of (max_value, median, percentile_75, valid_data) Returns (None, None, None, None) if raster cannot be read """ raster_layer = QgsRasterLayer(raster_path, "Input Raster") - # Check if the raster layer loaded successfully if not raster_layer.isValid(): log_message("Raster layer failed to load", tag="GeoE3", level=1) return None, None, None, None - provider = raster_layer.dataProvider() extent = raster_layer.extent() width = raster_layer.width() height = raster_layer.height() - # Fetch the raster data for band 1 block = provider.block(1, extent, width, height) byte_array = block.data() # This returns a QByteArray - # Determine the correct dtype based on the provider's data type data_type = provider.dataType(1) dtype = None @@ -203,24 +185,19 @@ def calculate_raster_stats(self, raster_path): dtype = np.uint32 elif data_type == 1: # Byte dtype = np.uint8 - if dtype is None: log_message("Unsupported data type", tag="GeoE3", level=1) return None, None, None, None - # Convert QByteArray to a numpy array with the correct dtype raster_array = np.frombuffer(byte_array, dtype=dtype).reshape((height, width)) - # Filter out NoData values no_data_value = provider.sourceNoDataValue(1) valid_data = raster_array[raster_array != no_data_value] - if valid_data.size > 0: # Compute statistics max_value = np.max(valid_data).astype(dtype) median = np.median(valid_data).astype(dtype) percentile_75 = np.percentile(valid_data, 75).astype(dtype) - return max_value, median, percentile_75, valid_data else: # Handle case with no valid data @@ -230,13 +207,10 @@ def calculate_raster_stats(self, raster_path): def _build_binary_table(self, max_val: float) -> list: """ Build binary classification table: no light vs light present. - Uses fixed threshold of 0.001 (VIIRS noise floor) to avoid false positives from sensor noise. - Args: max_val: Maximum value in the raster data - Returns: Reclassification table as list of strings Format: [min1, max1, class1, min2, max2, class2] @@ -248,35 +222,28 @@ def _build_binary_table(self, max_val: float) -> list: def _build_reclassification_table(self, max_val: float, median: float, valid_data: np.ndarray) -> list: """ Build reclassification table with automatic method selection. - Automatically chooses between Binary and Jenks Natural Breaks classification based on data distribution: - Binary: If > ntl_binary_threshold_percent zeros OR GVF < 0.3 - Jenks: Otherwise - The table maps nighttime lights intensity values to safety classes: - Binary: 2 classes (0=No Access, 5=Light Present) - Jenks: 6 classes (0=No Access to 5=Very High) - Args: max_val: Maximum value in the raster median: Median value in the raster valid_data: Array of all valid (non-NoData) raster values - Returns: Reclassification table as list of [min, max, class, min, max, class, ...] formatted as strings for QGIS native:reclassifybytable algorithm - Raises: ValueError: If Jenks Natural Breaks cannot compute valid classification breaks """ # Read threshold from settings (default 80%) threshold_percent = int(setting(key="ntl_binary_threshold_percent", default=80)) - # Calculate metrics for auto-detection zero_threshold = 0.001 non_zero_data = valid_data[valid_data > zero_threshold] - if len(non_zero_data) == 0: zero_percentage = 100.0 gvf = 0.0 @@ -284,10 +251,8 @@ def _build_reclassification_table(self, max_val: float, median: float, valid_dat zero_percentage = (len(valid_data) - len(non_zero_data)) / len(valid_data) * 100 breaks = jenks_natural_breaks(valid_data, n_classes=6) gvf = calculate_goodness_of_variance_fit(valid_data, breaks) - # Auto-decide: Binary or Jenks? use_binary = (zero_percentage > threshold_percent) or (gvf < 0.3) - if use_binary: log_message( f"🎯 Auto-selected Binary classification " @@ -296,10 +261,8 @@ def _build_reclassification_table(self, max_val: float, median: float, valid_dat level=0, ) return self._build_binary_table(max_val) - # Continue with Jenks Natural Breaks n_classes = 6 - log_message( f"📊 Computing Jenks Natural Breaks classification (max={max_val:.6f}, " f"median={median:.6f}, n={len(valid_data)}, zeros={zero_percentage:.1f}%, " @@ -307,29 +270,23 @@ def _build_reclassification_table(self, max_val: float, median: float, valid_dat tag="GeoE3", level=0, ) - try: # Calculate Jenks breaks for n_classes # Returns: [break₁, break₂, break₃, break₄, break₅, max_value] breaks = jenks_natural_breaks(valid_data, n_classes=n_classes) - # Build QGIS reclassification table format # Format: [min₁, max₁, class₁, min₂, max₂, class₂, ...] reclass_table = [] - # Class 0: From 0 to first break reclass_table.extend([0.0, breaks[0], 0]) - # Classes 1-5: Between consecutive breaks for i in range(len(breaks) - 1): class_num = i + 1 min_val = breaks[i] max_val_class = breaks[i + 1] reclass_table.extend([min_val, max_val_class, class_num]) - # Convert all values to strings for QGIS processing reclass_table = list(map(str, reclass_table)) - log_message( f"✅ Jenks Natural Breaks computed:\n" f" Class 0 (No Access): 0.000 - {breaks[0]:.3f}\n" @@ -342,9 +299,7 @@ def _build_reclassification_table(self, max_val: float, median: float, valid_dat tag="GeoE3", level=0, ) - return reclass_table - except Exception as e: # Fail workflow with clear error message unique_count = len(np.unique(valid_data)) @@ -368,16 +323,15 @@ def _process_features_for_area( current_bbox: QgsGeometry, area_features: QgsVectorLayer, index: int, + area_name: str = None, ) -> str: """ Executes the actual workflow logic for a single area Must be implemented by subclasses. - :current_area: Current polygon from our study area. :current_bbox: Bounding box of the above area. :area_features: A vector layer of features to analyse that includes only features in the study area. :index: Iteration / number of area being processed. - :return: A raster layer file path if processing completes successfully, False if canceled or failed. """ pass @@ -387,6 +341,7 @@ def _process_aggregate_for_area( current_area: QgsGeometry, current_bbox: QgsGeometry, index: int, + area_name: str = None, ): """ Executes the workflow, reporting progress through the feedback object and checking for cancellation. diff --git a/geest/core/workflows/single_point_buffer_workflow.py b/geest/core/workflows/single_point_buffer_workflow.py index 439e138c..d6595bd1 100644 --- a/geest/core/workflows/single_point_buffer_workflow.py +++ b/geest/core/workflows/single_point_buffer_workflow.py @@ -2,9 +2,13 @@ """📦 Single Point Buffer Workflow module. This module contains functionality for single point buffer workflow. + +Supports grid-first mode where buffer scores are written directly to +the study_area_grid column, then rasterized. """ import os +from typing import Optional from urllib.parse import unquote from qgis import processing @@ -19,6 +23,11 @@ from qgis.PyQt.QtCore import QVariant from geest.core import JsonTreeItem +from geest.core.grid_column_utils import ( + clear_grid_column, + rasterize_grid_column, + write_buffer_values_to_grid, +) from geest.core.workflows.mappings import MAPPING_REGISTRY from geest.utilities import log_message @@ -52,7 +61,6 @@ def __init__( item, cell_size_m, analysis_scale, feedback, context, working_directory ) # ⭐️ Item is a reference - whatever you change in this item will directly update the tree self.workflow_name = "use_single_buffer_point" - layer_source = self.attributes.get("single_buffer_point_shapefile", None) if layer_source is not None: layer_source = unquote(layer_source) @@ -81,39 +89,69 @@ def __init__( config = mapping.get(analysis_scale, mapping.get("national")) if mapping else None mapped_distance = config.get("buffer_distance") if config else None self.mapped_scores = config.get("scores", {}) if config else {} - # Load scoring method and percentage scores for Regional scale self.scoring_method = config.get("scoring_method", "") if config else "" self.percentage_scores = config.get("percentage_scores", {}) if config else {} - default_buffer_distance = int(self.attributes.get("default_single_buffer_distance", 0)) if mapped_distance: default_buffer_distance = int(mapped_distance) - buffer_distance = self.attributes.get("single_buffer_point_layer_distance", default_buffer_distance) self.buffer_distance = int(buffer_distance) if buffer_distance else int(default_buffer_distance) + self.workflow_name = "single_point_buffer" + # Grid-first mode: write results to grid columns first, then rasterize + self.use_grid_first = True + # Track if we've cleared the column (only do once, not per area) + self._column_cleared = False def _process_features_for_area( self, - current_area: "QgsGeometry", - clip_area: "QgsGeometry", - current_bbox: "QgsGeometry", - area_features: "QgsVectorLayer", + current_area: QgsGeometry, + clip_area: QgsGeometry, + current_bbox: QgsGeometry, + area_features: QgsVectorLayer, index: int, + area_name: Optional[str] = None, ) -> str: """ - Executes the actual workflow logic for a single area - Must be implemented by subclasses. + Executes the actual workflow logic for a single area. + + Supports grid-first mode where buffer scores are written directly to study_area_grid. :current_area: Current polygon from our study area. :current_bbox: Bounding box of the above area. :area_features: A vector layer of features to analyse that includes only features in the study area. :index: Iteration / number of area being processed. + :area_name: Name of the area being processed. - :return: A raster layer file path if processing completes successfully, False if canceled or failed. + :return: A raster layer file path if processing completes successfully. """ - log_message(f"{self.workflow_name} Processing Started") + log_message(f"{self.workflow_name} Processing Started") + + if self.use_grid_first: + return self._process_grid_first( + current_bbox=current_bbox, + area_features=area_features, + index=index, + area_name=area_name, + ) + else: + return self._process_raster_first( + current_area=current_area, + clip_area=clip_area, + current_bbox=current_bbox, + area_features=area_features, + index=index, + ) + def _process_raster_first( + self, + current_area: QgsGeometry, + clip_area: QgsGeometry, + current_bbox: QgsGeometry, + area_features: QgsVectorLayer, + index: int, + ) -> str: + """Legacy raster-first processing.""" # Check if we should use percentage-based scoring (Regional scale) if self.scoring_method == "percentage_intersection" and self.percentage_scores: return self._process_with_percentage_scoring( @@ -123,26 +161,86 @@ def _process_features_for_area( area_features=area_features, index=index, ) - # Original binary scoring approach for National/Local scale # Step 1: Buffer the selected features buffered_layer = self._buffer_features(area_features, f"{self.layer_id}_buffered_{index}") - # Step 2: Assign values to the buffered polygons scored_layer = self._assign_scores(buffered_layer) - # Step 3: Rasterize the scored buffer layer raster_output = self._rasterize(scored_layer, current_bbox, index) - return raster_output + def _process_grid_first( + self, + current_bbox: QgsGeometry, + area_features: QgsVectorLayer, + index: int, + area_name: str, + ) -> str: + """Grid-first processing - writes buffer scores directly to study_area_grid.""" + # Clear column once at the start (not per area) + if not self._column_cleared: + log_message(f"Clearing column {self.layer_id} before processing") + clear_grid_column(self.gpkg_path, self.layer_id) + self._column_cleared = True + + self.progressChanged.emit(10.0) + + # Step 1: Buffer the selected features + log_message(f"Creating buffer with distance {self.buffer_distance}m") + buffered_layer = self._buffer_features(area_features, f"{self.layer_id}_buffered_{index}") + + self.progressChanged.emit(30.0) + + # Step 2: Assign values to the buffered polygons + scored_layer = self._assign_scores(buffered_layer) + + self.progressChanged.emit(40.0) + + # Step 3: Write buffer scores to grid cells + log_message(f"Writing buffer scores to grid column {self.layer_id}") + write_buffer_values_to_grid( + gpkg_path=self.gpkg_path, + column_name=self.layer_id, + buffer_layer=scored_layer, + value_field="value", + aggregation_method="MAX", + feedback=self.feedback, + ) + + self.progressChanged.emit(70.0) + + # Step 4: Rasterize from grid column + output_path = os.path.join( + self.workflow_directory, + f"{self.layer_id}_{index}.tif", + ) + + rect = current_bbox.boundingBox() + extent = (rect.xMinimum(), rect.yMinimum(), rect.xMaximum(), rect.yMaximum()) + + rasterize_grid_column( + gpkg_path=self.gpkg_path, + column_name=self.layer_id, + output_raster_path=output_path, + cell_size=self.cell_size_m, + extent=extent, + nodata=-9999.0, + area_name=area_name, + ) + + self.progressChanged.emit(100.0) + log_message(f"Rasterized grid column to {output_path}") + return output_path + def _process_with_percentage_scoring( self, - current_area: "QgsGeometry", - clip_area: "QgsGeometry", - current_bbox: "QgsGeometry", - area_features: "QgsVectorLayer", + current_area: QgsGeometry, + clip_area: QgsGeometry, + current_bbox: QgsGeometry, + area_features: QgsVectorLayer, index: int, + area_name: str = None, ) -> str: """Process using percentage-based scoring (Regional scale). @@ -165,12 +263,10 @@ def _process_with_percentage_scoring( f"Single Point Buffer: Using percentage scoring for Regional scale (buffer: {self.buffer_distance}m)", level=Qgis.Info, ) - # Step 1: Create buffer around points buffer_output = os.path.join(self.workflow_directory, f"simple_buffer_{index}.gpkg") if os.path.exists(buffer_output): os.remove(buffer_output) - buffer_params = { "INPUT": area_features, "DISTANCE": self.buffer_distance, @@ -178,14 +274,12 @@ def _process_with_percentage_scoring( } result = processing.run("native:buffer", buffer_params) buffered_layer_path = result["OUTPUT"] - if not buffered_layer_path or not os.path.exists(buffered_layer_path): log_message( f"Failed to create buffer for area {index}", level=Qgis.Warning, ) return False - buffered_layer = QgsVectorLayer(buffered_layer_path, "buffered", "ogr") if not buffered_layer.isValid(): log_message( @@ -193,7 +287,6 @@ def _process_with_percentage_scoring( level=Qgis.Warning, ) return False - # Step 2: Get grid cells for current area area_grid = subset_vector_layer( self.workflow_directory, @@ -207,20 +300,17 @@ def _process_with_percentage_scoring( level=Qgis.Warning, ) return False - # Step 3: Score grid cells based on percentage intersection scored_grid = self._score_grid_for_percentage( grid_layer=area_grid, buffered_layer=buffered_layer, ) - if scored_grid is False: log_message( "No scored grid cells were created.", level=Qgis.Warning, ) return False - # Step 4: Rasterize raster_output = self._rasterize( input_layer=scored_grid, @@ -228,14 +318,13 @@ def _process_with_percentage_scoring( index=index, value_field="value", ) - return raster_output def _score_grid_for_percentage( self, - grid_layer: "QgsVectorLayer", - buffered_layer: "QgsVectorLayer", - ) -> "QgsVectorLayer": + grid_layer: QgsVectorLayer, + buffered_layer: QgsVectorLayer, + ) -> QgsVectorLayer: """Score grid cells based on percentage intersection with buffered features. For Regional scale: calculates what percentage of each grid cell @@ -249,44 +338,34 @@ def _score_grid_for_percentage( The grid layer with "value" field containing the assigned scores. """ log_message("Scoring grid cells based on percentage intersection") - field_names = [field.name() for field in grid_layer.fields()] if "value" not in field_names: grid_layer.dataProvider().addAttributes([QgsField("value", QVariant.Int)]) grid_layer.updateFields() - grid_layer.startEditing() for grid_feature in grid_layer.getFeatures(): grid_geom = grid_feature.geometry() if grid_geom.isNull(): continue - grid_area = grid_geom.area() if grid_area == 0: continue - - max_score = 0 max_overlap_percent = 0 - for buffered_feature in buffered_layer.getFeatures(): buffered_geom = buffered_feature.geometry() if buffered_geom.isNull(): continue - intersection = grid_geom.intersection(buffered_geom) if intersection.isNull() or intersection.area() == 0: continue - # Calculate % of buffer within hexagon buffer_area = buffered_geom.area() if buffer_area > 0: overlap_percent = (intersection.area() / buffer_area) * 100 else: overlap_percent = 0 - if overlap_percent > max_overlap_percent: max_overlap_percent = overlap_percent - # Calculate score based on final max_overlap_percent after all buffers checked sorted_items = sorted(self.percentage_scores.items()) max_score = 0 @@ -304,15 +383,11 @@ def _score_grid_for_percentage( prev_pct = sorted_items[i - 1][0] if prev_pct < max_overlap_percent <= min_pct: max_score = score - grid_feature.setAttribute("value", max_score) grid_layer.updateFeature(grid_feature) - grid_layer.commitChanges() return grid_layer - return raster_output - def _buffer_features(self, layer: QgsVectorLayer, output_name: str) -> QgsVectorLayer: """ Buffer the input features by the buffer_distance km. @@ -335,7 +410,6 @@ def _buffer_features(self, layer: QgsVectorLayer, output_name: str) -> QgsVector "OUTPUT": output_path, }, )["OUTPUT"] - buffered_layer = QgsVectorLayer(output_path, output_name, "ogr") return buffered_layer @@ -349,21 +423,17 @@ def _assign_scores(self, layer: QgsVectorLayer) -> QgsVectorLayer: Returns: QgsVectorLayer: A new layer with a "value" field containing the assigned scores. """ - log_message(f"Assigning scores to {layer.name()}") # Create a new field in the layer for the scores layer.startEditing() layer.dataProvider().addAttributes([QgsField("value", QVariant.Int)]) layer.updateFields() - # Assign scores to the buffered polygons score = self.mapped_scores.get("intersects", 5) for feature in layer.getFeatures(): feature.setAttribute("value", score) layer.updateFeature(feature) - layer.commitChanges() - return layer # Default implementation of the abstract method - not used in this workflow @@ -374,6 +444,7 @@ def _process_raster_for_area( current_bbox: QgsGeometry, area_raster: str, index: int, + area_name: str = None, ): """ Executes the actual workflow logic for a single area using a raster. @@ -394,6 +465,7 @@ def _process_aggregate_for_area( clip_area: QgsGeometry, current_bbox: QgsGeometry, index: int, + area_name: str = None, ): """ Executes the workflow, reporting progress through the feedback object and checking for cancellation. diff --git a/geest/core/workflows/street_lights_buffer_workflow.py b/geest/core/workflows/street_lights_buffer_workflow.py index a0927702..bd27e6f5 100644 --- a/geest/core/workflows/street_lights_buffer_workflow.py +++ b/geest/core/workflows/street_lights_buffer_workflow.py @@ -1,9 +1,7 @@ # -*- coding: utf-8 -*- """📦 Street Lights Buffer Workflow module. - This module contains functionality for street lights buffer workflow. """ - import os from urllib.parse import unquote @@ -42,20 +40,14 @@ def __init__( ): """ Initialize the workflow with attributes and feedback. - :param item: JsonTreeItem representing the analysis, dimension, or factor to process. - :param cell_size_m: Cell size in meters. - :param analysis_scale: Scale of the analysis, e.g., 'local', 'national'. - :param feedback: QgsFeedback object for progress reporting and cancellation's. - :param context: QgsProcessingContext object for processing. This can be used to pass objects to the thread. e.g. the QgsProject Instance. - :param working_directory: Folder containing study_area.gpkg where the outputs will be placed. If not set, the value will be taken from QSettings. @@ -64,11 +56,9 @@ def __init__( # this item will directly update the tree super().__init__(item, cell_size_m, analysis_scale, feedback, context, working_directory) self.workflow_name = "use_street_lights" - layer_path = self.attributes.get("street_lights_shapefile", None) if layer_path: layer_path = unquote(layer_path) - if not layer_path: log_message( "Invalid raster found in street_lights_shapefile, trying street_lights_point_layer_source.", @@ -87,7 +77,6 @@ def __init__( error += f"Layer Source: {self.street_lights_layer_source} " self.attributes["error"] = error raise Exception(error) - self.features_layer = QgsVectorLayer(layer_path, "points", "ogr") if not self.features_layer.isValid(): log_message("street lights source file not valid", level=Qgis.Critical) @@ -96,7 +85,6 @@ def __init__( error += f"Layer Source: {self.street_lights_layer_source} " self.attributes["error"] = error raise Exception(error) - factor_id = None if item.isIndicator() and item.parentItem: factor_id = item.parentItem.attribute("id", None) @@ -108,7 +96,6 @@ def __init__( config = mapping.get(analysis_scale, mapping.get("national")) if not config: raise Exception("Streetlights mapping config not found.") - self.buffer_distance = int(config.get("buffer_distance", 0)) self.scoring_method = config.get("scoring_method", "") self.scores = config.get("scores", {}) @@ -123,29 +110,25 @@ def _process_features_for_area( current_bbox: QgsGeometry, area_features: QgsVectorLayer, index: int, + area_name: str = None, ) -> str: """ Executes the actual workflow logic for a single area Must be implemented by subclasses. - :current_area: Current polygon from our study area. :current_bbox: Bounding box of the above area. :area_features: A vector layer of features to analyse that includes only features in the study area. :index: Iteration / number of area being processed. - :return: A raster layer file path if processing completes successfully, False if canceled or failed. """ log_message(f"{self.workflow_name} Processing Started") - # Step 1: Buffer the selected features buffered_layer = self._buffer_features(area_features, f"{self.layer_id}_buffered_{index}") # Step 2: Select grid cells that intersect with features output_path = os.path.join(self.workflow_directory, f"{self.layer_id}_grid_cells.gpkg") area_grid = select_grid_cells_and_count_features(self.grid_layer, area_features, output_path, self.feedback) - # Step 3: Assign scores to the grid layer grid_layer = self._score_grid(area_grid, buffered_layer) - # Step 4: Rasterize the grid layer using the assigned scores raster_output = self._rasterize( grid_layer, @@ -154,17 +137,14 @@ def _process_features_for_area( value_field="score", default_value=0, ) - return raster_output def _buffer_features(self, layer: QgsVectorLayer, output_name: str) -> QgsVectorLayer: """ Buffer the input features by the buffer_distance km. - Args: layer (QgsVectorLayer): The input feature layer. output_name (str): A name for the output buffered layer. - Returns: QgsVectorLayer: The buffered features layer. """ @@ -179,7 +159,6 @@ def _buffer_features(self, layer: QgsVectorLayer, output_name: str) -> QgsVector "OUTPUT": output_path, }, )["OUTPUT"] - buffered_layer = QgsVectorLayer(output_path, output_name, "ogr") return buffered_layer @@ -191,16 +170,15 @@ def _process_raster_for_area( current_bbox: QgsGeometry, area_raster: str, index: int, + area_name: str = None, ): """ Executes the actual workflow logic for a single area using a raster. - :current_area: Current polygon from our study area. :clip_area: Polygon to clip the raster to which is aligned to cell edges. :current_bbox: Bounding box of the above area. :area_raster: A raster layer of features to analyse that includes only bbox pixels in the study area. :index: Index of the current area. - :return: Path to the reclassified raster. """ pass @@ -211,6 +189,7 @@ def _process_aggregate_for_area( clip_area: QgsGeometry, current_bbox: QgsGeometry, index: int, + area_name: str = None, ): """ Executes the workflow, reporting progress through the feedback object and checking for cancellation. @@ -220,12 +199,10 @@ def _process_aggregate_for_area( def _score_grid(self, grid_layer: QgsVectorLayer, buffered_layer: QgsVectorLayer) -> QgsVectorLayer: """ Assign scores to a grid layer and rasterize it. - Args: grid_layer (QgsVectorLayer): The grid layer representing the study area. buffered_layer (QgsVectorLayer): Buffered layer to evaluate intersections. index (int): Index for output file naming. - Returns: str: Path to the output raster file. """ @@ -234,45 +211,37 @@ def _score_grid(self, grid_layer: QgsVectorLayer, buffered_layer: QgsVectorLayer tag="GeoE3", level=Qgis.Info, ) - # Add a new attribute to the grid layer for storing the score grid_layer.startEditing() if not grid_layer.fields().indexFromName("score") >= 0: grid_layer.dataProvider().addAttributes([QgsField("score", QVariant.Int)]) grid_layer.updateFields() - # Assign scores based on intersection with the buffered layer for grid_feature in grid_layer.getFeatures(): grid_geom = grid_feature.geometry() max_score = 0 - for buffered_feature in buffered_layer.getFeatures(): buffered_geom = buffered_feature.geometry() intersection = grid_geom.intersection(buffered_geom) if intersection.isEmpty(): continue - if self.scoring_method == "binary": max_score = max(max_score, self.scores.get("intersects_buffer", 5)) continue - overlap_percent = (intersection.area() / grid_geom.area()) * 100 log_message( f"Overlap percentage: {overlap_percent}", tag="GeoE3", level=Qgis.Info, ) - # Determine score based on overlap percentage thresholds if self.scoring_method == "percentage_intersection": for min_pct, score in sorted(self.percentage_scores.items(), reverse=True): if overlap_percent >= min_pct: max_score = max(max_score, score) break - # Update the "score" attribute for the feature grid_feature.setAttribute("score", max_score) grid_layer.updateFeature(grid_feature) - grid_layer.commitChanges() return grid_layer diff --git a/geest/core/workflows/workflow_base.py b/geest/core/workflows/workflow_base.py index fe8a88f5..d0c4e1c6 100644 --- a/geest/core/workflows/workflow_base.py +++ b/geest/core/workflows/workflow_base.py @@ -10,19 +10,6 @@ from abc import abstractmethod from typing import Optional -from geoe3.core import JsonTreeItem, setting -from geoe3.core.algorithms import ( - AreaIterator, - GHSLDownloader, - GHSLProcessor, - check_and_reproject_layer, - combine_rasters_to_vrt, - geometry_to_memory_layer, - subset_vector_layer, -) -from geoe3.core.constants import GDAL_OUTPUT_DATA_TYPE -from geoe3.core.grid_column_utils import write_raster_values_to_grid -from geoe3.utilities import log_layer_count, log_message, resources_path from qgis import processing from qgis.core import ( Qgis, @@ -39,6 +26,23 @@ ) from qgis.PyQt.QtCore import QObject, QSettings, pyqtSignal +from geest.core import JsonTreeItem, setting +from geest.core.algorithms import ( + AreaIterator, + GHSLDownloader, + GHSLProcessor, + check_and_reproject_layer, + combine_rasters_to_vrt, + geometry_to_memory_layer, + subset_vector_layer, +) +from geest.core.constants import GDAL_OUTPUT_DATA_TYPE +from geest.core.grid_column_utils import ( + rasterize_grid_column, + write_raster_values_to_grid, +) +from geest.utilities import log_layer_count, log_message, resources_path + class WorkflowBase(QObject): """ @@ -128,9 +132,19 @@ def __init__( attrs["error_file"] = None attrs["execution_start_time"] = None attrs["execution_end_time"] = None - self.layer_id = self.attributes.get("id", "").lower().replace(" ", "_") + # Add prefix based on item role to avoid namespace collisions + raw_id = self.attributes.get("id", "").lower().replace(" ", "_") + role = self.item.role if hasattr(self.item, "role") else "" + if role == "dimension": + self.layer_id = f"dim_{raw_id}" + elif role == "factor": + self.layer_id = f"fac_{raw_id}" + else: + self.layer_id = raw_id # indicators keep raw ID self.aggregation = False self.analysis_mode = self.item.attribute("analysis_mode", "") + # Grid-first mode: if True, skip raster-to-grid sampling since grid is already populated + self.use_grid_first = False self.updateProgress(0.0) self.output_filename = self.attributes.get("output_filename", "") self.feedback.progressChanged.connect(self.updateProgress) @@ -345,6 +359,7 @@ def _process_features_for_area( current_bbox: QgsGeometry, area_features: QgsVectorLayer, index: int, + area_name: Optional[str] = None, ) -> str: """ Executes the actual workflow logic for a single area @@ -356,6 +371,7 @@ def _process_features_for_area( current_bbox: Bounding box of the above area. area_features: A vector layer of features to analyse that includes only features in the study area. index: Iteration / number of area being processed. + area_name: Name of the area being processed (for grid-first mode). Returns: A raster layer file path if processing completes successfully, False if canceled or failed. @@ -370,6 +386,7 @@ def _process_raster_for_area( current_bbox: QgsGeometry, area_raster: str, index: int, + area_name: Optional[str] = None, ): """ Executes the actual workflow logic for a single area using a raster. @@ -380,6 +397,7 @@ def _process_raster_for_area( current_bbox: Bounding box of the above area. area_raster: A raster layer of features to analyse that includes only bbox pixels in the study area. index: Index of the current area. + area_name: Name of the area being processed (for grid-first mode). Returns: Path to the reclassified raster. @@ -393,6 +411,7 @@ def _process_aggregate_for_area( clip_area: QgsGeometry, current_bbox: QgsGeometry, index: int, + area_name: Optional[str] = None, ): """ Executes the actual workflow logic for a single area using an aggregate. @@ -402,6 +421,7 @@ def _process_aggregate_for_area( clip_area: Polygon to clip the raster to which is aligned to cell edges. current_bbox: Bounding box of the above area. index: Index of the current area. + area_name: Name of the area being processed (for grid-first mode). Returns: Path to the reclassified raster. @@ -522,6 +542,7 @@ def execute(self) -> bool: current_bbox=current_bbox, area_features=area_features, index=index, + area_name=area_name, ) elif not self.aggregation: # assumes we are processing a raster input area_raster = self._subset_raster_layer(bbox=current_bbox, index=index) @@ -531,6 +552,7 @@ def execute(self) -> bool: current_bbox=current_bbox, area_raster=area_raster, index=index, + area_name=area_name, ) elif self.aggregation: # we are processing an aggregate raster_output = self._process_aggregate_for_area( @@ -538,6 +560,7 @@ def execute(self) -> bool: clip_area=clip_area, current_bbox=current_bbox, index=index, + area_name=area_name, ) # clip the area by its matching mask layer in study_area geopackage @@ -550,7 +573,8 @@ def execute(self) -> bool: output_rasters.append(masked_layer) # Write raster values to grid for this area - if masked_layer and os.path.exists(masked_layer): + # Skip this step for grid-first workflows since grid was already populated + if not self.use_grid_first and masked_layer and os.path.exists(masked_layer): self.updateStatus(f"Writing grid values for area {index + 1}...") updated_cells = write_raster_values_to_grid( gpkg_path=self.gpkg_path, @@ -931,3 +955,67 @@ def _combine_rasters_to_vrt(self, rasters: list) -> None: log_message("Debug mode is on. Keeping all files in the workflow directory.") return vrt_filepath + + def _rasterize_grid_column( + self, + column_name: str, + bbox: QgsGeometry, + area_name: str, + index: int, + nodata: float = -9999.0, + ) -> Optional[str]: + """Rasterize a grid column to create a raster output. + + This method creates a raster from the study_area_grid column using + gdal_rasterize. It is used for grid-first workflows where results + are written to grid columns first, then rasterized for VRT generation. + + Args: + column_name: Name of the grid column to rasterize. + bbox: Bounding box geometry for the output raster extent. + area_name: Name of the area being processed. + index: Index of the area being processed (for output filename). + nodata: NoData value for the output raster. + + Returns: + Path to the output raster, or None on error. + """ + output_path = os.path.join( + self.workflow_directory, + f"{column_name}_from_grid_{index}.tif", + ) + + # Get extent from bbox + rect = bbox.boundingBox() + extent = (rect.xMinimum(), rect.yMinimum(), rect.xMaximum(), rect.yMaximum()) + + log_message( + f"Rasterizing grid column {column_name} for area {area_name}", + tag="GeoE3", + level=Qgis.Info, + ) + + success = rasterize_grid_column( + gpkg_path=self.gpkg_path, + column_name=column_name, + output_raster_path=output_path, + cell_size=self.cell_size_m, + extent=extent, + nodata=nodata, + area_name=area_name, + ) + + if success: + log_message( + f"Rasterized grid column {column_name} to {output_path}", + tag="GeoE3", + level=Qgis.Info, + ) + return output_path + else: + log_message( + f"Failed to rasterize grid column {column_name}", + tag="GeoE3", + level=Qgis.Warning, + ) + return None diff --git a/geest/gui/panels/create_project_panel.py b/geest/gui/panels/create_project_panel.py index 3b615631..9ea24f11 100644 --- a/geest/gui/panels/create_project_panel.py +++ b/geest/gui/panels/create_project_panel.py @@ -305,7 +305,7 @@ def create_project(self): feedback.progressChanged.connect(self.subtask_progress_updated) self.disable_widgets() if debug_env: - processor.process_study_area() + processor.run() else: self.queue_manager.add_task(processor) self.queue_manager.start_processing() @@ -503,7 +503,7 @@ def on_report_failed(self): self.child_progress_bar.setMinimum(0) self.child_progress_bar.setMaximum(100) self.child_progress_bar.setValue(0) - self.child_progress_bar.setFormat(f"Report failed — continuing") + self.child_progress_bar.setFormat("Report failed — continuing") self.enable_widgets() self.switch_to_next_tab.emit() diff --git a/geest/gui/panels/tree_panel.py b/geest/gui/panels/tree_panel.py index f95f4ddc..4151fddf 100644 --- a/geest/gui/panels/tree_panel.py +++ b/geest/gui/panels/tree_panel.py @@ -23,6 +23,7 @@ QgsProject, QgsVectorLayer, ) +from qgis.gui import QgsMessageBar from qgis.PyQt.QtCore import QModelIndex, QPoint, QSettings, Qt, pyqtSignal, pyqtSlot from qgis.PyQt.QtGui import QMovie from qgis.PyQt.QtWidgets import ( @@ -45,7 +46,6 @@ QWidget, ) from qgis.utils import iface -from qgis.gui import QgsMessageBar from geest.core import JsonTreeItem, WorkflowQueueManager from geest.core.algorithms import ( @@ -57,8 +57,8 @@ WEEByPopulationScoreProcessingTask, ) from geest.core.reports import StudyAreaReport -from geest.core.tasks import AnalysisReportTask from geest.core.settings import set_setting, setting +from geest.core.tasks import AnalysisReportTask from geest.core.utilities import add_to_map, validate_network_layer from geest.gui.dialogs import ( AnalysisAggregationDialog, @@ -136,19 +136,19 @@ def __init__(self, parent=None, json_file=None): """ QPushButton { background-color: qlineargradient(x1:0, y1:0, x2:0, y2:1, - stop:0 #b8dce3, stop:1 #8ec8d0); - color: #000; - border: 1px solid #6fa8b0; + stop:0 #3E799B, stop:1 #2d5a75); + color: #ffffff; + border: 1px solid #2d5a75; border-radius: 3px; padding: 4px 12px; } QPushButton:hover { background-color: qlineargradient(x1:0, y1:0, x2:0, y2:1, - stop:0 #c8e8ef, stop:1 #9ed8e0); + stop:0 #4a8bb0, stop:1 #3E799B); } QPushButton:pressed { background-color: qlineargradient(x1:0, y1:0, x2:0, y2:1, - stop:0 #8ec8d0, stop:1 #b8dce3); + stop:0 #2d5a75, stop:1 #3E799B); } """ ) @@ -607,6 +607,7 @@ def save_json_to_working_directory(self): tag="GeoE3", level=Qgis.Warning, ) + return try: json_data = self.model.to_json() diff --git a/geest/utilities.py b/geest/utilities.py index 3c59bd7a..3a72c19f 100644 --- a/geest/utilities.py +++ b/geest/utilities.py @@ -69,7 +69,17 @@ def theme_stylesheet() -> str: # try move it to the top and check that all the subsequent rules work still... light_theme_stylesheet = f""" QPushButton {{ - background-color: rgba(62, 121, 155, 25); + background-color: rgba(62, 121, 155, 180); + color: #ffffff; + border: 1px solid #3E799B; + border-radius: 3px; + padding: 4px 8px; + }} + QPushButton:hover {{ + background-color: rgba(62, 121, 155, 220); + }} + QPushButton:pressed {{ + background-color: rgba(45, 90, 117, 255); }} QToolTip {{ color: #000000; diff --git a/scripts/start_qgis.sh b/scripts/start_qgis.sh index 87286ee3..2e224e7f 100755 --- a/scripts/start_qgis.sh +++ b/scripts/start_qgis.sh @@ -4,39 +4,33 @@ echo "--------------------------------" echo "Do you want to enable debug mode?" choice=$(gum choose "🪲 Yes" "🐞 No") case $choice in -"🪲 Yes") developer_mode=1 ;; -"🐞 No") developer_mode=0 ;; +"🪲 Yes") DEVELOPER_MODE=1 ;; +"🐞 No") DEVELOPER_MODE=0 ;; esac echo "Do you want to enable experimental features?" choice=$(gum choose "🪲 Yes" "🐞 No") case $choice in -"🪲 Yes") GEOE3_EXPERIMENTAL=1 GEEST_EXPERIMENTAL=1 ;; -"🐞 No") GEOE3_EXPERIMENTAL=0 GEEST_EXPERIMENTAL=0 ;; +"🪲 Yes") GEOE3_EXPERIMENTAL=1 ;; +"🐞 No") GEOE3_EXPERIMENTAL=0 ;; esac # Running on local used to skip tests that will not work in a local dev env GEOE3_LOG=$HOME/GEOE3.log -GEEST_LOG=$HOME/GEEST2.log GEOE3_TEST_DIR="$(pwd)/test" -GEEST_TEST_DIR="$(pwd)/test" # Set test directory relative to project root -rm -f "$GEOE3_LOG" "$GEEST_LOG" +rm -f "$GEOE3_LOG" #nix-shell -p \ # This is the old way using default nix packages with overrides # 'qgis.override { extraPythonPackages = (ps: [ ps.pyqtwebengine ps.jsonschema ps.debugpy ps.future ps.psutil ]);}' \ -# --command "GEEST_LOG=${GEEST_LOG} GEEST_DEBUG=${developer_mode} RUNNING_ON_LOCAL=1 qgis --profile GEEST2" +# --command "GEEST_LOG=${GEEST_LOG} GEEST_DEBUG=${DEVELOPER_MODE} RUNNING_ON_LOCAL=1 qgis --profile GEEST2" # This is the new way, using Ivan Mincis nix spatial project and a flake # see flake.nix for implementation details # Both GEOE3_* and GEEST_* env vars are set for backward compatibility # QT_QPA_PLATFORM flag forces it to run under x11 protocol GEOE3_LOG=${GEOE3_LOG} \ - GEEST_LOG=${GEEST_LOG} \ - GEOE3_DEBUG=${developer_mode} \ - GEEST_DEBUG=${developer_mode} \ + GEOE3_DEBUG=${DEVELOPER_MODE} \ GEOE3_EXPERIMENTAL=${GEOE3_EXPERIMENTAL} \ - GEEST_EXPERIMENTAL=${GEEST_EXPERIMENTAL} \ GEOE3_TEST_DIR=${GEOE3_TEST_DIR} \ - GEEST_TEST_DIR=${GEEST_TEST_DIR} \ RUNNING_ON_LOCAL=1 \ QT_QPA_PLATFORM=xcb \ -nix run .#default -- --profile GEOE3 + nix run .#default -- --profile GEOE3 diff --git a/scripts/start_qgis_ltr.sh b/scripts/start_qgis_ltr.sh index 5fa4a27b..2251a2e9 100755 --- a/scripts/start_qgis_ltr.sh +++ b/scripts/start_qgis_ltr.sh @@ -1,18 +1,36 @@ #!/usr/bin/env bash echo "🪛 Running QGIS with the GEOE3 profile:" echo "--------------------------------" +echo "Do you want to enable debug mode?" +choice=$(gum choose "🪲 Yes" "🐞 No") +case $choice in +"🪲 Yes") DEVELOPER_MODE=1 ;; +"🐞 No") DEVELOPER_MODE=0 ;; +esac +echo "Do you want to enable experimental features?" +choice=$(gum choose "🪲 Yes" "🐞 No") +case $choice in +"🪲 Yes") GEOE3_EXPERIMENTAL=1 ;; +"🐞 No") GEOE3_EXPERIMENTAL=0 ;; +esac -# Set environment variables (both GEOE3_* and GEEST_* for backward compatibility) +# Running on local used to skip tests that will not work in a local dev env +GEOE3_LOG=$HOME/GEOE3.log GEOE3_TEST_DIR="$(pwd)/test" -GEEST_TEST_DIR="$(pwd)/test" # Set test directory relative to project root +rm -f "$GEOE3_LOG" +#nix-shell -p \ +# This is the old way using default nix packages with overrides +# 'qgis.override { extraPythonPackages = (ps: [ ps.pyqtwebengine ps.jsonschema ps.debugpy ps.future ps.psutil ]);}' \ +# --command "GEEST_LOG=${GEEST_LOG} GEEST_DEBUG=${DEVELOPER_MODE} RUNNING_ON_LOCAL=1 qgis --profile GEEST2" -# This is the flake approach, using Ivan Mincis nix spatial project and a flake +# This is the new way, using Ivan Mincis nix spatial project and a flake # see flake.nix for implementation details +# Both GEOE3_* and GEEST_* env vars are set for backward compatibility # QT_QPA_PLATFORM flag forces it to run under x11 protocol GEOE3_LOG=${GEOE3_LOG} \ - GEEST_LOG=${GEEST_LOG} \ + GEOE3_DEBUG=${DEVELOPER_MODE} \ + GEOE3_EXPERIMENTAL=${GEOE3_EXPERIMENTAL} \ GEOE3_TEST_DIR=${GEOE3_TEST_DIR} \ - GEEST_TEST_DIR=${GEEST_TEST_DIR} \ RUNNING_ON_LOCAL=1 \ QT_QPA_PLATFORM=xcb \ nix run .#qgis-ltr -- --profile GEOE3 From fc29768132d33786a89e9085371cc0dc1d1f6520 Mon Sep 17 00:00:00 2001 From: Tim Sutton Date: Mon, 30 Mar 2026 21:55:29 +0100 Subject: [PATCH 04/55] Hide single-child factors in tree view, show indicator in their place When a factor has only one indicator child, the factor is now hidden and the indicator is displayed directly under the dimension. This reduces visual clutter in the tree. Changes: - Add get_effective_visible_children() to JsonTreeItem for child promotion - Add get_visual_parent() to JsonTreeItem for hidden parent handling - Add visible_row() to JsonTreeItem for visible position calculation - Update JsonTreeModel.rowCount() to use effective visible children - Update JsonTreeModel.index() to use effective visible children - Update JsonTreeModel.parent() to handle hidden parents - Update _findIndexByGuid() to search both visible and hidden items - Add toggle_single_child_factors_visibility() method to model - Add are_single_child_factors_hidden() method to model - Enable feature by default when loading model data --- geest/core/json_tree_item.py | 61 +++++++++++++-- geest/gui/panels/tree_panel.py | 6 ++ geest/gui/views/treeview.py | 134 ++++++++++++++++++++++++++------- 3 files changed, 167 insertions(+), 34 deletions(-) diff --git a/geest/core/json_tree_item.py b/geest/core/json_tree_item.py index b8660c1e..e6053c77 100644 --- a/geest/core/json_tree_item.py +++ b/geest/core/json_tree_item.py @@ -10,7 +10,7 @@ from typing import Optional from qgis.core import Qgis -from qgis.PyQt.QtCore import Qt, QReadWriteLock, QReadLocker, QWriteLocker +from qgis.PyQt.QtCore import QReadWriteLock, Qt from qgis.PyQt.QtGui import QColor, QFont, QIcon from geest.core.settings import setting @@ -142,10 +142,61 @@ def is_enabled(self) -> bool: return self._enabled def is_only_child(self) -> bool: - """Returns the only child status of this item.""" - siblings_count = len(self.parentItem.childItems) - if siblings_count == 1: - return True + """Returns True if this item is the only child of its parent.""" + if not self.parentItem: + return False + return len(self.parentItem.childItems) == 1 + + def visible_row(self) -> int: + """Returns the visible row position among siblings (excluding hidden siblings). + + This is used when hidden items need to be skipped in the visual tree. + """ + if not self.parentItem: + return 0 + visible_siblings = [c for c in self.parentItem.childItems if c.is_visible()] + if self in visible_siblings: + return visible_siblings.index(self) + return 0 + + def get_effective_visible_children(self) -> list: + """Returns effective visible children for tree display. + + For normal visible children, returns them directly. + For hidden children that have exactly one child, returns their child instead + (promoting the grandchild to appear at this level). + This enables hiding single-child factors while showing their indicator. + + Returns: + list: List of JsonTreeItem objects to display as children. + """ + effective_children = [] + for child in self.childItems: + if child.is_visible(): + effective_children.append(child) + elif len(child.childItems) == 1: + # Hidden single-child item: promote its child to this level + grandchild = child.childItems[0] + if grandchild.is_visible(): + effective_children.append(grandchild) + return effective_children + + def get_visual_parent(self): + """Returns the visual parent for tree display. + + If the actual parent is hidden, returns the grandparent instead. + This handles the case where factors are hidden but indicators need + to appear under the dimension. + + Returns: + JsonTreeItem: The visual parent item. + """ + if not self.parentItem: + return None + if self.parentItem.is_visible(): + return self.parentItem + # Parent is hidden, return grandparent + return self.parentItem.parentItem def internalPointer(self): """Returns a reference to itself, or any unique identifier for the item.""" diff --git a/geest/gui/panels/tree_panel.py b/geest/gui/panels/tree_panel.py index 4151fddf..c334ad58 100644 --- a/geest/gui/panels/tree_panel.py +++ b/geest/gui/panels/tree_panel.py @@ -470,6 +470,8 @@ def working_directory_changed(self, new_directory): self.load_json() # sets the class member json_data self.model.loadJsonData(self.json_data) self.apply_women_considerations_logic() # Hide factors based on women considerations + # Hide factors that have only a single indicator (indicator shown in their place) + self.model.toggle_single_child_factors_visibility(hide_single_child=True) self.treeView.expandAll() log_message(f"Loaded model.json from {model_path}") @@ -521,6 +523,8 @@ def working_directory_changed(self, new_directory): self.load_json() self.model.loadJsonData(self.json_data) self.apply_women_considerations_logic() # Hide factors based on women considerations + # Hide factors that have only a single indicator (indicator shown in their place) + self.model.toggle_single_child_factors_visibility(hide_single_child=True) self.treeView.expandAll() # Collapse any factors that have only a single indicator self.treeView.collapse_single_nodes() @@ -765,6 +769,8 @@ def load_json_from_file(self): self.load_json() self.model.loadJsonData(self.json_data) self.apply_women_considerations_logic() # Hide factors based on women considerations + # Hide factors that have only a single indicator (indicator shown in their place) + self.model.toggle_single_child_factors_visibility(hide_single_child=True) self.treeView.expandAll() def export_json_to_file(self): diff --git a/geest/gui/views/treeview.py b/geest/gui/views/treeview.py index dfb2eb8c..fff890ab 100644 --- a/geest/gui/views/treeview.py +++ b/geest/gui/views/treeview.py @@ -321,6 +321,53 @@ def toggle_indicator_visibility(self, visible: bool, parent_item=None): # Notify view about layout changes self.layoutChanged.emit() + def toggle_single_child_factors_visibility(self, hide_single_child: bool): + """ + Toggles the visibility of factors that have only one child indicator. + + When hide_single_child is True, factors with exactly one child indicator + are hidden, and their indicator is promoted to appear directly under + the dimension. + + Args: + hide_single_child (bool): If True, hide factors with only one child. + If False, show all factors. + """ + analysis_item = self.get_analysis_item() + if not analysis_item: + return + + for dimension in analysis_item.childItems: + for factor in dimension.childItems: + # Check if factor has exactly one child indicator + if len(factor.childItems) == 1: + # Hide the factor if hide_single_child is True + factor.set_visibility(not hide_single_child) + else: + # Always show factors with multiple children + factor.set_visibility(True) + + # Notify view about layout changes + self.layoutChanged.emit() + + def are_single_child_factors_hidden(self) -> bool: + """ + Check if single-child factors are currently hidden. + + Returns: + bool: True if any single-child factor is hidden, False otherwise. + """ + analysis_item = self.get_analysis_item() + if not analysis_item: + return False + + for dimension in analysis_item.childItems: + for factor in dimension.childItems: + if len(factor.childItems) == 1: + # Return visibility state of first single-child factor found + return not factor.is_visible() + return False + def data(self, index, role): """ Provides data for the given index and role, including displaying custom attributes such as the font color, @@ -639,21 +686,25 @@ def remove_item(self, item): def rowCount(self, parent=QModelIndex()): """ - Returns the number of child items for the given parent, excluding hidden items if visibility is off. + Returns the number of effective visible children for the given parent. + + This uses get_effective_visible_children() which handles: + - Normal visible children + - Promotion of grandchildren when a child is hidden but has exactly one child Args: parent (QModelIndex): The parent index. Returns: - int: The number of visible child items under the parent. + int: The number of effective visible child items under the parent. """ if not parent.isValid(): parentItem = self.rootItem else: parentItem = parent.internalPointer() - # Count only visible items - return len([child for child in parentItem.childItems if child.is_visible()]) + # Use effective visible children (handles hidden single-child factors) + return len(parentItem.get_effective_visible_children()) def columnCount(self, parent=QModelIndex()): """ @@ -691,36 +742,51 @@ def guidIndex(self, guid): """ return self._findIndexByGuid(self.rootItem, guid) - def _findIndexByGuid(self, parent_item, target_guid, parent_index=QModelIndex()): + def _findIndexByGuid(self, parent_item, target_guid): """ Recursively searches for the target guid within the children of the given parent item. + Uses get_effective_visible_children() to calculate the correct visual row + position, accounting for hidden single-child factors. + Args: parent_item (JsonTreeItem): The parent item to start searching from. target_guid (str): The GUID of the item to search for. - parent_index (QModelIndex): The QModelIndex of the parent item. Returns: QModelIndex: The QModelIndex of the target item, or an invalid QModelIndex if not found. """ - for row in range(parent_item.childCount()): - child_item = parent_item.child(row) + # Get effective visible children for correct row calculation + effective_children = parent_item.get_effective_visible_children() - # If the item's UUID matches, return its QModelIndex + for row, child_item in enumerate(effective_children): + # If the item's GUID matches, return its QModelIndex if child_item.guid == target_guid: return self.createIndex(row, 0, child_item) # Recursively search children - child_index = self._findIndexByGuid(child_item, target_guid, self.createIndex(row, 0, parent_item)) + child_index = self._findIndexByGuid(child_item, target_guid) if child_index.isValid(): return child_index + # Also search in hidden children (in case the item is under a hidden factor) + for child_item in parent_item.childItems: + if not child_item.is_visible(): + # Search within hidden item's children + child_index = self._findIndexByGuid(child_item, target_guid) + if child_index.isValid(): + return child_index + return QModelIndex() # Return invalid QModelIndex if not found def index(self, row, column, parent=QModelIndex()): """ Creates a QModelIndex for the specified row and column under the given parent. + Uses get_effective_visible_children() to handle: + - Normal visible children + - Promotion of grandchildren when a child is hidden but has exactly one child + Args: row (int): The row of the child item. column (int): The column of the child item. @@ -737,8 +803,10 @@ def index(self, row, column, parent=QModelIndex()): else: parentItem = parent.internalPointer() - childItem = parentItem.child(row) - if childItem: + # Use effective visible children (handles hidden single-child factors) + effective_children = parentItem.get_effective_visible_children() + if row < len(effective_children): + childItem = effective_children[row] return self.createIndex(row, column, childItem) return QModelIndex() @@ -746,6 +814,10 @@ def parent(self, index): """ Returns the parent index of the specified index. + Uses get_visual_parent() to handle the case where the actual parent + is hidden (e.g., a factor with only one child). In this case, the + visual parent is the grandparent (dimension). + Args: index (QModelIndex): The child index. @@ -756,12 +828,25 @@ def parent(self, index): return QModelIndex() childItem = index.internalPointer() - parentItem = childItem.parent() + # Use visual parent which skips hidden parents + parentItem = childItem.get_visual_parent() - if parentItem == self.rootItem: + if parentItem is None or parentItem == self.rootItem: return QModelIndex() - return self.createIndex(parentItem.row(), 0, parentItem) + # Calculate the visible row position of the parent + # We need to find the parent's position in its own parent's effective children + grandparent = parentItem.get_visual_parent() + if grandparent is None: + grandparent = self.rootItem + + effective_siblings = grandparent.get_effective_visible_children() + if parentItem in effective_siblings: + row = effective_siblings.index(parentItem) + else: + row = 0 + + return self.createIndex(row, 0, parentItem) def headerData(self, section, orientation, role=Qt.ItemDataRole.DisplayRole): """ @@ -855,20 +940,11 @@ def collapse_node_in_view(self, item): self.treeView.setExpanded(index, False) def toggle_only_child_indicator_nodes(self): - """Toggles visibility of indicator nodes if it has no siblings.""" - indicators_visible = self._indicators_only_child() - self.model().toggle_indicator_visibility(not indicators_visible) + """Toggles visibility of factors that have only one child indicator. - def _indicators_only_child(self): - """Check if indicators are currently the only child under their parent. - - Returns: - bool: True if indicators are only children, False otherwise. + When enabled, single-child factors are hidden and their indicator + is shown directly under the dimension. """ model = self.model() - analysis_item = model.get_analysis_item() - for dimension in analysis_item.childItems: - for factor in dimension.childItems: - for indicator in factor.childItems: - return indicator.is_only_child() - return True + currently_hidden = model.are_single_child_factors_hidden() + model.toggle_single_child_factors_visibility(not currently_hidden) From 53ed4a003a43c3dc2ec8b98e19139e512ee96e5e Mon Sep 17 00:00:00 2001 From: Tim Sutton Date: Mon, 30 Mar 2026 23:08:53 +0100 Subject: [PATCH 05/55] Add grid vector layer visualization option alongside raster - Add indicator-vector-template.qml with [attribute] placeholder - Add add_grid_layer_to_map() function to load study_area_grid with templated styling for any column - Add "Add to map (Grid)" action to context menus for indicators, factors, and dimensions - Users can now compare raster and vector grid visualizations --- geest/core/utilities.py | 164 +++++++++++ geest/gui/panels/tree_panel.py | 26 +- .../qml/indicator-vector-template.qml | 269 ++++++++++++++++++ 3 files changed, 458 insertions(+), 1 deletion(-) create mode 100644 geest/resources/qml/indicator-vector-template.qml diff --git a/geest/core/utilities.py b/geest/core/utilities.py index abc88c08..df55c33d 100644 --- a/geest/core/utilities.py +++ b/geest/core/utilities.py @@ -177,6 +177,170 @@ def add_to_map( ) +def add_grid_layer_to_map( + item: "JsonTreeItem", + column_name: str, + working_directory: str, + layer_name: str = None, + group: str = "GeoE3", +): + """Add a styled grid layer to the map for a specific column. + + This function creates a layer from study_area_grid and applies the + indicator-vector-template.qml style with the column name substituted. + + Args: + item: The tree item (indicator/factor/dimension) to display. + column_name: The column in study_area_grid to symbolize. + working_directory: Path to the working directory containing study_area.gpkg. + layer_name: Optional display name for the layer. Defaults to item name. + group: The top-level group name. Defaults to "GeoE3". + """ + import tempfile + + from geest.utilities import resources_path + + # Construct the GeoPackage path + gpkg_path = os.path.join(working_directory, "study_area", "study_area.gpkg") + if not os.path.exists(gpkg_path): + log_message( + f"GeoPackage not found: {gpkg_path}", + tag="GeoE3", + level=Qgis.Warning, + ) + return + + # Create layer URI for study_area_grid + layer_uri = f"{gpkg_path}|layername=study_area_grid" + + if not layer_name: + layer_name = item.data(0) + + log_message(f"Adding grid layer for column: {column_name}") + + # Load the layer + layer = QgsVectorLayer(layer_uri, layer_name, "ogr") + if not layer.isValid(): + log_message( + f"Layer {layer_name} is invalid and cannot be added.", + tag="GeoE3", + level=Qgis.Warning, + ) + return + + # Load the QML template and substitute the column name + template_path = resources_path("resources", "qml", "indicator-vector-template.qml") + if not os.path.exists(template_path): + log_message( + f"QML template not found: {template_path}", + tag="GeoE3", + level=Qgis.Warning, + ) + return + + with open(template_path, "r") as f: + qml_content = f.read() + + # Replace the [attribute] placeholder with the actual column name + qml_content = qml_content.replace("[attribute]", column_name) + + # Write to a temporary file and apply the style + with tempfile.NamedTemporaryFile(mode="w", suffix=".qml", delete=False) as tmp: + tmp.write(qml_content) + tmp_path = tmp.name + + try: + result = layer.loadNamedStyle(tmp_path) + if not result[0]: + log_message( + f"Failed to apply style: {result[1]}", + tag="GeoE3", + level=Qgis.Warning, + ) + finally: + # Clean up temp file + try: + os.unlink(tmp_path) + except OSError: + pass + + project = QgsProject.instance() + + # Check if 'GeoE3' group exists, otherwise create it + root = project.layerTreeRoot() + geoe3_group = root.findGroup(group) + if geoe3_group is None: + geoe3_group = root.insertGroup(0, group) + geoe3_group.setIsMutuallyExclusive(True) + + # Traverse the tree view structure to determine the appropriate subgroup + path_list = item.getPaths() + parent_group = geoe3_group + # Truncate the last item from the path list + path_list = path_list[:-1] + + for path in path_list: + sub_group = parent_group.findGroup(path) + if sub_group is None: + sub_group = parent_group.addGroup(path) + sub_group.setIsMutuallyExclusive(True) + parent_group = sub_group + + # Check if a layer with the same name exists in the group + existing_layer = None + layer_tree_layer = None + for child in parent_group.children(): + if isinstance(child, QgsLayerTreeGroup): + continue + if child.layer().name() == layer_name: + existing_layer = child.layer() + layer_tree_layer = child + break + + # If the layer exists, update its style instead of re-adding + if existing_layer is not None: + log_message(f"Refreshing existing layer: {existing_layer.name()}") + # Re-apply style to existing layer + with tempfile.NamedTemporaryFile(mode="w", suffix=".qml", delete=False) as tmp: + tmp.write(qml_content) + tmp_path = tmp.name + try: + existing_layer.loadNamedStyle(tmp_path) + finally: + try: + os.unlink(tmp_path) + except OSError: + pass + layer_tree_layer.setItemVisibilityChecked(True) + existing_layer.triggerRepaint() + else: + # Add the new layer + QgsProject.instance().addMapLayer(layer, False) + layer_tree_layer = parent_group.addLayer(layer) + layer_tree_layer.setExpanded(False) + log_message(f"Added layer: {layer.name()} to group: {parent_group.name()}") + + # Ensure the layer and its parent groups are visible + current_group = parent_group + while current_group is not None: + current_group.setExpanded(True) + current_group.setItemVisibilityChecked(True) + current_group = current_group.parent() + + layer_tree_layer.setItemVisibilityChecked(True) + + # Refresh the canvas + repaint_layer = existing_layer if existing_layer is not None else layer + repaint_layer.triggerRepaint() + iface.mapCanvas().refresh() + + log_message( + f"Grid layer {layer_name} for column {column_name} added to map.", + tag="GeoE3", + level=Qgis.Info, + ) + + def validate_network_layer(layer_path: str, expected_crs: QgsCoordinateReferenceSystem) -> tuple: """Validate network layer for road network analysis. diff --git a/geest/gui/panels/tree_panel.py b/geest/gui/panels/tree_panel.py index c334ad58..ebdcad4f 100644 --- a/geest/gui/panels/tree_panel.py +++ b/geest/gui/panels/tree_panel.py @@ -59,7 +59,7 @@ from geest.core.reports import StudyAreaReport from geest.core.settings import set_setting, setting from geest.core.tasks import AnalysisReportTask -from geest.core.utilities import add_to_map, validate_network_layer +from geest.core.utilities import add_grid_layer_to_map, add_to_map, validate_network_layer from geest.gui.dialogs import ( AnalysisAggregationDialog, DimensionAggregationDialog, @@ -930,6 +930,13 @@ def update_action_text(): add_factor_action = QAction("Add Factor", self) remove_dimension_action = QAction("Remove Dimension", self) + # Add grid layer action + add_grid_to_map_action = QAction("Add to map (Grid)", self) + column_name = f"dim_{item.attribute('id').lower().replace(' ', '_').replace('-', '_')}" + add_grid_to_map_action.triggered.connect( + lambda col=column_name, i=item: add_grid_layer_to_map(i, col, self.working_directory) + ) + # Connect actions add_factor_action.triggered.connect(lambda: self.model.add_factor(item)) remove_dimension_action.triggered.connect(lambda: self.model.remove_item(item)) @@ -939,6 +946,7 @@ def update_action_text(): menu.addAction(show_json_attributes_action) menu.addAction(clear_item_action) menu.addAction(add_to_map_action) + menu.addAction(add_grid_to_map_action) menu.addAction(run_item_action) menu.addAction(open_working_directory_action) menu.addAction(disable_action) @@ -949,6 +957,13 @@ def update_action_text(): add_indicator_action = QAction("Add Indicator", self) remove_factor_action = QAction("Remove Factor", self) + # Add grid layer action + add_grid_to_map_action = QAction("Add to map (Grid)", self) + column_name = f"fac_{item.attribute('id').lower().replace(' ', '_').replace('-', '_')}" + add_grid_to_map_action.triggered.connect( + lambda col=column_name, i=item: add_grid_layer_to_map(i, col, self.working_directory) + ) + # Connect actions edit_aggregation_action.triggered.connect(lambda: self.edit_factor_aggregation(item)) # Connect to method add_indicator_action.triggered.connect(lambda: self.model.add_indicator(item)) @@ -960,6 +975,7 @@ def update_action_text(): menu.addAction(show_json_attributes_action) menu.addAction(clear_item_action) menu.addAction(add_to_map_action) + menu.addAction(add_grid_to_map_action) menu.addAction(run_item_action) menu.addAction(open_working_directory_action) menu.addAction(disable_action) @@ -970,6 +986,13 @@ def update_action_text(): # of its parent factor... show_properties_action = QAction("🔘 Edit Weights", self) + # Add grid layer action (indicators use raw ID without prefix) + add_grid_to_map_action = QAction("Add to map (Grid)", self) + column_name = item.attribute("id").lower().replace(" ", "_").replace("-", "_") + add_grid_to_map_action.triggered.connect( + lambda col=column_name, i=item: add_grid_layer_to_map(i, col, self.working_directory) + ) + # Connect actions show_properties_action.triggered.connect(lambda: self.edit_factor_aggregation(item.parent())) # Add actions to menu @@ -978,6 +1001,7 @@ def update_action_text(): menu.addAction(show_json_attributes_action) menu.addAction(clear_item_action) menu.addAction(add_to_map_action) + menu.addAction(add_grid_to_map_action) menu.addAction(run_item_action) menu.addAction(open_working_directory_action) menu.addAction(disable_action) diff --git a/geest/resources/qml/indicator-vector-template.qml b/geest/resources/qml/indicator-vector-template.qml new file mode 100644 index 00000000..23d806ff --- /dev/null +++ b/geest/resources/qml/indicator-vector-template.qml @@ -0,0 +1,269 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0 + 0 + 2 + From a0ea50b57637a33c23182aff9ca7a9d4d4edb8a6 Mon Sep 17 00:00:00 2001 From: Tim Sutton Date: Mon, 30 Mar 2026 23:14:16 +0100 Subject: [PATCH 06/55] Fix TypeError in Add to map (Grid) menu actions The QAction.triggered signal passes a boolean 'checked' parameter which was being captured by the lambda's first keyword argument, overwriting the column_name. Added underscore parameter to properly capture and discard the signal argument. --- geest/gui/panels/tree_panel.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/geest/gui/panels/tree_panel.py b/geest/gui/panels/tree_panel.py index ebdcad4f..6020e42a 100644 --- a/geest/gui/panels/tree_panel.py +++ b/geest/gui/panels/tree_panel.py @@ -934,7 +934,7 @@ def update_action_text(): add_grid_to_map_action = QAction("Add to map (Grid)", self) column_name = f"dim_{item.attribute('id').lower().replace(' ', '_').replace('-', '_')}" add_grid_to_map_action.triggered.connect( - lambda col=column_name, i=item: add_grid_layer_to_map(i, col, self.working_directory) + lambda _, col=column_name, i=item: add_grid_layer_to_map(i, col, self.working_directory) ) # Connect actions @@ -961,7 +961,7 @@ def update_action_text(): add_grid_to_map_action = QAction("Add to map (Grid)", self) column_name = f"fac_{item.attribute('id').lower().replace(' ', '_').replace('-', '_')}" add_grid_to_map_action.triggered.connect( - lambda col=column_name, i=item: add_grid_layer_to_map(i, col, self.working_directory) + lambda _, col=column_name, i=item: add_grid_layer_to_map(i, col, self.working_directory) ) # Connect actions @@ -990,7 +990,7 @@ def update_action_text(): add_grid_to_map_action = QAction("Add to map (Grid)", self) column_name = item.attribute("id").lower().replace(" ", "_").replace("-", "_") add_grid_to_map_action.triggered.connect( - lambda col=column_name, i=item: add_grid_layer_to_map(i, col, self.working_directory) + lambda _, col=column_name, i=item: add_grid_layer_to_map(i, col, self.working_directory) ) # Connect actions From 3b77198dd175ba4634229e55e9cbc85a88851ba0 Mon Sep 17 00:00:00 2001 From: Tim Sutton Date: Mon, 30 Mar 2026 23:20:25 +0100 Subject: [PATCH 07/55] Add diagnostic logging to add_grid_layer_to_map - Log function entry with column name and working directory - Check for None working directory early with warning - Log layer URI and layer name - Verify column exists in grid before applying style - Log available columns for debugging - Append (Grid) suffix to layer name to distinguish from raster --- geest/core/utilities.py | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/geest/core/utilities.py b/geest/core/utilities.py index df55c33d..d1c15ada 100644 --- a/geest/core/utilities.py +++ b/geest/core/utilities.py @@ -200,6 +200,17 @@ def add_grid_layer_to_map( from geest.utilities import resources_path + log_message(f"add_grid_layer_to_map called with column: {column_name}") + log_message(f"Working directory: {working_directory}") + + if not working_directory: + log_message( + "Working directory is not set. Cannot add grid layer.", + tag="GeoE3", + level=Qgis.Warning, + ) + return + # Construct the GeoPackage path gpkg_path = os.path.join(working_directory, "study_area", "study_area.gpkg") if not os.path.exists(gpkg_path): @@ -214,9 +225,11 @@ def add_grid_layer_to_map( layer_uri = f"{gpkg_path}|layername=study_area_grid" if not layer_name: - layer_name = item.data(0) + layer_name = f"{item.data(0)} (Grid)" log_message(f"Adding grid layer for column: {column_name}") + log_message(f"Layer URI: {layer_uri}") + log_message(f"Layer name: {layer_name}") # Load the layer layer = QgsVectorLayer(layer_uri, layer_name, "ogr") @@ -228,8 +241,20 @@ def add_grid_layer_to_map( ) return + # Verify the column exists in the layer + field_names = [field.name() for field in layer.fields()] + log_message(f"Available columns: {field_names[:10]}...") # Log first 10 + if column_name not in field_names: + log_message( + f"Column '{column_name}' not found in study_area_grid. Available columns: {field_names}", + tag="GeoE3", + level=Qgis.Warning, + ) + return + # Load the QML template and substitute the column name template_path = resources_path("resources", "qml", "indicator-vector-template.qml") + log_message(f"Template path: {template_path}") if not os.path.exists(template_path): log_message( f"QML template not found: {template_path}", @@ -243,6 +268,7 @@ def add_grid_layer_to_map( # Replace the [attribute] placeholder with the actual column name qml_content = qml_content.replace("[attribute]", column_name) + log_message(f"Substituted column '{column_name}' in QML template") # Write to a temporary file and apply the style with tempfile.NamedTemporaryFile(mode="w", suffix=".qml", delete=False) as tmp: From 5a419cef00d3488b13bb4a89847f7f8f9a26138d Mon Sep 17 00:00:00 2001 From: Tim Sutton Date: Tue, 31 Mar 2026 00:11:24 +0100 Subject: [PATCH 08/55] Fix GeoE3 score column name mismatch The AnalysisAggregationWorkflow was using layer_id="geoe3" but the grid column is actually named "wee_score" as defined in grid_column_utils.get_aggregate_column_names(). This caused the analysis aggregation to fail when trying to write to a non-existent column. --- geest/core/workflows/analysis_aggregation_workflow.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/geest/core/workflows/analysis_aggregation_workflow.py b/geest/core/workflows/analysis_aggregation_workflow.py index d7d1a2d1..9ee3133f 100644 --- a/geest/core/workflows/analysis_aggregation_workflow.py +++ b/geest/core/workflows/analysis_aggregation_workflow.py @@ -47,7 +47,7 @@ def __init__( self.id = ( self.item.attribute("analysis_name").lower().replace(" ", "_").replace("'", "") ) # should not be needed any more - self.layer_id = "geoe3" + self.layer_id = "wee_score" # Must match column name in grid_column_utils.get_aggregate_column_names() self.weight_key = "analysis_weighting" self.workflow_name = "analysis_aggregation" # Override the default working directory defined in the base class From 04dd4d5070480e8369d66c5ae2e3b3539b64e9ba Mon Sep 17 00:00:00 2001 From: Tim Sutton Date: Tue, 31 Mar 2026 09:24:15 +0100 Subject: [PATCH 09/55] Fix masked score file name mismatch The OpportunitiesByWeeScoreProcessingTask and WeeByPopulationScoreProcessor were looking for geoe3_masked_{index}.tif but the analysis workflow now creates wee_score_masked_{index}.tif (using layer_id as filename prefix). Updated both processors to use the correct filename pattern. --- geest/core/algorithms/opportunities_by_wee_score_processor.py | 2 +- geest/core/algorithms/wee_by_population_score_processor.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/geest/core/algorithms/opportunities_by_wee_score_processor.py b/geest/core/algorithms/opportunities_by_wee_score_processor.py index de2fa82a..5e0f1675 100644 --- a/geest/core/algorithms/opportunities_by_wee_score_processor.py +++ b/geest/core/algorithms/opportunities_by_wee_score_processor.py @@ -161,7 +161,7 @@ def calculate_score(self) -> None: return mask_path = os.path.join(self.opportunity_masks_folder, f"opportunites_mask_{index}.tif") - geoe3_score_path = os.path.join(self.geoe3_folder, f"geoe3_masked_{index}.tif") + geoe3_score_path = os.path.join(self.geoe3_folder, f"wee_score_masked_{index}.tif") mask_layer = QgsRasterLayer(mask_path, "GeoE3") geoe3_score_layer = QgsRasterLayer(geoe3_score_path, "POP") self.validate_rasters(mask_layer, geoe3_score_layer, dimension_check=False) diff --git a/geest/core/algorithms/wee_by_population_score_processor.py b/geest/core/algorithms/wee_by_population_score_processor.py index 359df94b..9ac8e24a 100644 --- a/geest/core/algorithms/wee_by_population_score_processor.py +++ b/geest/core/algorithms/wee_by_population_score_processor.py @@ -194,7 +194,7 @@ def calculate_score(self) -> None: if self.isCanceled(): return - geoe3_path = os.path.join(self.geoe3_folder, f"geoe3_masked_{index}.tif") + geoe3_path = os.path.join(self.geoe3_folder, f"wee_score_masked_{index}.tif") population_path = os.path.join(self.population_folder, f"reclassified_{index}.tif") geoe3_layer = QgsRasterLayer(geoe3_path, "GeoE3") pop_layer = QgsRasterLayer(population_path, "POP") From afe4a3d53285b56692152b07ff8641d85ed27ca3 Mon Sep 17 00:00:00 2001 From: Tim Sutton Date: Tue, 31 Mar 2026 09:32:22 +0100 Subject: [PATCH 10/55] Use geoe3 column name instead of wee_score Changed the analysis aggregation to use layer_id="geoe3" and updated grid_column_utils to use "geoe3" and "geoe3_by_population" as the aggregate column names. This maintains consistency with the existing file naming convention (geoe3_masked_{index}.tif) expected by the masked score processors. --- geest/core/algorithms/opportunities_by_wee_score_processor.py | 2 +- geest/core/algorithms/wee_by_population_score_processor.py | 2 +- geest/core/grid_column_utils.py | 4 ++-- geest/core/workflows/analysis_aggregation_workflow.py | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/geest/core/algorithms/opportunities_by_wee_score_processor.py b/geest/core/algorithms/opportunities_by_wee_score_processor.py index 5e0f1675..de2fa82a 100644 --- a/geest/core/algorithms/opportunities_by_wee_score_processor.py +++ b/geest/core/algorithms/opportunities_by_wee_score_processor.py @@ -161,7 +161,7 @@ def calculate_score(self) -> None: return mask_path = os.path.join(self.opportunity_masks_folder, f"opportunites_mask_{index}.tif") - geoe3_score_path = os.path.join(self.geoe3_folder, f"wee_score_masked_{index}.tif") + geoe3_score_path = os.path.join(self.geoe3_folder, f"geoe3_masked_{index}.tif") mask_layer = QgsRasterLayer(mask_path, "GeoE3") geoe3_score_layer = QgsRasterLayer(geoe3_score_path, "POP") self.validate_rasters(mask_layer, geoe3_score_layer, dimension_check=False) diff --git a/geest/core/algorithms/wee_by_population_score_processor.py b/geest/core/algorithms/wee_by_population_score_processor.py index 9ac8e24a..359df94b 100644 --- a/geest/core/algorithms/wee_by_population_score_processor.py +++ b/geest/core/algorithms/wee_by_population_score_processor.py @@ -194,7 +194,7 @@ def calculate_score(self) -> None: if self.isCanceled(): return - geoe3_path = os.path.join(self.geoe3_folder, f"wee_score_masked_{index}.tif") + geoe3_path = os.path.join(self.geoe3_folder, f"geoe3_masked_{index}.tif") population_path = os.path.join(self.population_folder, f"reclassified_{index}.tif") geoe3_layer = QgsRasterLayer(geoe3_path, "GeoE3") pop_layer = QgsRasterLayer(population_path, "POP") diff --git a/geest/core/grid_column_utils.py b/geest/core/grid_column_utils.py index d9a1c94d..3130ee20 100644 --- a/geest/core/grid_column_utils.py +++ b/geest/core/grid_column_utils.py @@ -76,8 +76,8 @@ def get_aggregate_column_names() -> List[str]: List of column names for aggregate scores (WEE score, WEE by population, etc.) """ return [ - "wee_score", - "wee_by_population", + "geoe3", + "geoe3_by_population", "contextual_score", "accessibility_score", "place_characterization_score", diff --git a/geest/core/workflows/analysis_aggregation_workflow.py b/geest/core/workflows/analysis_aggregation_workflow.py index 9ee3133f..9d9bf6f8 100644 --- a/geest/core/workflows/analysis_aggregation_workflow.py +++ b/geest/core/workflows/analysis_aggregation_workflow.py @@ -47,7 +47,7 @@ def __init__( self.id = ( self.item.attribute("analysis_name").lower().replace(" ", "_").replace("'", "") ) # should not be needed any more - self.layer_id = "wee_score" # Must match column name in grid_column_utils.get_aggregate_column_names() + self.layer_id = "geoe3" # Must match column name in grid_column_utils.get_aggregate_column_names() self.weight_key = "analysis_weighting" self.workflow_name = "analysis_aggregation" # Override the default working directory defined in the base class From 43bcf170e24915152815632896b1dde4d2d6b7bc Mon Sep 17 00:00:00 2001 From: Tim Sutton Date: Tue, 31 Mar 2026 09:51:17 +0100 Subject: [PATCH 11/55] Change logging level from DEBUG to INFO Reduces log spam from PyQt UI loader debug messages like 'push QCheckBox', 'pop widget', 'setting property', etc. --- geest/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/geest/__init__.py b/geest/__init__.py index 76686a7e..7f3e9b56 100644 --- a/geest/__init__.py +++ b/geest/__init__.py @@ -81,7 +81,7 @@ filename=log_file_path, filemode="a", # Append mode format="%(asctime)s [%(levelname)s] %(message)s", - level=logging.DEBUG, + level=logging.INFO, # INFO level to avoid PyQt UI loader debug spam ) date = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") log_message("»»»»»»»»»»»»»»»»»»»»»»»»»»»»»»»»»»»»", force=True) From 3ebd6db6770ff13c93a9289e73b1643840fad858 Mon Sep 17 00:00:00 2001 From: Tim Sutton Date: Tue, 31 Mar 2026 09:53:13 +0100 Subject: [PATCH 12/55] Suppress PyQt uic debug messages while keeping DEBUG level The PyQt uic module emits verbose debug messages when loading .ui files: - "push QCheckBox", "pop widget", "setting property", etc. These are not from GeoE3 code but from PyQt's UI compiler internals. Rather than changing the overall log level to INFO, specifically suppress the PyQt5.uic and PyQt6.uic loggers to WARNING level. This keeps DEBUG level available for GeoE3 code debugging. --- geest/__init__.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/geest/__init__.py b/geest/__init__.py index 7f3e9b56..e0468e93 100644 --- a/geest/__init__.py +++ b/geest/__init__.py @@ -81,8 +81,11 @@ filename=log_file_path, filemode="a", # Append mode format="%(asctime)s [%(levelname)s] %(message)s", - level=logging.INFO, # INFO level to avoid PyQt UI loader debug spam + level=logging.DEBUG, ) +# Suppress PyQt uic module debug spam (push/pop widget, setting property, etc.) +logging.getLogger("PyQt5.uic").setLevel(logging.WARNING) +logging.getLogger("PyQt6.uic").setLevel(logging.WARNING) date = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") log_message("»»»»»»»»»»»»»»»»»»»»»»»»»»»»»»»»»»»»", force=True) log_message(f"GeoE3 started at {date}", force=True) From aa8fbe5eb1d04dad839d98ed8d6a0c92c0060453 Mon Sep 17 00:00:00 2001 From: Tim Sutton Date: Tue, 31 Mar 2026 12:45:11 +0100 Subject: [PATCH 13/55] Add filter to exclude NULL values in grid layer visualization When adding a grid layer to the map, apply a subset filter to exclude features where the visualized column has NULL values. This prevents empty/unclassified features from being rendered. Filter is applied both for new layers and when refreshing existing ones. --- geest/core/utilities.py | 9 ++- .../qml/indicator-vector-template.qml | 74 +++++++++---------- 2 files changed, 45 insertions(+), 38 deletions(-) diff --git a/geest/core/utilities.py b/geest/core/utilities.py index d1c15ada..60f1ca38 100644 --- a/geest/core/utilities.py +++ b/geest/core/utilities.py @@ -252,6 +252,11 @@ def add_grid_layer_to_map( ) return + # Apply filter to exclude NULL values for the column being visualized + filter_expression = f'"{column_name}" IS NOT NULL' + layer.setSubsetString(filter_expression) + log_message(f"Applied filter: {filter_expression}") + # Load the QML template and substitute the column name template_path = resources_path("resources", "qml", "indicator-vector-template.qml") log_message(f"Template path: {template_path}") @@ -323,9 +328,11 @@ def add_grid_layer_to_map( layer_tree_layer = child break - # If the layer exists, update its style instead of re-adding + # If the layer exists, update its style and filter instead of re-adding if existing_layer is not None: log_message(f"Refreshing existing layer: {existing_layer.name()}") + # Update filter for the column being visualized + existing_layer.setSubsetString(filter_expression) # Re-apply style to existing layer with tempfile.NamedTemporaryFile(mode="w", suffix=".qml", delete=False) as tmp: tmp.write(qml_content) diff --git a/geest/resources/qml/indicator-vector-template.qml b/geest/resources/qml/indicator-vector-template.qml index 23d806ff..8f8316c5 100644 --- a/geest/resources/qml/indicator-vector-template.qml +++ b/geest/resources/qml/indicator-vector-template.qml @@ -1,15 +1,15 @@ - + - - - - - + + + + + - + - + @@ -40,7 +40,7 @@ - + - + @@ -71,7 +71,7 @@ - + - + @@ -102,7 +102,7 @@ - + - + @@ -133,7 +133,7 @@ - + - + @@ -166,7 +166,7 @@ - + - + - - + + @@ -230,7 +230,7 @@ - + - +