Ch/ex dqry postprocessing#49
Conversation
This comment has been minimized.
This comment has been minimized.
There was a problem hiding this comment.
I reviewed the files except the new filter, because it was close to the one on the main project DQRY, so the same comments apply.
I don't agree with your addition to the data preparation, because I feel like you copy-pasted some code from elsewhere without taking the latest modifications into account. I created a new branch with a PR against this branch: #51. It rolls back some of your modifications and factorizes the post-processing further.
I kept your new function merge_polygons, and the post-processing in merge_adjacent_detections is a mix of your code and of the comments I left on this PR. I kept the previous version of the year control, because assigning a random year to empty tiles seemed like risky business and because I was lazy.
| # Prepare the tiles | ||
|
|
||
| ## Convert datasets shapefiles into geojson format | ||
| logger.info('Convert the label shapefiles into GeoJSON format (EPSG:4326)...') | ||
| labels_4326_gdf, written_files = prepare_labels(SHPFILE, written_files) | ||
| gt_labels_4326_gdf = labels_4326_gdf[['geometry', 'CATEGORY', 'SUPERCATEGORY']].copy() | ||
|
|
||
| # Add FP labels if it exists | ||
| if FP_SHPFILE: | ||
| logger.info('Convert the FP label shapefiles into GeoJSON format (EPSG:4326)...') | ||
| fp_labels_4326_gdf, written_files = prepare_labels(FP_SHPFILE, written_files, prefix='FP_') | ||
| labels_4326_gdf = pd.concat([labels_4326_gdf, fp_labels_4326_gdf], ignore_index=True) | ||
|
|
||
| # Tiling of the AoI | ||
| logger.info("- Get the label boundaries") | ||
| boundaries_df = labels_4326_gdf.bounds | ||
| logger.info("- Tiling of the AoI") | ||
| tiles_4326_aoi_gdf = aoi_tiling(boundaries_df) | ||
| tiles_4326_labels_gdf = gpd.sjoin(tiles_4326_aoi_gdf, labels_4326_gdf, how='inner', predicate='intersects') | ||
|
|
||
| # Tiling of the AoI from which empty tiles will be selected | ||
| if EPT_SHPFILE: | ||
| EPT_aoi_gdf = gpd.read_file(EPT_SHPFILE) | ||
| EPT_aoi_4326_gdf = EPT_aoi_gdf.to_crs(epsg=4326) | ||
| assert_year(labels_4326_gdf, EPT_aoi_4326_gdf, 'empty_tiles', EPT_YEAR) | ||
|
|
||
| if EPT_TYPE == 'aoi': | ||
| logger.info("- Get AoI boundaries") | ||
| EPT_aoi_boundaries_df = EPT_aoi_4326_gdf.bounds | ||
|
|
||
| # Get tile coordinates and shapes | ||
| logger.info("- Tiling of the empty tiles AoI") | ||
| empty_tiles_4326_all_gdf = aoi_tiling(EPT_aoi_boundaries_df) | ||
| # Delete tiles outside of the AoI limits | ||
| empty_tiles_4326_aoi_gdf = gpd.sjoin(empty_tiles_4326_all_gdf, EPT_aoi_4326_gdf, how='inner', lsuffix='ept_tiles', rsuffix='ept_aoi') | ||
| # Attribute a year to empty tiles if necessary | ||
| if 'year' in labels_4326_gdf.keys(): | ||
| if isinstance(EPT_YEAR, int): | ||
| empty_tiles_4326_aoi_gdf['year'] = int(EPT_YEAR) | ||
| else: | ||
| empty_tiles_4326_aoi_gdf['year'] = np.random.randint(low=EPT_YEAR[0], high=EPT_YEAR[1], size=(len(empty_tiles_4326_aoi_gdf))) | ||
| elif EPT_TYPE == 'shp': | ||
| if EPT_YEAR: | ||
| logger.warning("A shapefile of selected empty tiles are provided. The year set for the empty tiles in the configuration file will be ignored") | ||
| EPT_YEAR = None | ||
| empty_tiles_4326_aoi_gdf = EPT_aoi_4326_gdf.copy() | ||
|
|
||
| # Get all the tiles in one gdf | ||
| logger.info("- Concatenate label tiles and empty AoI tiles") | ||
| tiles_4326_all_gdf = pd.concat([tiles_4326_labels_gdf, empty_tiles_4326_aoi_gdf]) | ||
| else: | ||
| tiles_4326_all_gdf = tiles_4326_labels_gdf.copy() | ||
|
|
||
| # - Remove useless columns, reset feature id and redefine it according to xyz format | ||
| logger.info('- Add tile IDs and reorganise the data set') | ||
| tiles_4326_all_gdf = tiles_4326_all_gdf[['geometry', 'title', 'year'] if 'year' in tiles_4326_all_gdf.keys() else ['geometry', 'title']].copy() | ||
| tiles_4326_all_gdf.reset_index(drop=True, inplace=True) | ||
| tiles_4326_all_gdf = tiles_4326_all_gdf.apply(add_tile_id, axis=1) | ||
|
|
||
| # - Remove duplicated tiles | ||
| tiles_4326_all_gdf.drop_duplicates(['id'], inplace=True) | ||
|
|
||
| nb_tiles = len(tiles_4326_all_gdf) | ||
| logger.info(f"There were {nb_tiles} tiles created") |
There was a problem hiding this comment.
There was a problem hiding this comment.
I'm not sure what I am supposed to look at here.
There was a problem hiding this comment.
I think the link was supposed to point here: https://github.com/swiss-territorial-data-lab/object-detector/blob/ch/ex-dqry_postprocessing/helpers/functions_for_examples.py#L86
| detections_merge_gdf['geometry'] = detections_merge_gdf.geometry.buffer(-1, join_style='mitre') | ||
|
|
||
| # Merge adjacent polygons within the provided thd distance | ||
| detections_merge_gdf['geometry'] = detections_merge_gdf.geometry.buffer(DISTANCE, join_style='mitre') |
There was a problem hiding this comment.
Detections are already buffered based on ln. 104, except with 1 instead of DISTANCE. The geometry passed down is the buffered one.
Is that really what you want? Wouldn't it be simpler to always buffer with DISTANCE?
There was a problem hiding this comment.
I checked, and indeed, it didn't change the input geometry much.

I had to dig a bit to remember why I did that... I think it's because I still want the adjacent tiles to be merged even if the distance threshold is below the distance needed to merge adjacent polygons (which is unlikely to happen in this case) or if the value is set to 0, but maybe we can add a condition for this case.
| detections_overlap_tiles_gdf = misc.merge_polygons(detections_overlap_tiles_gdf) | ||
|
|
||
| # Concat polygons contained within a tile and the merged ones | ||
| detections_merge_gdf = pd.concat([detections_overlap_tiles_gdf, detections_within_tiles_gdf], axis=0, ignore_index=True) |
There was a problem hiding this comment.
Since the detections in detections_within_tiles_gdf lie within tiles, you don't need to include them in the detection merging or in the retrieval of the right attributes afterwards. You could just use the initial unbuffered geometry and add them at the end (ln 156).
| detections_merge_gdf['geometry'] = detections_merge_gdf.geometry.buffer(-1, join_style='mitre') | ||
|
|
||
| # Merge adjacent polygons within the provided thd distance | ||
| detections_merge_gdf['geometry'] = detections_merge_gdf.geometry.buffer(DISTANCE, join_style='mitre') |
There was a problem hiding this comment.
I checked, and indeed, it didn't change the input geometry much.

I had to dig a bit to remember why I did that... I think it's because I still want the adjacent tiles to be merged even if the distance threshold is below the distance needed to merge adjacent polygons (which is unlikely to happen in this case) or if the value is set to 0, but maybe we can add a condition for this case.
| # Prepare the tiles | ||
|
|
||
| ## Convert datasets shapefiles into geojson format | ||
| logger.info('Convert the label shapefiles into GeoJSON format (EPSG:4326)...') | ||
| labels_4326_gdf, written_files = prepare_labels(SHPFILE, written_files) | ||
| gt_labels_4326_gdf = labels_4326_gdf[['geometry', 'CATEGORY', 'SUPERCATEGORY']].copy() | ||
|
|
||
| # Add FP labels if it exists | ||
| if FP_SHPFILE: | ||
| logger.info('Convert the FP label shapefiles into GeoJSON format (EPSG:4326)...') | ||
| fp_labels_4326_gdf, written_files = prepare_labels(FP_SHPFILE, written_files, prefix='FP_') | ||
| labels_4326_gdf = pd.concat([labels_4326_gdf, fp_labels_4326_gdf], ignore_index=True) | ||
|
|
||
| # Tiling of the AoI | ||
| logger.info("- Get the label boundaries") | ||
| boundaries_df = labels_4326_gdf.bounds | ||
| logger.info("- Tiling of the AoI") | ||
| tiles_4326_aoi_gdf = aoi_tiling(boundaries_df) | ||
| tiles_4326_labels_gdf = gpd.sjoin(tiles_4326_aoi_gdf, labels_4326_gdf, how='inner', predicate='intersects') | ||
|
|
||
| # Tiling of the AoI from which empty tiles will be selected | ||
| if EPT_SHPFILE: | ||
| EPT_aoi_gdf = gpd.read_file(EPT_SHPFILE) | ||
| EPT_aoi_4326_gdf = EPT_aoi_gdf.to_crs(epsg=4326) | ||
| assert_year(labels_4326_gdf, EPT_aoi_4326_gdf, 'empty_tiles', EPT_YEAR) | ||
|
|
||
| if EPT_TYPE == 'aoi': | ||
| logger.info("- Get AoI boundaries") | ||
| EPT_aoi_boundaries_df = EPT_aoi_4326_gdf.bounds | ||
|
|
||
| # Get tile coordinates and shapes | ||
| logger.info("- Tiling of the empty tiles AoI") | ||
| empty_tiles_4326_all_gdf = aoi_tiling(EPT_aoi_boundaries_df) | ||
| # Delete tiles outside of the AoI limits | ||
| empty_tiles_4326_aoi_gdf = gpd.sjoin(empty_tiles_4326_all_gdf, EPT_aoi_4326_gdf, how='inner', lsuffix='ept_tiles', rsuffix='ept_aoi') | ||
| # Attribute a year to empty tiles if necessary | ||
| if 'year' in labels_4326_gdf.keys(): | ||
| if isinstance(EPT_YEAR, int): | ||
| empty_tiles_4326_aoi_gdf['year'] = int(EPT_YEAR) | ||
| else: | ||
| empty_tiles_4326_aoi_gdf['year'] = np.random.randint(low=EPT_YEAR[0], high=EPT_YEAR[1], size=(len(empty_tiles_4326_aoi_gdf))) | ||
| elif EPT_TYPE == 'shp': | ||
| if EPT_YEAR: | ||
| logger.warning("A shapefile of selected empty tiles are provided. The year set for the empty tiles in the configuration file will be ignored") | ||
| EPT_YEAR = None | ||
| empty_tiles_4326_aoi_gdf = EPT_aoi_4326_gdf.copy() | ||
|
|
||
| # Get all the tiles in one gdf | ||
| logger.info("- Concatenate label tiles and empty AoI tiles") | ||
| tiles_4326_all_gdf = pd.concat([tiles_4326_labels_gdf, empty_tiles_4326_aoi_gdf]) | ||
| else: | ||
| tiles_4326_all_gdf = tiles_4326_labels_gdf.copy() | ||
|
|
||
| # - Remove useless columns, reset feature id and redefine it according to xyz format | ||
| logger.info('- Add tile IDs and reorganise the data set') | ||
| tiles_4326_all_gdf = tiles_4326_all_gdf[['geometry', 'title', 'year'] if 'year' in tiles_4326_all_gdf.keys() else ['geometry', 'title']].copy() | ||
| tiles_4326_all_gdf.reset_index(drop=True, inplace=True) | ||
| tiles_4326_all_gdf = tiles_4326_all_gdf.apply(add_tile_id, axis=1) | ||
|
|
||
| # - Remove duplicated tiles | ||
| tiles_4326_all_gdf.drop_duplicates(['id'], inplace=True) | ||
|
|
||
| nb_tiles = len(tiles_4326_all_gdf) | ||
| logger.info(f"There were {nb_tiles} tiles created") |
There was a problem hiding this comment.
I'm not sure what I am supposed to look at here.
This comment has been minimized.
This comment has been minimized.
…x_dqry Factorize detection merge
Update of the mineral extraction site example with an improved merging and filtering approach, following that of
proj-dqry.