diff --git a/CHANGELOG.md b/CHANGELOG.md index 904864c..37ca684 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,7 @@ # CHANGELOG +- Possibilité d'ignorer les points synthétiques du fichier donneur (paramètre DONOR_USE_SYNTHETIC_POINTS dans le fichier de config) + ## 1.2.1 - Ajout de gdal dans l'image docker (fichiers manquant pour l'utiliser en ligne de commande) diff --git a/configs/configs_patchwork.yaml b/configs/configs_patchwork.yaml index 688f7b0..2082851 100644 --- a/configs/configs_patchwork.yaml +++ b/configs/configs_patchwork.yaml @@ -48,6 +48,8 @@ mount_points: CRS: 2154 DONOR_CLASS_LIST: [2, 22] +DONOR_USE_SYNTHETIC_POINTS: false + RECIPIENT_CLASS_LIST: [2, 6, 9, 17] TILE_SIZE: 1000 diff --git a/patchwork/patchwork.py b/patchwork/patchwork.py index bc850d4..51a41ad 100644 --- a/patchwork/patchwork.py +++ b/patchwork/patchwork.py @@ -16,41 +16,68 @@ def get_selected_classes_points( - config: DictConfig, tile_origin: Tuple[int, int], points_list: ScaleAwarePointRecord, class_list: list[int], + use_synthetic_points: bool, fields_to_keep: list[str], + patch_size: int, + tile_size: int, ) -> pd.DataFrame: - """get a list of points from a las, and return a ndarray of those point with the selected classification""" - + """Get a list of points from a las, filter them based on classification and synthetic flag + and return them as a pandas dataframe + + Args: + tile_origin (Tuple[int, int]): Origin point of the tile (in meters) + points_list (ScaleAwarePointRecord): Points list in laspy format + class_list (list[int]): List of classes to keep + use_synthetic_points (bool): if false, filter out points with flag "synthetic" = True + fields_to_keep (list[str]): Las file attribute to keep in the output dataframe + patch_size (int): Size of the patches (for discretization) + tile_size (int): Size of the tile + + Raises: + NotImplementedError: Filtering out synthetic points is implemented only + if the synthetic field is in fields_to_keep + + Returns: + 
pd.DataFrame: Filtered points list as a pd.DataFrame + """ # we add automatically classification, so we remove it if it's in field_to_keep if c.CLASSIFICATION_STR in fields_to_keep: fields_to_keep.remove(c.CLASSIFICATION_STR) table_fields_to_keep = [points_list[field] for field in fields_to_keep] table_field_necessary = [ - np.int32(points_list.x / config.PATCH_SIZE), # convert x into the coordinate of the patch - np.int32(points_list.y / config.PATCH_SIZE), # convert y into the coordinate of the patch + np.int32(points_list.x / patch_size), # convert x into the coordinate of the patch + np.int32(points_list.y / patch_size), # convert y into the coordinate of the patch points_list.classification, ] + all_fields_list = [*fields_to_keep, c.PATCH_X_STR, c.PATCH_Y_STR, c.CLASSIFICATION_STR] all_classes_points = np.array(table_fields_to_keep + table_field_necessary).transpose() + df_points = pd.DataFrame(all_classes_points, columns=all_fields_list) - mask = np.zeros(len(all_classes_points), dtype=bool) - for classification in class_list: - mask = mask | (all_classes_points[:, -1] == classification) - wanted_classes_points = all_classes_points[mask] - all_fields_list = [*fields_to_keep, c.PATCH_X_STR, c.PATCH_Y_STR, c.CLASSIFICATION_STR] - df_wanted_classes_points = pd.DataFrame(wanted_classes_points, columns=all_fields_list) + # Filter points based on classification + df_points = df_points[df_points.classification.isin(class_list)] + + # Filter based on if the point is synthetic + if not use_synthetic_points: + if "synthetic" in fields_to_keep: + df_points = df_points[np.logical_not(df_points.synthetic)] + else: + raise NotImplementedError( + "'get_selected_classes_points' is asked to filter on synthetic flag, " + "but this flag is not in fields to keep." 
+ ) # "push" the points on the limit of the tile to the closest patch - mask_points_on_max_x = df_wanted_classes_points[c.PATCH_X_STR] == tile_origin[0] + config.TILE_SIZE - df_wanted_classes_points.loc[mask_points_on_max_x, c.PATCH_X_STR] = tile_origin[0] + config.TILE_SIZE - 1 - mask_points_on_max_y = df_wanted_classes_points[c.PATCH_Y_STR] == tile_origin[1] - df_wanted_classes_points.loc[mask_points_on_max_y, c.PATCH_Y_STR] = tile_origin[1] - 1 + mask_points_on_max_x = df_points[c.PATCH_X_STR] == tile_origin[0] + tile_size + df_points.loc[mask_points_on_max_x, c.PATCH_X_STR] = tile_origin[0] + tile_size - 1 + mask_points_on_max_y = df_points[c.PATCH_Y_STR] == tile_origin[1] + df_points.loc[mask_points_on_max_y, c.PATCH_Y_STR] = tile_origin[1] - 1 - return df_wanted_classes_points + return df_points def get_type(new_column_size: int): @@ -75,7 +102,13 @@ def get_complementary_points( recipient_points = recipient_file.read().points df_recipient_points = get_selected_classes_points( - config, tile_origin, recipient_points, config.RECIPIENT_CLASS_LIST, [] + tile_origin, + recipient_points, + config.RECIPIENT_CLASS_LIST, + use_synthetic_points=True, + fields_to_keep=[], + patch_size=config.PATCH_SIZE, + tile_size=config.TILE_SIZE, ) # set, for each patch of coordinate (patch_x, patch_y), the number of recipient point @@ -100,7 +133,15 @@ def get_complementary_points( donor_columns = get_field_from_header(donor_file) dfs_donor_points.append( - get_selected_classes_points(config, tile_origin, donor_points, config.DONOR_CLASS_LIST, donor_columns) + get_selected_classes_points( + tile_origin, + donor_points, + config.DONOR_CLASS_LIST, + config.DONOR_USE_SYNTHETIC_POINTS, + donor_columns, + patch_size=config.PATCH_SIZE, + tile_size=config.TILE_SIZE, + ) ) if len(df_donor_info.index): diff --git a/test/configs/config_test_mount_points.yaml b/test/configs/config_test_mount_points.yaml index a2581e2..ea17550 100644 --- a/test/configs/config_test_mount_points.yaml +++ 
b/test/configs/config_test_mount_points.yaml @@ -50,6 +50,8 @@ mount_points: CRS: 2154 +DONOR_USE_SYNTHETIC_POINTS: true + DONOR_CLASS_LIST: [2, 22] RECIPIENT_CLASS_LIST: [2, 6, 9, 17] diff --git a/test/data/recipient_with_synthetic_points.laz b/test/data/recipient_with_synthetic_points.laz new file mode 100644 index 0000000..17ea784 Binary files /dev/null and b/test/data/recipient_with_synthetic_points.laz differ diff --git a/test/test_patchwork.py b/test/test_patchwork.py index 4c5d66e..63b5f3f 100644 --- a/test/test_patchwork.py +++ b/test/test_patchwork.py @@ -18,7 +18,7 @@ patchwork, ) -RECIPIENT_TEST_DIR = "test/data/" +TEST_DATA_DIR = "test/data/" RECIPIENT_TEST_NAME = "recipient_test.laz" DONOR_CLASS_LIST = [2, 9] @@ -29,37 +29,79 @@ NEW_COLUMN = "virtual_column" NEW_COLUMN_SIZE = 8 VALUE_ADDED_POINTS = 1 +TILE_SIZE = 1000 +PATCH_SIZE = 1 SHP_X_Y_TO_METER_FACTOR = 1000 def test_get_field_from_header(): - with laspy.open(os.path.join(RECIPIENT_TEST_DIR, RECIPIENT_TEST_NAME)) as recipient_file: + with laspy.open(os.path.join(TEST_DATA_DIR, RECIPIENT_TEST_NAME)) as recipient_file: recipient_fields_list = get_field_from_header(recipient_file) assert len(recipient_fields_list) == 18 # check if all fields are lower case assert [field for field in recipient_fields_list if field != field.lower()] == [] -def test_get_selected_classes_points(): - with initialize(version_base="1.2", config_path="../configs"): - config = compose( - config_name="configs_patchwork.yaml", - overrides=[ - f"filepath.RECIPIENT_DIRECTORY={RECIPIENT_TEST_DIR}", - f"filepath.RECIPIENT_NAME={RECIPIENT_TEST_NAME}", - f"RECIPIENT_CLASS_LIST={RECIPIENT_CLASS_LIST}", - ], +@pytest.mark.parametrize( + "las_path, class_list, fields_to_keep, use_synthetic", + [ + # Keep all points + (os.path.join(TEST_DATA_DIR, RECIPIENT_TEST_NAME), [1, 2, 3, 4, 5], ["synthetic", "intensity"], True), + # Filter on class only + (os.path.join(TEST_DATA_DIR, RECIPIENT_TEST_NAME), [2, 3], ["synthetic", "intensity"], 
True), + # Filter out synthetic points + (os.path.join(TEST_DATA_DIR, RECIPIENT_TEST_NAME), [2, 3], ["synthetic", "x"], False), + ], +) +def test_get_selected_classes_points(las_path, class_list, fields_to_keep, use_synthetic): + tile_origin = get_tile_origin_using_header_info(las_path, TILE_SIZE) + with laspy.open(las_path) as recipient_file: + input_points = recipient_file.read().points + df_output_points = get_selected_classes_points( + tile_origin, + input_points, + class_list, + fields_to_keep=fields_to_keep, + use_synthetic_points=use_synthetic, + patch_size=PATCH_SIZE, + tile_size=TILE_SIZE, ) - recipient_path = os.path.join(config.filepath.RECIPIENT_DIRECTORY, config.filepath.RECIPIENT_NAME) - tile_origin_recipient = get_tile_origin_using_header_info(recipient_path, config.TILE_SIZE) - with laspy.open(recipient_path) as recipient_file: - recipient_points = recipient_file.read().points - df_recipient_points = get_selected_classes_points( - config, tile_origin_recipient, recipient_points, config.RECIPIENT_CLASS_LIST, [] + assert len(df_output_points.index), "No points in output dataframe" + classification = set(df_output_points[c.CLASSIFICATION_STR]) + assert classification.issubset(class_list) + assert set(df_output_points.columns.values) == { + *fields_to_keep, + c.PATCH_X_STR, + c.PATCH_Y_STR, + c.CLASSIFICATION_STR, + } + if use_synthetic: + assert len(df_output_points.index) == np.count_nonzero( + np.isin(np.array(input_points.classification), class_list) + ) + else: + assert not np.any(df_output_points.synthetic) + + +def test_get_selected_classes_points_raise_error(): + las_path = os.path.join(TEST_DATA_DIR, "recipient_with_synthetic_points.laz") + class_list = [2, 3] + fields_to_keep = [] + use_synthetic = False + tile_origin = get_tile_origin_using_header_info(las_path, TILE_SIZE) + with pytest.raises(NotImplementedError): + with laspy.open(las_path) as las_path: + input_points = las_path.read().points + get_selected_classes_points( + tile_origin, + 
input_points, + class_list, + fields_to_keep=fields_to_keep, + use_synthetic_points=use_synthetic, + patch_size=PATCH_SIZE, + tile_size=TILE_SIZE, ) - for classification in np.unique(df_recipient_points[c.CLASSIFICATION_STR]): - assert classification in RECIPIENT_CLASS_LIST @pytest.mark.parametrize( @@ -104,6 +146,7 @@ def test_get_complementary_points(donor_info_path, recipient_path, x, y, expecte f"DONOR_CLASS_LIST={DONOR_CLASS_LIST}", f"RECIPIENT_CLASS_LIST={RECIPIENT_CLASS_LIST}", f"+VIRTUAL_CLASS_TRANSLATION={VIRTUAL_CLASS_TRANSLATION}", + "DONOR_USE_SYNTHETIC_POINTS=true", ], ) complementary_points = get_complementary_points(df_donor_info, recipient_path, (x, y), config) @@ -157,6 +200,7 @@ def test_get_complementary_points_2_more_fields(tmp_path_factory): f"DONOR_CLASS_LIST={DONOR_CLASS_LIST}", f"RECIPIENT_CLASS_LIST={RECIPIENT_CLASS_LIST}", f"+VIRTUAL_CLASS_TRANSLATION={VIRTUAL_CLASS_TRANSLATION}", + "DONOR_USE_SYNTHETIC_POINTS=true", ], ) @@ -193,7 +237,7 @@ def test_append_points(tmp_path_factory): config = compose( config_name="configs_patchwork.yaml", overrides=[ - f"filepath.RECIPIENT_DIRECTORY={RECIPIENT_TEST_DIR}", + f"filepath.RECIPIENT_DIRECTORY={TEST_DATA_DIR}", f"filepath.RECIPIENT_NAME={RECIPIENT_TEST_NAME}", f"filepath.OUTPUT_DIR={tmp_file_dir}", f"filepath.OUTPUT_NAME={tmp_file_name}", @@ -251,7 +295,7 @@ def test_append_points_new_column(tmp_path_factory): config = compose( config_name="configs_patchwork.yaml", overrides=[ - f"filepath.RECIPIENT_DIRECTORY={RECIPIENT_TEST_DIR}", + f"filepath.RECIPIENT_DIRECTORY={TEST_DATA_DIR}", f"filepath.RECIPIENT_NAME={RECIPIENT_TEST_NAME}", f"filepath.OUTPUT_DIR={tmp_file_dir}", f"filepath.OUTPUT_NAME={tmp_file_name}", @@ -326,6 +370,7 @@ def test_patchwork_default(tmp_path_factory, recipient_path, expected_nb_added_p f"DONOR_CLASS_LIST={DONOR_CLASS_LIST}", f"RECIPIENT_CLASS_LIST={RECIPIENT_CLASS_LIST}", f"+VIRTUAL_CLASS_TRANSLATION={VIRTUAL_CLASS_TRANSLATION}", + "DONOR_USE_SYNTHETIC_POINTS=true", 
"NEW_COLUMN=null", ], ) @@ -346,7 +391,7 @@ def test_patchwork_default(tmp_path_factory, recipient_path, expected_nb_added_p @pytest.mark.parametrize( - "recipient_path, expected_nb_added_points", + "recipient_path, donor_use_synthetic_points, expected_nb_added_points", # expected_nb_points value set after inspection of the initial result using qgis: # - there are points only inside the shapefile geometry # - when visualizing a grid, there seems to be no points in the cells where there is ground points in the @@ -354,19 +399,32 @@ def test_patchwork_default(tmp_path_factory, recipient_path, expected_nb_added_p [ ( "test/data/lidar_HD_decimated/Semis_2022_0673_6362_LA93_IGN69_decimated.laz", + True, 128675, ), # One donor + ( + "test/data/lidar_HD_decimated/Semis_2022_0673_6362_LA93_IGN69_decimated.laz", + False, + 127961, + ), # One donor, no synthetic points ( "test/data/lidar_HD_decimated/Semis_2022_0673_6363_LA93_IGN69_decimated.laz", + True, 149490, ), # Two donors + ( + "test/data/lidar_HD_decimated/Semis_2022_0673_6363_LA93_IGN69_decimated.laz", + False, + 149340, + ), # Two donors, no synthetic points ( "test/data/lidar_HD_decimated/Semis_2022_0674_6363_LA93_IGN69_decimated.laz", + True, 0, ), # No donor ], ) -def test_patchwork_with_origin(tmp_path_factory, recipient_path, expected_nb_added_points): +def test_patchwork_with_origin(tmp_path_factory, recipient_path, donor_use_synthetic_points, expected_nb_added_points): input_shp_path = "test/data/shapefile_local/patchwork_geometries.shp" tmp_file_dir = tmp_path_factory.mktemp("data") tmp_output_las_name = "result_patchwork.laz" @@ -386,6 +444,7 @@ def test_patchwork_with_origin(tmp_path_factory, recipient_path, expected_nb_add f"filepath.OUTPUT_INDICES_MAP_NAME={tmp_output_indices_map_name}", f"DONOR_CLASS_LIST={DONOR_CLASS_LIST}", f"RECIPIENT_CLASS_LIST={RECIPIENT_CLASS_LIST}", + f"DONOR_USE_SYNTHETIC_POINTS={donor_use_synthetic_points}", "NEW_COLUMN='Origin'", ], ) @@ -411,6 +470,7 @@ def 
test_patchwork_with_origin(tmp_path_factory, recipient_path, expected_nb_add @pytest.mark.parametrize( "input_shp_path, recipient_path, expected_nb_added_points", # Same tests as "test_patchwork_default", but with shapefiles that refer to paths in mounted stores + # All tests keep synthetic points [ ( "test/data/shapefile_mounted_unix_path/patchwork_geometries.shp",