From 10b66bd796b9776c236a3de3760ef6186b59cf87 Mon Sep 17 00:00:00 2001 From: Jake Adams Date: Fri, 20 Mar 2026 07:30:59 -0600 Subject: [PATCH 01/11] wip: fixing delete --- src/sweeper/sweepers/duplicates.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/src/sweeper/sweepers/duplicates.py b/src/sweeper/sweepers/duplicates.py index c8faa20..24eb4f7 100644 --- a/src/sweeper/sweepers/duplicates.py +++ b/src/sweeper/sweepers/duplicates.py @@ -2,6 +2,7 @@ # * coding: utf8 * import logging import re +from typing import Generator import arcpy from xxhash import xxh64 @@ -99,10 +100,20 @@ def try_fix(self): if len(self.oids_with_issues) == 0: return report - sql = f'"OBJECTID" IN ({",".join([str(oid) for oid in self.oids_with_issues])})' + chunk_size = 1000 + + # sql = f'"OBJECTID" IN ({",".join([str(oid) for oid in self.oids_with_issues])})' + lists_of_oids = list(self._chunk_oid_list(self.oids_with_issues, chunk_size)) temp_feature_layer = "temp_layer" log.info(f"Workspace is: {self.workspace}") + log.info( + f"Attempting to delete a total of {len(self.oids_with_issues)} duplicate records in {len(lists_of_oids)} batch(es) of {chunk_size} records each" + ) + for list_of_oids in lists_of_oids: + sql = f'"OBJECTID" IN ({",".join([str(oid) for oid in list_of_oids])})' + + #: TODO: move into loop, figure out report addition at success #: Delete duplicate rows using different arcpy tools for tables and feature classes with arcpy.EnvManager(workspace=self.workspace): if self.is_table: @@ -126,3 +137,11 @@ def try_fix(self): report["fixes"].append(f"{len(self.oids_with_issues)} records deleted successfully") return report + + @staticmethod + def _chunk_oid_list(lst: list, chunk_size: int): + if len(lst) <= chunk_size: + yield lst + return + for i in range(0, len(lst), chunk_size): + yield lst[i : i + chunk_size] From b0edf2665805aa9156211bf188bcebe9bad6afc8 Mon Sep 17 00:00:00 2001 From: Jake Adams Date: Fri, 20 Mar 2026 07:51:12 -0600 Subject: [PATCH 02/11] wip: more work in progress --- src/sweeper/sweepers/duplicates.py | 51 ++++++++++++++++++++++++++++-- 1 file changed, 49 insertions(+), 2 deletions(-) diff --git a/src/sweeper/sweepers/duplicates.py b/src/sweeper/sweepers/duplicates.py index 24eb4f7..1fe9f95 100644 --- a/src/sweeper/sweepers/duplicates.py +++ b/src/sweeper/sweepers/duplicates.py @@ -110,8 +110,41 @@ def try_fix(self): log.info( f"Attempting to delete a total of {len(self.oids_with_issues)} duplicate records in {len(lists_of_oids)} batch(es) of {chunk_size} records each" ) - for list_of_oids in lists_of_oids: - sql = f'"OBJECTID" IN ({",".join([str(oid) for oid in list_of_oids])})' + + with arcpy.EnvManager(workspace=self.workspace): + successful_deletes = 0 + + if self.is_table: + all_features = arcpy.management.MakeTableView(self.table_name, temp_feature_layer) + for index, list_of_oids in enumerate(lists_of_oids, start=1): + sql = f'"OBJECTID" IN ({",".join([str(oid) for oid in list_of_oids])})' + + try: + log.info(f"Batch {index}: attempting to delete {len(list_of_oids)} duplicate records") + arcpy.management.SelectLayerByAttribute(all_features, "NEW_SELECTION", sql) + arcpy.management.DeleteRows(all_features) + successful_deletes += len(list_of_oids) + except Exception as error: + error_message = f"unable to delete features in batch {index}: {error}" + report["issues"].append(error_message) + finally: + arcpy.management.SelectLayerByAttribute(all_features, "CLEAR_SELECTION") + + else: + all_features = arcpy.management.MakeFeatureLayer(self.table_name, temp_feature_layer) + for index, list_of_oids in enumerate(lists_of_oids, start=1): + sql = f'"OBJECTID" IN ({",".join([str(oid) for oid in list_of_oids])})' + + try: + log.info(f"Batch {index}: attempting to delete {len(list_of_oids)} duplicate records") + arcpy.management.SelectLayerByAttribute(all_features, "NEW_SELECTION", sql) + arcpy.management.DeleteFeatures(all_features) + successful_deletes += len(list_of_oids) + except Exception as error: + error_message = f"unable to delete features {error}" + report["issues"].append(error_message) + finally: + arcpy.management.SelectLayerByAttribute(all_features, "CLEAR_SELECTION") #: TODO: move into loop, figure out report addition at success #: Delete duplicate rows using different arcpy tools for tables and feature classes @@ -145,3 +178,17 @@ def _chunk_oid_list(lst: list, chunk_size: int): return for i in range(0, len(lst), chunk_size): yield lst[i : i + chunk_size] + + @staticmethod + def _delete_features_or_rows(feature_layer, is_table): + if is_table: + arcpy.management.DeleteRows(feature_layer) + else: + arcpy.management.DeleteFeatures(feature_layer) + + @staticmethod + def _make_feature_layer(feature_class, temp_layer_name, is_table): + if is_table: + return arcpy.management.MakeTableView(feature_class, temp_layer_name) + else: + return arcpy.management.MakeFeatureLayer(feature_class, temp_layer_name) From d03b0325f0f4ccbac81263b554bc16e8e5ea7f2e Mon Sep 17 00:00:00 2001 From: Jake Adams Date: Fri, 20 Mar 2026 08:27:12 -0600 Subject: [PATCH 03/11] chore: copilot instructions --- .github/copilot-instructions.md | 62 +++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 .github/copilot-instructions.md diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md new file mode 100644 index 0000000..4a88ce5 --- /dev/null +++ b/.github/copilot-instructions.md @@ -0,0 +1,62 @@ +# GitHub Copilot Instructions + +## General Guidelines + +- Prefer to return early from functions to reduce nesting and improve code readability. +- Always follow good security practices +- Use f-strings for string formatting +- Use logging instead of print statements +- Follow the DRY principle (Don't Repeat Yourself) +- Use pathlib for all file operations +- Follow defensive programming practices + - Validate all function inputs + - Handle exceptions gracefully +- Use retry logic for any network calls +- Add a new line before return statements in functions +- Use list/dict/set comprehensions where appropriate +- Avoid using wildcard imports (e.g., from module import *) +- Use context managers for file operations (e.g., with open(...) as f:) +- Prefer using built-in functions and libraries over custom implementations when possible +- Prefer smaller, focused functions that do one thing well rather than large, monolithic functions + +## Commit Message Format + +All commits must follow the Conventional Commits format using the Angular preset. + +For detailed guidelines on commit types, scopes, and formatting rules, see the release-composite-action README. + +## Code Style and Conventions + +### Python Style +- Line length: 120 characters (configured in ruff) +- Indentation: 4 spaces for Python files +- Use type hints for all new work +- Follow PEP 8 conventions +- Follow ruff style guide and linting rules +- Use pylint disable comments sparingly and only when necessary (e.g., `# pylint: disable=invalid-name`) + +### Documentation +- Use docstrings for all classes and public methods +- Follow NumPy/SciPy docstring format with sections: + - Brief description + - `Attributes` for class attributes + - `Parameters` for method parameters + - `Returns` for return values + - `Methods` for public methods in class docstrings + +## Testing Guidelines + +- Unit tests are required, and are required to pass before PR +- Mock external services +- Test both success and failure paths +- Verify warning messages for invalid configurations +- Code coverage should be maintained at a high level (tracked via codecov) +- Test names should be descriptive and follow the pattern `test__` + +## Code Quality + +- Run `ruff` for linting before committing +- Maintain test coverage (tracked via codecov) +- Follow existing patterns in the codebase +- Keep methods focused and single-purpose +- Use static methods when methods don't need instance state From 9e69654d58117dfd6b8e3d0e02a2b08e179dbc87 Mon Sep 17 00:00:00 2001 From: Jake Adams Date: Fri, 20 Mar 2026 08:34:44 -0600 Subject: [PATCH 04/11] fix: delete dupes in chunks to limit sql length, use select for delete --- .vscode/launch.json | 40 +-- src/sweeper/sweepers/duplicates.py | 111 ++++----- tests/test_duplicates.py | 386 +++++++++++++++++++++++++++++ 3 files changed, 457 insertions(+), 80 deletions(-) create mode 100644 tests/test_duplicates.py diff --git a/.vscode/launch.json b/.vscode/launch.json index 3b62c65..19bac35 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -1,20 +1,24 @@ { - // Use IntelliSense to learn about possible attributes. - // Hover to view descriptions of existing attributes. - // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 - "version": "0.2.0", - "configurations": [ - { - "name": "Python Debugger: Module", - "type": "debugpy", - "request": "launch", - "module": "sweeper", - "args": [ - "sweep", - "--workspace", - "C:\\temp\\locators.gdb" - ], - "cwd": "${workspaceFolder}" - } - ] + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + + { + "name": "Python Debugger: Module", + "type": "debugpy", + "request": "launch", + "module": "sweeper.__main__", + "args": [ + "sweep", + "duplicates", + "--workspace=C:\\temp\\sweeper_test\\Pre1978.gdb", + "--table-name=DAQPre1978LeadInHomes", + "--try-fix", + "--verbose", + "--save-report=C:\\temp\\sweeper_test\\" + ] + } + ] } diff --git a/src/sweeper/sweepers/duplicates.py b/src/sweeper/sweepers/duplicates.py index 1fe9f95..a7f8e91 100644 --- a/src/sweeper/sweepers/duplicates.py +++ b/src/sweeper/sweepers/duplicates.py @@ -5,6 +5,8 @@ from typing import Generator import arcpy +from arcpy._mp import Layer, Table +from arcpy.typing.gp import Result1 from xxhash import xxh64 from .base import SweeperBase @@ -111,68 +113,49 @@ def try_fix(self): f"Attempting to delete a total of {len(self.oids_with_issues)} duplicate records in {len(lists_of_oids)} batch(es) of {chunk_size} records each" ) - with arcpy.EnvManager(workspace=self.workspace): - successful_deletes = 0 - - if self.is_table: - all_features = arcpy.management.MakeTableView(self.table_name, temp_feature_layer) - for index, list_of_oids in enumerate(lists_of_oids, start=1): - sql = f'"OBJECTID" IN ({",".join([str(oid) for oid in list_of_oids])})' - - try: - log.info(f"Batch {index}: attempting to delete {len(list_of_oids)} duplicate records") - arcpy.management.SelectLayerByAttribute(all_features, "NEW_SELECTION", sql) - arcpy.management.DeleteRows(all_features) - successful_deletes += len(list_of_oids) - except Exception as error: - error_message = f"unable to delete features in batch {index}: {error}" - report["issues"].append(error_message) - finally: - arcpy.management.SelectLayerByAttribute(all_features, "CLEAR_SELECTION") + successful_deletes = 0 + failed_deletes = 0 + failed_batches = 0 - else: - all_features = arcpy.management.MakeFeatureLayer(self.table_name, temp_feature_layer) - for index, list_of_oids in enumerate(lists_of_oids, start=1): - sql = f'"OBJECTID" IN ({",".join([str(oid) for oid in list_of_oids])})' - - try: - log.info(f"Batch {index}: attempting to delete {len(list_of_oids)} duplicate records") - arcpy.management.SelectLayerByAttribute(all_features, "NEW_SELECTION", sql) - arcpy.management.DeleteFeatures(all_features) - successful_deletes += len(list_of_oids) - except Exception as error: - error_message = f"unable to delete features {error}" - report["issues"].append(error_message) - finally: - arcpy.management.SelectLayerByAttribute(all_features, "CLEAR_SELECTION") - - #: TODO: move into loop, figure out report addition at success - #: Delete duplicate rows using different arcpy tools for tables and feature classes with arcpy.EnvManager(workspace=self.workspace): - if self.is_table: - duplicate_features = arcpy.management.MakeTableView(self.table_name, temp_feature_layer, sql) - else: - duplicate_features = arcpy.management.MakeFeatureLayer(self.table_name, temp_feature_layer, sql) - - try: - log.info(f"attempting to delete {len(self.oids_with_issues)} duplicate records") - if self.is_table: - arcpy.management.DeleteRows(duplicate_features) - else: - arcpy.management.DeleteFeatures(duplicate_features) - except Exception as error: - error_message = f"unable to delete features {error}" - report["issues"].append(error_message) - finally: - if arcpy.Exists(temp_feature_layer): - arcpy.management.Delete(temp_feature_layer) - - report["fixes"].append(f"{len(self.oids_with_issues)} records deleted successfully") + all_features = self._make_feature_layer(self.table_name, temp_feature_layer, self.is_table) + + for index, list_of_oids in enumerate(lists_of_oids, start=1): + sql = f'"OBJECTID" IN ({",".join([str(oid) for oid in list_of_oids])})' + + try: + log.info(f"Batch {index}: attempting to delete {len(list_of_oids)} duplicate records") + arcpy.management.SelectLayerByAttribute(all_features, "NEW_SELECTION", sql) + self._delete_features_or_rows(all_features, self.is_table) + successful_deletes += len(list_of_oids) + except Exception as error: + error_message = f"unable to delete features in batch {index}: {error}" + log.info(error_message) + report["issues"].append(error_message) + failed_deletes += len(list_of_oids) + failed_batches += 1 + finally: + arcpy.management.SelectLayerByAttribute(all_features, "CLEAR_SELECTION") + + report["fixes"].append(f"{successful_deletes} records deleted successfully") + if failed_deletes > 0: + report["issues"].append( + f"{failed_batches} batch(es) with {failed_deletes} total records had errors deleting." + ) return report @staticmethod - def _chunk_oid_list(lst: list, chunk_size: int): + def _chunk_oid_list(lst: list, chunk_size: int) -> Generator[list, None, None]: + """Breaks a list into chunks of chunk_size + + Args: + lst (list): Input List + chunk_size (int): The desired size per chunk + + Yields: + Generator[list, None, None]: The next chunk_size sized chunk + """ if len(lst) <= chunk_size: yield lst return @@ -180,15 +163,19 @@ def _chunk_oid_list(lst: list, chunk_size: int): yield lst[i : i + chunk_size] @staticmethod - def _delete_features_or_rows(feature_layer, is_table): + def _make_feature_layer( + feature_class: str, temp_layer_name: str, is_table: bool + ) -> Result1[str | Table | Layer] | Result1[str | Layer]: + """Single method to handle table or layer creation based on is_table parameter""" if is_table: - arcpy.management.DeleteRows(feature_layer) + return arcpy.management.MakeTableView(feature_class, temp_layer_name) else: - arcpy.management.DeleteFeatures(feature_layer) + return arcpy.management.MakeFeatureLayer(feature_class, temp_layer_name) @staticmethod - def _make_feature_layer(feature_class, temp_layer_name, is_table): + def _delete_features_or_rows(feature_layer: Result1[str | Table | Layer] | Result1[str | Layer], is_table: bool): + """Single method to handle deleting features or rows based on is_table parameter""" if is_table: - return arcpy.management.MakeTableView(feature_class, temp_layer_name) + arcpy.management.DeleteRows(feature_layer) else: - return arcpy.management.MakeFeatureLayer(feature_class, temp_layer_name) + arcpy.management.DeleteFeatures(feature_layer) diff --git a/tests/test_duplicates.py b/tests/test_duplicates.py new file mode 100644 index 0000000..cface68 --- /dev/null +++ b/tests/test_duplicates.py @@ -0,0 +1,386 @@ +import sys +from unittest.mock import MagicMock, call, patch + +import pytest + +#: Mock arcpy before importing the module under test +arcpy_mock = MagicMock() +sys.modules["arcpy"] = arcpy_mock +sys.modules["arcpy.da"] = arcpy_mock.da +sys.modules["arcpy._mp"] = arcpy_mock._mp +sys.modules["arcpy.typing"] = arcpy_mock.typing +sys.modules["arcpy.typing.gp"] = arcpy_mock.typing.gp +sys.modules["xxhash"] = MagicMock() + +from sweeper.sweepers.duplicates import DuplicateTest # noqa: E402 + + +class TestChunkOidList: + def test_chunk_oid_list_small_list_returns_single_chunk(self): + lst = [1, 2, 3] + result = list(DuplicateTest._chunk_oid_list(lst, 10)) + + assert result == [[1, 2, 3]] + + def test_chunk_oid_list_exact_chunk_size_returns_single_chunk(self): + lst = list(range(10)) + result = list(DuplicateTest._chunk_oid_list(lst, 10)) + + assert result == [list(range(10))] + + def test_chunk_oid_list_large_list_returns_multiple_chunks(self): + lst = list(range(25)) + result = list(DuplicateTest._chunk_oid_list(lst, 10)) + + assert len(result) == 3 + assert result[0] == list(range(10)) + assert result[1] == list(range(10, 20)) + assert result[2] == list(range(20, 25)) + + def test_chunk_oid_list_empty_list_returns_single_empty_chunk(self): + result = list(DuplicateTest._chunk_oid_list([], 10)) + + assert result == [[]] + + def test_chunk_oid_list_exactly_two_chunks(self): + lst = list(range(20)) + result = list(DuplicateTest._chunk_oid_list(lst, 10)) + + assert len(result) == 2 + assert result[0] == list(range(10)) + assert result[1] == list(range(10, 20)) + + +class TestMakeFeatureLayer: + def test_make_feature_layer_is_table_calls_make_table_view(self): + DuplicateTest._make_feature_layer("my_table", "temp_layer", True) + + arcpy_mock.management.MakeTableView.assert_called_once_with("my_table", "temp_layer") + + def test_make_feature_layer_is_not_table_calls_make_feature_layer(self): + DuplicateTest._make_feature_layer("my_fc", "temp_layer", False) + + arcpy_mock.management.MakeFeatureLayer.assert_called_once_with("my_fc", "temp_layer") + + +class TestDeleteFeaturesOrRows: + def test_delete_features_or_rows_is_table_calls_delete_rows(self): + layer_mock = MagicMock() + DuplicateTest._delete_features_or_rows(layer_mock, True) + + arcpy_mock.management.DeleteRows.assert_called_once_with(layer_mock) + + def test_delete_features_or_rows_is_not_table_calls_delete_features(self): + layer_mock = MagicMock() + DuplicateTest._delete_features_or_rows(layer_mock, False) + + arcpy_mock.management.DeleteFeatures.assert_called_once_with(layer_mock) + + +class TestTryFix: + def setup_method(self): + arcpy_mock.reset_mock() + arcpy_mock.management.SelectLayerByAttribute.side_effect = None + arcpy_mock.EnvManager.return_value.__enter__ = MagicMock(return_value=None) + arcpy_mock.EnvManager.return_value.__exit__ = MagicMock(return_value=False) + + def test_try_fix_no_oids_returns_empty_report(self): + sweeper = DuplicateTest("workspace", "my_table") + + report = sweeper.try_fix() + + assert report["title"] == "Duplicate Try Fix" + assert report["feature_class"] == "my_table" + assert report["issues"] == [] + assert report["fixes"] == [] + + def test_try_fix_successful_deletes_reports_count(self): + sweeper = DuplicateTest("workspace", "my_table") + sweeper.oids_with_issues = [1, 2, 3] + layer_mock = MagicMock() + arcpy_mock.management.MakeFeatureLayer.return_value = layer_mock + + report = sweeper.try_fix() + + assert "3 records deleted successfully" in report["fixes"] + assert report["issues"] == [] + + def test_try_fix_failed_batch_reports_error(self): + sweeper = DuplicateTest("workspace", "my_table") + sweeper.oids_with_issues = [1, 2, 3] + layer_mock = MagicMock() + arcpy_mock.management.MakeFeatureLayer.return_value = layer_mock + arcpy_mock.management.SelectLayerByAttribute.side_effect = [Exception("delete error"), MagicMock()] + + report = sweeper.try_fix() + + assert any("unable to delete features in batch 1" in issue for issue in report["issues"]) + assert any("1 batch(es) with 3 total records had errors deleting." in issue for issue in report["issues"]) + + def test_try_fix_clear_selection_called_in_finally(self): + sweeper = DuplicateTest("workspace", "my_table") + sweeper.oids_with_issues = [1, 2] + layer_mock = MagicMock() + arcpy_mock.management.MakeFeatureLayer.return_value = layer_mock + arcpy_mock.management.SelectLayerByAttribute.side_effect = [Exception("fail"), None] + + sweeper.try_fix() + + clear_calls = [ + c for c in arcpy_mock.management.SelectLayerByAttribute.call_args_list if "CLEAR_SELECTION" in c.args + ] + assert len(clear_calls) >= 1 + + def test_try_fix_is_table_uses_make_table_view(self): + sweeper = DuplicateTest("workspace", "my_table") + sweeper.oids_with_issues = [1] + sweeper.is_table = True + table_view_mock = MagicMock() + arcpy_mock.management.MakeTableView.return_value = table_view_mock + + report = sweeper.try_fix() + + arcpy_mock.management.MakeTableView.assert_called_once_with("my_table", "temp_layer") + assert "1 records deleted successfully" in report["fixes"] + + def test_try_fix_multiple_batches_accumulates_successful_deletes(self): + sweeper = DuplicateTest("workspace", "my_table") + sweeper.oids_with_issues = list(range(1500)) + layer_mock = MagicMock() + arcpy_mock.management.MakeFeatureLayer.return_value = layer_mock + arcpy_mock.management.SelectLayerByAttribute.side_effect = None + arcpy_mock.management.SelectLayerByAttribute.return_value = MagicMock() + + report = sweeper.try_fix() + + assert "1500 records deleted successfully" in report["fixes"] + + def test_try_fix_partial_failure_reports_correct_counts(self): + sweeper = DuplicateTest("workspace", "my_table") + sweeper.oids_with_issues = list(range(1500)) + layer_mock = MagicMock() + arcpy_mock.management.MakeFeatureLayer.return_value = layer_mock + + call_count = 0 + + def select_side_effect(*args, **kwargs): + nonlocal call_count + if "CLEAR_SELECTION" in args: + return MagicMock() + call_count += 1 + if call_count == 1: + raise Exception("batch 1 failed") + + return MagicMock() + + arcpy_mock.management.SelectLayerByAttribute.side_effect = select_side_effect + + report = sweeper.try_fix() + + assert any("500 records deleted successfully" in fix for fix in report["fixes"]) + assert any("1 batch(es) with 1000 total records had errors deleting." in issue for issue in report["issues"]) + + +class TestSweep: + def setup_method(self): + arcpy_mock.reset_mock() + arcpy_mock.EnvManager.return_value.__enter__ = MagicMock(return_value=None) + arcpy_mock.EnvManager.return_value.__exit__ = MagicMock(return_value=False) + + def _make_field(self, name: str) -> MagicMock: + field = MagicMock() + field.name = name + + return field + + def test_sweep_table_no_duplicates_returns_empty_issues(self): + sweeper = DuplicateTest("workspace", "my_table") + description = { + "dataType": "Table", + "hasGlobalID": False, + "hasOID": True, + "OIDFieldName": "OBJECTID", + "fields": [self._make_field("Name"), self._make_field("OBJECTID")], + "shapeFieldName": "Shape", + "globalIDFieldName": "GlobalID", + } + arcpy_mock.da.Describe.return_value = description + + rows = [("Alice", 1), ("Bob", 2)] + cursor_mock = MagicMock() + cursor_mock.__enter__ = MagicMock(return_value=iter(rows)) + cursor_mock.__exit__ = MagicMock(return_value=False) + arcpy_mock.da.SearchCursor.return_value = cursor_mock + + import xxhash + + real_hashes = {} + + def fake_xxh64(data): + hasher = MagicMock() + hasher.hexdigest.return_value = str(hash(data)) + real_hashes[data] = hasher.hexdigest.return_value + + return hasher + + sys.modules["xxhash"].xxh64.side_effect = fake_xxh64 + + report = sweeper.sweep() + + assert report["issues"] == [] + assert sweeper.is_table is True + + def test_sweep_table_with_duplicates_reports_oids(self): + sweeper = DuplicateTest("workspace", "my_table") + description = { + "dataType": "Table", + "hasGlobalID": False, + "hasOID": True, + "OIDFieldName": "OBJECTID", + "fields": [self._make_field("Name"), self._make_field("OBJECTID")], + "shapeFieldName": "Shape", + "globalIDFieldName": "GlobalID", + } + arcpy_mock.da.Describe.return_value = description + + rows = [("Alice", 1), ("Alice", 2)] + cursor_mock = MagicMock() + cursor_mock.__enter__ = MagicMock(return_value=iter(rows)) + cursor_mock.__exit__ = MagicMock(return_value=False) + arcpy_mock.da.SearchCursor.return_value = cursor_mock + + digest_counter = {"count": 0} + + def fake_xxh64(data): + hasher = MagicMock() + digest_counter["count"] += 1 + #: Return the same hash for both rows to simulate duplicate + hasher.hexdigest.return_value = "same_hash" + + return hasher + + sys.modules["xxhash"].xxh64.side_effect = fake_xxh64 + + report = sweeper.sweep() + + assert "2" in report["issues"] + assert 2 in sweeper.oids_with_issues + + def test_sweep_feature_class_skips_none_shape(self): + sweeper = DuplicateTest("workspace", "my_fc") + description = { + "dataType": "FeatureClass", + "hasGlobalID": False, + "hasOID": True, + "OIDFieldName": "OBJECTID", + "shapeFieldName": "Shape", + "globalIDFieldName": "GlobalID", + "fields": [self._make_field("Name"), self._make_field("OBJECTID"), self._make_field("Shape")], + } + arcpy_mock.da.Describe.return_value = description + + rows = [("Alice", 1, None)] + cursor_mock = MagicMock() + cursor_mock.__enter__ = MagicMock(return_value=iter(rows)) + cursor_mock.__exit__ = MagicMock(return_value=False) + arcpy_mock.da.SearchCursor.return_value = cursor_mock + + report = sweeper.sweep() + + assert report["issues"] == [] + assert sweeper.is_table is False + + def test_sweep_feature_class_with_duplicates_reports_oids(self): + sweeper = DuplicateTest("workspace", "my_fc") + description = { + "dataType": "FeatureClass", + "hasGlobalID": False, + "hasOID": True, + "OIDFieldName": "OBJECTID", + "shapeFieldName": "Shape", + "globalIDFieldName": "GlobalID", + "fields": [self._make_field("Name"), self._make_field("OBJECTID"), self._make_field("Shape")], + } + arcpy_mock.da.Describe.return_value = description + + rows = [ + ("Alice", 1, "POINT (1.123456 2.123456)"), + ("Alice", 2, "POINT (1.123456 2.123456)"), + ] + cursor_mock = MagicMock() + cursor_mock.__enter__ = MagicMock(return_value=iter(rows)) + cursor_mock.__exit__ = MagicMock(return_value=False) + arcpy_mock.da.SearchCursor.return_value = cursor_mock + + def fake_xxh64(data): + hasher = MagicMock() + hasher.hexdigest.return_value = "same_hash" + + return hasher + + sys.modules["xxhash"].xxh64.side_effect = fake_xxh64 + + report = sweeper.sweep() + + assert "2" in report["issues"] + assert 2 in sweeper.oids_with_issues + + def test_sweep_skips_global_id_field(self): + sweeper = DuplicateTest("workspace", "my_table") + description = { + "dataType": "Table", + "hasGlobalID": True, + "globalIDFieldName": "GlobalID", + "hasOID": True, + "OIDFieldName": "OBJECTID", + "shapeFieldName": "Shape", + "fields": [self._make_field("Name"), self._make_field("GlobalID"), self._make_field("OBJECTID")], + } + arcpy_mock.da.Describe.return_value = description + + rows = [("Alice", 1)] + cursor_mock = MagicMock() + cursor_mock.__enter__ = MagicMock(return_value=iter(rows)) + cursor_mock.__exit__ = MagicMock(return_value=False) + arcpy_mock.da.SearchCursor.return_value = cursor_mock + + def fake_xxh64(data): + hasher = MagicMock() + hasher.hexdigest.return_value = "unique_hash" + + return hasher + + sys.modules["xxhash"].xxh64.side_effect = fake_xxh64 + + sweeper.sweep() + + call_args = arcpy_mock.da.SearchCursor.call_args + fields_used = call_args[0][1] + assert "GlobalID" not in fields_used + + def test_sweep_returns_correct_report_structure(self): + sweeper = DuplicateTest("workspace", "my_table") + description = { + "dataType": "Table", + "hasGlobalID": False, + "hasOID": False, + "OIDFieldName": "OBJECTID", + "shapeFieldName": "Shape", + "globalIDFieldName": "GlobalID", + "fields": [self._make_field("Name")], + } + arcpy_mock.da.Describe.return_value = description + + rows = [] + cursor_mock = MagicMock() + cursor_mock.__enter__ = MagicMock(return_value=iter(rows)) + cursor_mock.__exit__ = MagicMock(return_value=False) + arcpy_mock.da.SearchCursor.return_value = cursor_mock + + report = sweeper.sweep() + + assert "title" in report + assert "feature_class" in report + assert "issues" in report + assert report["title"] == "Duplicate Test" + assert report["feature_class"] == "my_table" From 731d11864f70b0523312dfcfd1e3eaf8890aee72 Mon Sep 17 00:00:00 2001 From: Jake Adams Date: Fri, 20 Mar 2026 08:49:08 -0600 Subject: [PATCH 05/11] chore(tests): fix ruff errors --- tests/test_duplicates.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tests/test_duplicates.py b/tests/test_duplicates.py index cface68..4a5bbf3 100644 --- a/tests/test_duplicates.py +++ b/tests/test_duplicates.py @@ -1,7 +1,5 @@ import sys -from unittest.mock import MagicMock, call, patch - -import pytest +from unittest.mock import MagicMock #: Mock arcpy before importing the module under test arcpy_mock = MagicMock() @@ -181,6 +179,7 @@ def select_side_effect(*args, **kwargs): assert any("1 batch(es) with 1000 total records had errors deleting." in issue for issue in report["issues"]) +#: This test class was added by copilot when creating tests for the try_fix method. They pass, but I've not verified if they're actually sane. class TestSweep: def setup_method(self): arcpy_mock.reset_mock() @@ -212,7 +211,7 @@ def test_sweep_table_no_duplicates_returns_empty_issues(self): cursor_mock.__exit__ = MagicMock(return_value=False) arcpy_mock.da.SearchCursor.return_value = cursor_mock - import xxhash + # import xxhash #: This is unused; not sure if manually inserting the mock into sys.modules works; this was a copilot-created test. real_hashes = {} From 7bea4976cecbaa984217883ff319221084c58482 Mon Sep 17 00:00:00 2001 From: Jake Adams Date: Fri, 20 Mar 2026 09:25:02 -0600 Subject: [PATCH 06/11] chore: suggestions from copilot review --- src/sweeper/sweepers/duplicates.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/sweeper/sweepers/duplicates.py b/src/sweeper/sweepers/duplicates.py index a7f8e91..54b0fc8 100644 --- a/src/sweeper/sweepers/duplicates.py +++ b/src/sweeper/sweepers/duplicates.py @@ -2,11 +2,10 @@ # * coding: utf8 * import logging import re +import typing from typing import Generator import arcpy -from arcpy._mp import Layer, Table -from arcpy.typing.gp import Result1 from xxhash import xxh64 from .base import SweeperBase @@ -137,7 +136,10 @@ def try_fix(self): finally: arcpy.management.SelectLayerByAttribute(all_features, "CLEAR_SELECTION") - report["fixes"].append(f"{successful_deletes} records deleted successfully") + if arcpy.Exists(temp_feature_layer): + arcpy.management.Delete(temp_feature_layer) + if successful_deletes > 0: + report["fixes"].append(f"{successful_deletes} records deleted successfully") if failed_deletes > 0: report["issues"].append( f"{failed_batches} batch(es) with {failed_deletes} total records had errors deleting." @@ -163,17 +165,17 @@ def _chunk_oid_list(lst: list, chunk_size: int) -> Generator[list, None, None]: yield lst[i : i + chunk_size] @staticmethod - def _make_feature_layer( - feature_class: str, temp_layer_name: str, is_table: bool - ) -> Result1[str | Table | Layer] | Result1[str | Layer]: + def _make_feature_layer(feature_class: str, temp_layer_name: str, is_table: bool) -> typing.Any: """Single method to handle table or layer creation based on is_table parameter""" + #: arcpy's typing gets really convoluted, so we're just using typing.Any. + if is_table: return arcpy.management.MakeTableView(feature_class, temp_layer_name) else: return arcpy.management.MakeFeatureLayer(feature_class, temp_layer_name) @staticmethod - def _delete_features_or_rows(feature_layer: Result1[str | Table | Layer] | Result1[str | Layer], is_table: bool): + def _delete_features_or_rows(feature_layer: typing.Any, is_table: bool): """Single method to handle deleting features or rows based on is_table parameter""" if is_table: arcpy.management.DeleteRows(feature_layer) From 689d4e198f134e34d6c9d191ea996817c81b0ce0 Mon Sep 17 00:00:00 2001 From: Jake Adams Date: Fri, 20 Mar 2026 10:06:38 -0600 Subject: [PATCH 07/11] chore: Add an OID selection check to safeguard delete --- src/sweeper/sweepers/duplicates.py | 32 ++++++---- tests/test_duplicates.py | 93 ++++++++++++++++++++++++++++-- 2 files changed, 110 insertions(+), 15 deletions(-) diff --git a/src/sweeper/sweepers/duplicates.py b/src/sweeper/sweepers/duplicates.py index 54b0fc8..5b54a6a 100644 --- a/src/sweeper/sweepers/duplicates.py +++ b/src/sweeper/sweepers/duplicates.py @@ -117,7 +117,7 @@ def try_fix(self): failed_batches = 0 with arcpy.EnvManager(workspace=self.workspace): - all_features = self._make_feature_layer(self.table_name, temp_feature_layer, self.is_table) + all_features = self._make_feature_layer(temp_feature_layer) for index, list_of_oids in enumerate(lists_of_oids, start=1): sql = f'"OBJECTID" IN ({",".join([str(oid) for oid in list_of_oids])})' @@ -125,7 +125,11 @@ def try_fix(self): try: log.info(f"Batch {index}: attempting to delete {len(list_of_oids)} duplicate records") arcpy.management.SelectLayerByAttribute(all_features, "NEW_SELECTION", sql) - self._delete_features_or_rows(all_features, self.is_table) + if not self._valid_selection(all_features, list_of_oids): + raise RuntimeError( + f"Invalid selection for batch {index}. The OIDs in the selection do not match the expected OIDs." + ) + self._delete_features_or_rows(all_features) successful_deletes += len(list_of_oids) except Exception as error: error_message = f"unable to delete features in batch {index}: {error}" @@ -164,20 +168,28 @@ def _chunk_oid_list(lst: list, chunk_size: int) -> Generator[list, None, None]: for i in range(0, len(lst), chunk_size): yield lst[i : i + chunk_size] - @staticmethod - def _make_feature_layer(feature_class: str, temp_layer_name: str, is_table: bool) -> typing.Any: + def _make_feature_layer(self, temp_layer_name: str) -> typing.Any: """Single method to handle table or layer creation based on is_table parameter""" #: arcpy's typing gets really convoluted, so we're just using typing.Any. - if is_table: - return arcpy.management.MakeTableView(feature_class, temp_layer_name) + if self.is_table: + return arcpy.management.MakeTableView(self.table_name, temp_layer_name) else: - return arcpy.management.MakeFeatureLayer(feature_class, temp_layer_name) + return arcpy.management.MakeFeatureLayer(self.table_name, temp_layer_name) - @staticmethod - def _delete_features_or_rows(feature_layer: typing.Any, is_table: bool): + def _delete_features_or_rows(self, feature_layer: typing.Any): """Single method to handle deleting features or rows based on is_table parameter""" - if is_table: + if self.is_table: arcpy.management.DeleteRows(feature_layer) else: arcpy.management.DeleteFeatures(feature_layer) + + @staticmethod + def _valid_selection(feature_layer_or_table: typing.Any, list_of_oids: list) -> bool: + """Makes sure the selection is valid by comparing the OBJECTIDs in the selection to the list of OBJECTIDs used to create the where clause. This is a safeguard to prevent deleting the entire table if something goes wrong with the where clause.""" + with arcpy.da.SearchCursor(feature_layer_or_table, ["OID@"]) as cursor: + selected_oids = {str(row[0]) for row in cursor} + if set([str(oid) for oid in list_of_oids]) != selected_oids: + log.info("Selected OIDs do not match expected OIDs.") + return False + return True diff --git a/tests/test_duplicates.py b/tests/test_duplicates.py index 4a5bbf3..6b5ddfa 100644 --- a/tests/test_duplicates.py +++ b/tests/test_duplicates.py @@ -1,5 +1,5 @@ import sys -from unittest.mock import MagicMock +from unittest.mock import MagicMock, patch #: Mock arcpy before importing the module under test arcpy_mock = MagicMock() @@ -50,27 +50,39 @@ def test_chunk_oid_list_exactly_two_chunks(self): class TestMakeFeatureLayer: + def setup_method(self): + arcpy_mock.reset_mock() + def test_make_feature_layer_is_table_calls_make_table_view(self): - DuplicateTest._make_feature_layer("my_table", "temp_layer", True) + sweeper = DuplicateTest("workspace", "my_table") + sweeper.is_table = True + sweeper._make_feature_layer("temp_layer") arcpy_mock.management.MakeTableView.assert_called_once_with("my_table", "temp_layer") def test_make_feature_layer_is_not_table_calls_make_feature_layer(self): - DuplicateTest._make_feature_layer("my_fc", "temp_layer", False) + sweeper = DuplicateTest("workspace", "my_fc") + sweeper._make_feature_layer("temp_layer") arcpy_mock.management.MakeFeatureLayer.assert_called_once_with("my_fc", "temp_layer") class TestDeleteFeaturesOrRows: + def setup_method(self): + arcpy_mock.reset_mock() + def test_delete_features_or_rows_is_table_calls_delete_rows(self): + sweeper = DuplicateTest("workspace", "my_table") + sweeper.is_table = True layer_mock = MagicMock() - DuplicateTest._delete_features_or_rows(layer_mock, True) + sweeper._delete_features_or_rows(layer_mock) arcpy_mock.management.DeleteRows.assert_called_once_with(layer_mock) def test_delete_features_or_rows_is_not_table_calls_delete_features(self): + sweeper = DuplicateTest("workspace", "my_fc") layer_mock = MagicMock() - DuplicateTest._delete_features_or_rows(layer_mock, False) + sweeper._delete_features_or_rows(layer_mock) arcpy_mock.management.DeleteFeatures.assert_called_once_with(layer_mock) @@ -81,6 +93,11 @@ def setup_method(self): arcpy_mock.management.SelectLayerByAttribute.side_effect = None arcpy_mock.EnvManager.return_value.__enter__ = MagicMock(return_value=None) arcpy_mock.EnvManager.return_value.__exit__ = MagicMock(return_value=False) + self._valid_selection_patcher = patch.object(DuplicateTest, "_valid_selection", return_value=True) + self._valid_selection_patcher.start() + + def teardown_method(self): + self._valid_selection_patcher.stop() def test_try_fix_no_oids_returns_empty_report(self): sweeper = DuplicateTest("workspace", "my_table") @@ -178,6 +195,72 @@ def select_side_effect(*args, **kwargs): assert any("500 records deleted successfully" in fix for fix in report["fixes"]) assert any("1 batch(es) with 1000 total records had errors deleting." in issue for issue in report["issues"]) + def test_try_fix_invalid_selection_reports_error(self): + sweeper = DuplicateTest("workspace", "my_table") + sweeper.oids_with_issues = [1, 2, 3] + layer_mock = MagicMock() + arcpy_mock.management.MakeFeatureLayer.return_value = layer_mock + + with patch.object(DuplicateTest, "_valid_selection", return_value=False): + report = sweeper.try_fix() + + assert any("Invalid selection for batch 1" in issue for issue in report["issues"]) + assert any("1 batch(es) with 3 total records had errors deleting." in issue for issue in report["issues"]) + + def test_try_fix_continues_after_invalid_selection_batch(self): + sweeper = DuplicateTest("workspace", "my_table") + sweeper.oids_with_issues = list(range(1500)) + layer_mock = MagicMock() + arcpy_mock.management.MakeFeatureLayer.return_value = layer_mock + + #: Return False for batch 1, True for all subsequent batches + valid_selection_results = iter([False, True, True]) + with patch.object(DuplicateTest, "_valid_selection", side_effect=valid_selection_results): + report = sweeper.try_fix() + + assert any("500 records deleted successfully" in fix for fix in report["fixes"]) + assert any("Invalid selection for batch 1" in issue for issue in report["issues"]) + assert any("1 batch(es) with 1000 total records had errors deleting." in issue for issue in report["issues"]) + + +class TestValidSelection: + def setup_method(self): + arcpy_mock.reset_mock() + + def _make_cursor_mock(self, rows): + cursor_mock = MagicMock() + cursor_mock.__enter__ = MagicMock(return_value=iter(rows)) + cursor_mock.__exit__ = MagicMock(return_value=False) + arcpy_mock.da.SearchCursor.return_value = cursor_mock + + def test_valid_selection_returns_true_when_oids_match(self): + self._make_cursor_mock([(1,), (2,), (3,)]) + + result = DuplicateTest._valid_selection(MagicMock(), [1, 2, 3]) + + assert result is True + + def test_valid_selection_returns_false_when_oids_do_not_match(self): + self._make_cursor_mock([(1,), (2,)]) + + result = DuplicateTest._valid_selection(MagicMock(), [1, 2, 3]) + + assert result is False + + def test_valid_selection_returns_false_when_selection_is_empty(self): + self._make_cursor_mock([]) + + result = DuplicateTest._valid_selection(MagicMock(), [1, 2, 3]) + + assert result is False + + def test_valid_selection_returns_true_for_single_oid_match(self): + self._make_cursor_mock([(42,)]) + + result = DuplicateTest._valid_selection(MagicMock(), [42]) + + assert result is True + #: This test class was added by copilot when creating tests for the try_fix method. They pass, but I've not verified if they're actually sane. class TestSweep: From f7f613edcfc040563a2fb5a9cdffc2a81dceb8db Mon Sep 17 00:00:00 2001 From: Jake Adams Date: Fri, 20 Mar 2026 10:17:12 -0600 Subject: [PATCH 08/11] Update tests/test_duplicates.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- tests/test_duplicates.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_duplicates.py b/tests/test_duplicates.py index 6b5ddfa..7969ae2 100644 --- a/tests/test_duplicates.py +++ b/tests/test_duplicates.py @@ -445,11 +445,11 @@ def test_sweep_returns_correct_report_structure(self): description = { "dataType": "Table", "hasGlobalID": False, - "hasOID": False, + "hasOID": True, "OIDFieldName": "OBJECTID", "shapeFieldName": "Shape", "globalIDFieldName": "GlobalID", - "fields": [self._make_field("Name")], + "fields": [self._make_field("Name"), self._make_field("OBJECTID")], } arcpy_mock.da.Describe.return_value = description From 41715a05be08745730f298c16da16dea657b8f63 Mon Sep 17 00:00:00 2001 From: Jake Adams Date: Fri, 20 Mar 2026 10:22:10 -0600 Subject: [PATCH 09/11] Update tests/test_duplicates.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- tests/test_duplicates.py | 46 +++++++++++++++++++++++++++++++++------- 1 file changed, 38 insertions(+), 8 deletions(-) diff --git a/tests/test_duplicates.py b/tests/test_duplicates.py index 7969ae2..1012bef 100644 --- a/tests/test_duplicates.py +++ b/tests/test_duplicates.py @@ -1,18 +1,48 @@ import sys from unittest.mock import MagicMock, patch +import pytest + #: Mock arcpy before importing the module under test arcpy_mock = MagicMock() -sys.modules["arcpy"] = arcpy_mock -sys.modules["arcpy.da"] = arcpy_mock.da -sys.modules["arcpy._mp"] = arcpy_mock._mp -sys.modules["arcpy.typing"] = arcpy_mock.typing -sys.modules["arcpy.typing.gp"] = arcpy_mock.typing.gp -sys.modules["xxhash"] = MagicMock() - -from sweeper.sweepers.duplicates import DuplicateTest # noqa: E402 +@pytest.fixture(autouse=True, scope="module") +def _mock_arcpy_and_xxhash(): + """Mock arcpy (and submodules) and xxhash for this test module only.""" + # Save original modules so we can restore them after the tests run. + module_names = [ + "arcpy", + "arcpy.da", + "arcpy._mp", + "arcpy.typing", + "arcpy.typing.gp", + "xxhash", + ] + originals = {name: sys.modules.get(name) for name in module_names} + + # Install mocks into sys.modules for the duration of this module's tests. + sys.modules["arcpy"] = arcpy_mock + sys.modules["arcpy.da"] = arcpy_mock.da + sys.modules["arcpy._mp"] = arcpy_mock._mp + sys.modules["arcpy.typing"] = arcpy_mock.typing + sys.modules["arcpy.typing.gp"] = arcpy_mock.typing.gp + sys.modules["xxhash"] = MagicMock() + + from sweeper.sweepers.duplicates import DuplicateTest as _DuplicateTest # noqa: E402 + + # Expose DuplicateTest at module level so existing tests continue to work. + globals()["DuplicateTest"] = _DuplicateTest + + try: + yield + finally: + # Restore the original sys.modules entries. + for name, original in originals.items(): + if original is None: + sys.modules.pop(name, None) + else: + sys.modules[name] = original class TestChunkOidList: def test_chunk_oid_list_small_list_returns_single_chunk(self): lst = [1, 2, 3] From aac15b5273b91c19ccc113936080e2c1a24f52f4 Mon Sep 17 00:00:00 2001 From: Copilot <198982749+Copilot@users.noreply.github.com> Date: Fri, 20 Mar 2026 10:42:33 -0600 Subject: [PATCH 10/11] fix(tests): resolve ruff F821 errors for DuplicateTest in test_duplicates.py (#148) * Initial plan * fix(tests): resolve ruff F821 errors by importing DuplicateTest at module level Co-authored-by: jacobdadams <38168030+jacobdadams@users.noreply.github.com> Agent-Logs-Url: https://github.com/agrc/sweeper/sessions/ba6f55df-5b0c-43fd-a7dd-25596cf24014 --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: jacobdadams <38168030+jacobdadams@users.noreply.github.com> --- tests/test_duplicates.py | 67 +++++++++++++++++++--------------------- 1 file changed, 31 insertions(+), 36 deletions(-) diff --git a/tests/test_duplicates.py b/tests/test_duplicates.py index 1012bef..e3acb88 100644 --- a/tests/test_duplicates.py +++ b/tests/test_duplicates.py @@ -3,46 +3,41 @@ import pytest -#: Mock arcpy before importing the module under test +# ruff: isort: off +#: Mock arcpy and its submodules before importing the module under test arcpy_mock = MagicMock() +_MOCK_MODULE_NAMES = [ + "arcpy", + "arcpy.da", + "arcpy._mp", + "arcpy.typing", + "arcpy.typing.gp", + "xxhash", +] +_originals = {name: sys.modules.get(name) for name in _MOCK_MODULE_NAMES} +sys.modules["arcpy"] = arcpy_mock +sys.modules["arcpy.da"] = arcpy_mock.da +sys.modules["arcpy._mp"] = arcpy_mock._mp +sys.modules["arcpy.typing"] = arcpy_mock.typing +sys.modules["arcpy.typing.gp"] = arcpy_mock.typing.gp +sys.modules["xxhash"] = MagicMock() + +from sweeper.sweepers.duplicates import DuplicateTest # noqa: E402 + +# ruff: isort: on @pytest.fixture(autouse=True, scope="module") -def _mock_arcpy_and_xxhash(): - """Mock arcpy (and submodules) and xxhash for this test module only.""" - # Save original modules so we can restore them after the tests run. - module_names = [ - "arcpy", - "arcpy.da", - "arcpy._mp", - "arcpy.typing", - "arcpy.typing.gp", - "xxhash", - ] - originals = {name: sys.modules.get(name) for name in module_names} - - # Install mocks into sys.modules for the duration of this module's tests. - sys.modules["arcpy"] = arcpy_mock - sys.modules["arcpy.da"] = arcpy_mock.da - sys.modules["arcpy._mp"] = arcpy_mock._mp - sys.modules["arcpy.typing"] = arcpy_mock.typing - sys.modules["arcpy.typing.gp"] = arcpy_mock.typing.gp - sys.modules["xxhash"] = MagicMock() - - from sweeper.sweepers.duplicates import DuplicateTest as _DuplicateTest # noqa: E402 - - # Expose DuplicateTest at module level so existing tests continue to work. - globals()["DuplicateTest"] = _DuplicateTest - - try: - yield - finally: - # Restore the original sys.modules entries. - for name, original in originals.items(): - if original is None: - sys.modules.pop(name, None) - else: - sys.modules[name] = original +def _restore_sys_modules(): + """Restore sys.modules entries modified by this module after all tests complete.""" + yield + for name, original in _originals.items(): + if original is None: + sys.modules.pop(name, None) + else: + sys.modules[name] = original + + class TestChunkOidList: def test_chunk_oid_list_small_list_returns_single_chunk(self): lst = [1, 2, 3] From 5b4e75ccb9f2be7fd696df3f98f4d18a2db1348c Mon Sep 17 00:00:00 2001 From: Jake Adams Date: Mon, 23 Mar 2026 16:41:07 -0600 Subject: [PATCH 11/11] chore: PR suggestion Co-authored-by: Scott Davis --- src/sweeper/sweepers/duplicates.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/sweeper/sweepers/duplicates.py b/src/sweeper/sweepers/duplicates.py index 5b54a6a..044d5b8 100644 --- a/src/sweeper/sweepers/duplicates.py +++ b/src/sweeper/sweepers/duplicates.py @@ -103,7 +103,6 @@ def try_fix(self): chunk_size = 1000 - # sql = f'"OBJECTID" IN ({",".join([str(oid) for oid in self.oids_with_issues])})' lists_of_oids = list(self._chunk_oid_list(self.oids_with_issues, chunk_size)) temp_feature_layer = "temp_layer"