From 0d49c3ab2b31d88f48fb775067565d46d13060ba Mon Sep 17 00:00:00 2001 From: Samaresh Kumar Singh Date: Fri, 3 Apr 2026 11:20:57 -0500 Subject: [PATCH 1/9] BUG: add sort() method to StringArray and ArrowStringArray (GH#64977) In pandas 3.x, Series.unique() started returning StringArray/ArrowStringArray instead of numpy.ndarray. Since numpy arrays have an in-place sort() method but extension arrays didn't, code that called .sort() on unique() results broke. Added sort() to NDArrayBackedExtensionArray and ArrowExtensionArray, and tests covering ascending, descending, NA placement, and the original repro. --- pandas/core/arrays/_mixins.py | 37 +++++++++++++++++++++ pandas/core/arrays/arrow/array.py | 37 +++++++++++++++++++++ pandas/tests/arrays/string_/test_string.py | 38 ++++++++++++++++++++++ 3 files changed, 112 insertions(+) diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index 22214bfd9ab87..c4454df604854 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -62,6 +62,7 @@ ScalarIndexer, SequenceIndexer, Shape, + SortKind, TakeIndexer, npt, ) @@ -231,6 +232,42 @@ def unique(self) -> Self: new_data = unique(self._ndarray) return self._from_backing_data(new_data) + def sort( + self, + *, + ascending: bool = True, + kind: SortKind = "quicksort", + na_position: str = "last", + ) -> None: + """ + Sort the array in-place. + + Parameters + ---------- + ascending : bool, default True + Whether to sort in ascending order. + kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, default 'quicksort' + Sorting algorithm. + na_position : {'first', 'last'}, default 'last' + If 'first', put NaN values at the beginning. + If 'last', put NaN values at the end. 
+ + Returns + ------- + None + + Examples + -------- + >>> arr = pd.array(["b", "a", "c"], dtype="str") + >>> arr.sort() + >>> arr + + ['a', 'b', 'c'] + Length: 3, dtype: str + """ + sort_indices = self.argsort(ascending=ascending, kind=kind, na_position=na_position) + self._ndarray[:] = self._ndarray[sort_indices] + @classmethod def _concat_same_type( cls, diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index ae84482cc1684..d1279e99e3faa 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -1389,6 +1389,43 @@ def argsort( np_result = result.to_numpy() return np_result.astype(np.intp, copy=False) + def sort( + self, + *, + ascending: bool = True, + kind: SortKind = "quicksort", + na_position: str = "last", + ) -> None: + """ + Sort the array in-place. + + Parameters + ---------- + ascending : bool, default True + Whether to sort in ascending order. + kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, default 'quicksort' + Sorting algorithm. + na_position : {'first', 'last'}, default 'last' + If 'first', put NaN values at the beginning. + If 'last', put NaN values at the end. 
+ + Returns + ------- + None + + Examples + -------- + >>> arr = pd.array(["b", "a", "c"], dtype="str[pyarrow]") + >>> arr.sort() + >>> arr + + ['a', 'b', 'c'] + Length: 3, dtype: str + """ + sort_indices = self.argsort(ascending=ascending, kind=kind, na_position=na_position) + sorted_array = self.take(sort_indices) + self._pa_array = sorted_array._pa_array + def _argmin_max(self, skipna: bool, method: str) -> int: if self._pa_array.length() in (0, self._pa_array.null_count) or ( self._hasna and not skipna diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index 3dd98f1f3a123..b86401c9725ea 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -650,3 +650,41 @@ def test_numpy_random_permute(dtype, box): result = rng.permutation(arr) assert isinstance(result, np.ndarray) assert sorted(result.tolist()) == ["a", "bb", "ccc"] + + +def test_sort_inplace(dtype): + arr = pd.array(["Bob", "Alice", "Charlie"], dtype=dtype) + result = arr.sort() + assert result is None + expected = pd.array(["Alice", "Bob", "Charlie"], dtype=dtype) + tm.assert_extension_array_equal(arr, expected) + + +def test_sort_descending(dtype): + arr = pd.array(["Bob", "Alice", "Charlie"], dtype=dtype) + arr.sort(ascending=False) + expected = pd.array(["Charlie", "Bob", "Alice"], dtype=dtype) + tm.assert_extension_array_equal(arr, expected) + + +def test_sort_with_na(dtype): + na = pd.NA if dtype.na_value is pd.NA else np.nan + arr = pd.array(["Bob", na, "Alice"], dtype=dtype) + arr.sort(na_position="last") + expected = pd.array(["Alice", "Bob", na], dtype=dtype) + tm.assert_extension_array_equal(arr, expected) + + arr2 = pd.array(["Bob", na, "Alice"], dtype=dtype) + arr2.sort(na_position="first") + expected2 = pd.array([na, "Alice", "Bob"], dtype=dtype) + tm.assert_extension_array_equal(arr2, expected2) + + +def test_sort_unique_result(dtype): + entries = [{"name": "Bob", "age": 30}, {"name": 
"Alice", "age": 25}] + df = pd.DataFrame(entries) + with pd.option_context("mode.string_storage", dtype.storage): + unique_names = df["name"].astype(dtype).unique() + unique_names.sort() + expected = pd.array(["Alice", "Bob"], dtype=dtype) + tm.assert_extension_array_equal(unique_names, expected) From 6fe3a9e78168037d832092a1c947752da8689c83 Mon Sep 17 00:00:00 2001 From: Samaresh Kumar Singh Date: Fri, 3 Apr 2026 11:26:31 -0500 Subject: [PATCH 2/9] CLN: fix line length in sort() methods --- pandas/core/arrays/_mixins.py | 4 +++- pandas/core/arrays/arrow/array.py | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index c4454df604854..ed0aed886cab8 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -265,7 +265,9 @@ def sort( ['a', 'b', 'c'] Length: 3, dtype: str """ - sort_indices = self.argsort(ascending=ascending, kind=kind, na_position=na_position) + sort_indices = self.argsort( + ascending=ascending, kind=kind, na_position=na_position + ) self._ndarray[:] = self._ndarray[sort_indices] @classmethod diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index d1279e99e3faa..875a9dea584e5 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -1422,7 +1422,9 @@ def sort( ['a', 'b', 'c'] Length: 3, dtype: str """ - sort_indices = self.argsort(ascending=ascending, kind=kind, na_position=na_position) + sort_indices = self.argsort( + ascending=ascending, kind=kind, na_position=na_position + ) sorted_array = self.take(sort_indices) self._pa_array = sorted_array._pa_array From 07e32814e0644419246fbeee61df6cbc8d7c437d Mon Sep 17 00:00:00 2001 From: Samaresh Kumar Singh Date: Fri, 3 Apr 2026 12:26:01 -0500 Subject: [PATCH 3/9] DOC: fix doctest dtype strings in sort() methods --- pandas/core/arrays/_mixins.py | 4 ++-- pandas/core/arrays/arrow/array.py | 4 ++-- 2 files changed, 4 insertions(+), 4 
deletions(-) diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index ed0aed886cab8..fb30958a3607b 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -258,12 +258,12 @@ def sort( Examples -------- - >>> arr = pd.array(["b", "a", "c"], dtype="str") + >>> arr = pd.array(["b", "a", "c"], dtype="string[python]") >>> arr.sort() >>> arr ['a', 'b', 'c'] - Length: 3, dtype: str + Length: 3, dtype: string """ sort_indices = self.argsort( ascending=ascending, kind=kind, na_position=na_position diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 875a9dea584e5..9e1902f857d46 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -1415,12 +1415,12 @@ def sort( Examples -------- - >>> arr = pd.array(["b", "a", "c"], dtype="str[pyarrow]") + >>> arr = pd.array(["b", "a", "c"], dtype="string[pyarrow]") >>> arr.sort() >>> arr ['a', 'b', 'c'] - Length: 3, dtype: str + Length: 3, dtype: string """ sort_indices = self.argsort( ascending=ascending, kind=kind, na_position=na_position From 6f0b32469a42f38b4028261972cbb7a0f8de9616 Mon Sep 17 00:00:00 2001 From: Samaresh Kumar Singh Date: Fri, 3 Apr 2026 21:41:19 -0500 Subject: [PATCH 4/9] BUG: add sort() to base ExtensionArray; keep efficient overrides in subclasses --- pandas/core/arrays/_mixins.py | 26 ------------------- pandas/core/arrays/arrow/array.py | 26 ------------------- pandas/core/arrays/base.py | 42 +++++++++++++++++++++++++++++++ 3 files changed, 42 insertions(+), 52 deletions(-) diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index fb30958a3607b..7acf5b8c1c025 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -239,32 +239,6 @@ def sort( kind: SortKind = "quicksort", na_position: str = "last", ) -> None: - """ - Sort the array in-place. - - Parameters - ---------- - ascending : bool, default True - Whether to sort in ascending order. 
- kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, default 'quicksort' - Sorting algorithm. - na_position : {'first', 'last'}, default 'last' - If 'first', put NaN values at the beginning. - If 'last', put NaN values at the end. - - Returns - ------- - None - - Examples - -------- - >>> arr = pd.array(["b", "a", "c"], dtype="string[python]") - >>> arr.sort() - >>> arr - - ['a', 'b', 'c'] - Length: 3, dtype: string - """ sort_indices = self.argsort( ascending=ascending, kind=kind, na_position=na_position ) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 9e1902f857d46..cbe48d3e2db52 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -1396,32 +1396,6 @@ def sort( kind: SortKind = "quicksort", na_position: str = "last", ) -> None: - """ - Sort the array in-place. - - Parameters - ---------- - ascending : bool, default True - Whether to sort in ascending order. - kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, default 'quicksort' - Sorting algorithm. - na_position : {'first', 'last'}, default 'last' - If 'first', put NaN values at the beginning. - If 'last', put NaN values at the end. - - Returns - ------- - None - - Examples - -------- - >>> arr = pd.array(["b", "a", "c"], dtype="string[pyarrow]") - >>> arr.sort() - >>> arr - - ['a', 'b', 'c'] - Length: 3, dtype: string - """ sort_indices = self.argsort( ascending=ascending, kind=kind, na_position=na_position ) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 756a6017d2c02..c2c160aaa7bfc 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -1067,6 +1067,48 @@ def argsort( mask=np.asarray(self.isna()), ) + def sort( + self, + *, + ascending: bool = True, + kind: SortKind = "quicksort", + na_position: str = "last", + ) -> None: + """ + Sort the array in-place. + + Parameters + ---------- + ascending : bool, default True + Whether to sort in ascending order. 
+ kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, default 'quicksort' + Sorting algorithm. + na_position : {'first', 'last'}, default 'last' + If 'first', put NaN values at the beginning. + If 'last', put NaN values at the end. + + Returns + ------- + None + + See Also + -------- + ExtensionArray.argsort : Return the indices that would sort this array. + + Examples + -------- + >>> arr = pd.array([3, 1, 2, 5, 4]) + >>> arr.sort() + >>> arr + <IntegerArray> + [1, 2, 3, 4, 5] + Length: 5, dtype: Int64 + """ + sort_indices = self.argsort( + ascending=ascending, kind=kind, na_position=na_position + ) + self[:] = self.take(sort_indices) + def argmin(self, skipna: bool = True) -> int: """ Return the index of minimum value. From 0a477cfe9f9a508414d2a43e77fdb97bd7a72beb Mon Sep 17 00:00:00 2001 From: Samaresh Kumar Singh Date: Tue, 5 May 2026 19:32:02 -0500 Subject: [PATCH 5/9] DOC/TEST: address review feedback on ExtensionArray.sort PR 1. Add whatsnew entry under Other enhancements in v3.1.0.rst 2. Add GH issue reference comments to new sort tests 3. Simplify NA handling in test_sort_with_na to use dtype.na_value directly 4. Parametrize test_sort_with_na on na_position to remove duplicated body --- doc/source/whatsnew/v3.1.0.rst | 1 + pandas/tests/arrays/string_/test_string.py | 21 ++++++++++++--------- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/doc/source/whatsnew/v3.1.0.rst b/doc/source/whatsnew/v3.1.0.rst index 85c03396a33a2..b03e46adcd89a 100644 --- a/doc/source/whatsnew/v3.1.0.rst +++ b/doc/source/whatsnew/v3.1.0.rst @@ -33,6 +33,7 @@ Other enhancements - :meth:`Timestamp.round`, :meth:`Timestamp.floor`, and :meth:`Timestamp.ceil` now officially accept :class:`Timedelta` arguments (:issue:`63687`) - :meth:`ExtensionArray.map` now calls :meth:`ExtensionArray._cast_pointwise_result` to retain the dtype backend, e.g. 
Arrow-backed arrays now preserve their Arrow dtype through ``map`` (:issue:`57189`, :issue:`62164`) - Added :meth:`ExtensionArray.count` (:issue:`64450`) +- Added :meth:`ExtensionArray.sort` for in-place sorting of :class:`ExtensionArray` (:issue:`64977`) - Added :meth:`Index.replace` method to support value replacement functionality similar to :meth:`Series.replace` (:issue:`19495`) - Display formatting for float sequences in DataFrame cells now respects the ``display.precision`` option (:issue:`60503`). - Improved the precision of float parsing in :func:`read_csv` (:issue:`64395`) diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index b86401c9725ea..5c302da874045 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -653,6 +653,7 @@ def test_numpy_random_permute(dtype, box): def test_sort_inplace(dtype): + # https://github.com/pandas-dev/pandas/issues/64977 arr = pd.array(["Bob", "Alice", "Charlie"], dtype=dtype) result = arr.sort() assert result is None @@ -661,26 +662,28 @@ def test_sort_inplace(dtype): def test_sort_descending(dtype): + # https://github.com/pandas-dev/pandas/issues/64977 arr = pd.array(["Bob", "Alice", "Charlie"], dtype=dtype) arr.sort(ascending=False) expected = pd.array(["Charlie", "Bob", "Alice"], dtype=dtype) tm.assert_extension_array_equal(arr, expected) -def test_sort_with_na(dtype): - na = pd.NA if dtype.na_value is pd.NA else np.nan +@pytest.mark.parametrize("na_position", ["first", "last"]) +def test_sort_with_na(dtype, na_position): + # https://github.com/pandas-dev/pandas/issues/64977 + na = dtype.na_value arr = pd.array(["Bob", na, "Alice"], dtype=dtype) - arr.sort(na_position="last") - expected = pd.array(["Alice", "Bob", na], dtype=dtype) + arr.sort(na_position=na_position) + if na_position == "last": + expected = pd.array(["Alice", "Bob", na], dtype=dtype) + else: + expected = pd.array([na, "Alice", "Bob"], dtype=dtype) 
tm.assert_extension_array_equal(arr, expected) - arr2 = pd.array(["Bob", na, "Alice"], dtype=dtype) - arr2.sort(na_position="first") - expected2 = pd.array([na, "Alice", "Bob"], dtype=dtype) - tm.assert_extension_array_equal(arr2, expected2) - def test_sort_unique_result(dtype): + # https://github.com/pandas-dev/pandas/issues/64977 entries = [{"name": "Bob", "age": 30}, {"name": "Alice", "age": 25}] df = pd.DataFrame(entries) with pd.option_context("mode.string_storage", dtype.storage): From 10f0c15baf7a35c77ca2ccf533b9f5d5e02f9ace Mon Sep 17 00:00:00 2001 From: Samaresh Kumar Singh Date: Fri, 15 May 2026 15:31:10 -0500 Subject: [PATCH 6/9] TST: move ExtensionArray.sort() tests to shared base suite Per review feedback on GH#65052, hoist the in-place sort tests from the StringArray-specific test file into BaseMethodsTests so every ExtensionArray subclass exercises the inherited sort() method through data_for_sorting / data_missing_for_sorting fixtures. --- pandas/tests/arrays/string_/test_string.py | 30 ---------------------- pandas/tests/extension/base/methods.py | 26 +++++++++++++++++++ 2 files changed, 26 insertions(+), 30 deletions(-) diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index 5c302da874045..298e8c5269806 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -652,36 +652,6 @@ def test_numpy_random_permute(dtype, box): assert sorted(result.tolist()) == ["a", "bb", "ccc"] -def test_sort_inplace(dtype): - # https://github.com/pandas-dev/pandas/issues/64977 - arr = pd.array(["Bob", "Alice", "Charlie"], dtype=dtype) - result = arr.sort() - assert result is None - expected = pd.array(["Alice", "Bob", "Charlie"], dtype=dtype) - tm.assert_extension_array_equal(arr, expected) - - -def test_sort_descending(dtype): - # https://github.com/pandas-dev/pandas/issues/64977 - arr = pd.array(["Bob", "Alice", "Charlie"], dtype=dtype) - arr.sort(ascending=False) - 
expected = pd.array(["Charlie", "Bob", "Alice"], dtype=dtype) - tm.assert_extension_array_equal(arr, expected) - - -@pytest.mark.parametrize("na_position", ["first", "last"]) -def test_sort_with_na(dtype, na_position): - # https://github.com/pandas-dev/pandas/issues/64977 - na = dtype.na_value - arr = pd.array(["Bob", na, "Alice"], dtype=dtype) - arr.sort(na_position=na_position) - if na_position == "last": - expected = pd.array(["Alice", "Bob", na], dtype=dtype) - else: - expected = pd.array([na, "Alice", "Bob"], dtype=dtype) - tm.assert_extension_array_equal(arr, expected) - - def test_sort_unique_result(dtype): # https://github.com/pandas-dev/pandas/issues/64977 entries = [{"name": "Bob", "age": 30}, {"name": "Alice", "age": 25}] diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index 46a2f048fed3f..caeac087a4aa1 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -292,6 +292,32 @@ def test_sort_values_frame(self, data_for_sorting, ascending): ) tm.assert_frame_equal(result, expected) + def test_sort_inplace(self, data_for_sorting): + arr = data_for_sorting.copy() + result = arr.sort() + assert result is None + expected = data_for_sorting.take([2, 0, 1]) + tm.assert_extension_array_equal(arr, expected) + + def test_sort_inplace_descending(self, data_for_sorting): + arr = data_for_sorting.copy() + arr.sort(ascending=False) + if pd.Series(data_for_sorting).nunique() == 2: + expected = data_for_sorting.take([0, 1, 2]) + else: + expected = data_for_sorting.take([1, 0, 2]) + tm.assert_extension_array_equal(arr, expected) + + @pytest.mark.parametrize("na_position", ["first", "last"]) + def test_sort_inplace_na_position(self, data_missing_for_sorting, na_position): + arr = data_missing_for_sorting.copy() + arr.sort(na_position=na_position) + if na_position == "last": + expected = data_missing_for_sorting.take([2, 0, 1]) + else: + expected = data_missing_for_sorting.take([1, 2, 
0]) + tm.assert_extension_array_equal(arr, expected) + @pytest.mark.parametrize("ascending", [True, False]) def test_rank(self, data_for_sorting, ascending): ser = pd.Series(data_for_sorting) From b5931abc4655b833493dff29196e4e285c963c34 Mon Sep 17 00:00:00 2001 From: Samaresh Kumar Singh Date: Fri, 15 May 2026 15:31:35 -0500 Subject: [PATCH 7/9] BUG: raise NotImplementedError for SparseArray.sort() SparseArray.__setitem__ raises TypeError, so the base ExtensionArray.sort() implementation (which does self[:] = self.take(...)) failed with a confusing internal error. Override sort() to raise NotImplementedError cleanly and assert that behavior in the extension test suite. Surfaced by the new shared sort tests in BaseMethodsTests. --- pandas/core/arrays/sparse/array.py | 10 ++++++++++ pandas/tests/extension/test_sparse.py | 13 +++++++++++++ 2 files changed, 23 insertions(+) diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index cb508a6f3d429..a6a17c8f5c56a 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -127,6 +127,7 @@ def tocsc(self, /) -> csc_array | csc_matrix: ... 
Scalar, ScalarIndexer, SequenceIndexer, + SortKind, npt, ) @@ -614,6 +615,15 @@ def __setitem__(self, key, value) -> None: msg = "SparseArray does not support item assignment via setitem" raise TypeError(msg) + def sort( + self, + *, + ascending: bool = True, + kind: SortKind = "quicksort", + na_position: str = "last", + ) -> None: + raise NotImplementedError("SparseArray does not support in-place sort") + @classmethod def _from_sequence( cls, scalars, *, dtype: Dtype | None = None, copy: bool = False diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py index 6a460b3ef1496..ff09156e3a927 100644 --- a/pandas/tests/extension/test_sparse.py +++ b/pandas/tests/extension/test_sparse.py @@ -330,6 +330,19 @@ def test_searchsorted(self, performance_warning, data_for_sorting, as_series): with tm.assert_produces_warning(performance_warning, check_stacklevel=False): super().test_searchsorted(data_for_sorting, as_series) + def test_sort_inplace(self, data_for_sorting): + with pytest.raises(NotImplementedError): + data_for_sorting.sort() + + def test_sort_inplace_descending(self, data_for_sorting): + with pytest.raises(NotImplementedError): + data_for_sorting.sort(ascending=False) + + @pytest.mark.parametrize("na_position", ["first", "last"]) + def test_sort_inplace_na_position(self, data_missing_for_sorting, na_position): + with pytest.raises(NotImplementedError): + data_missing_for_sorting.sort(na_position=na_position) + def test_shift_0_periods(self, data): # GH#33856 shifting with periods=0 should return a copy, not same obj result = data.shift(0) From 3f7eefc3489bcbf1584706e37acb6c57db3dcdce Mon Sep 17 00:00:00 2001 From: Samaresh Kumar Singh Date: Fri, 15 May 2026 15:31:56 -0500 Subject: [PATCH 8/9] TST: handle slice keys in JSONArray test-helper __setitem__ JSONArray.__setitem__ in the extension test helper didn't iterate over slice keys, so any self[:] = ... 
path (including the inherited ExtensionArray.sort()) blew up with "slice object is not iterable". Expand slice keys to a range before dispatching. Three pre-existing xfail markers on test_setitem_slice* now XPASS, so remove or scope them accordingly. --- pandas/tests/extension/json/array.py | 3 +++ pandas/tests/extension/json/test_json.py | 20 ++++++-------------- 2 files changed, 9 insertions(+), 14 deletions(-) diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index 9adb72877b333..0b48cbb402d14 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -143,6 +143,9 @@ def __setitem__(self, key, value) -> None: if isinstance(key, numbers.Integral): self.data[key] = value else: + if isinstance(key, slice): + key = range(*key.indices(len(self))) + if not isinstance(value, (type(self), abc.Sequence)): # broadcast value value = itertools.cycle([value]) diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py index bee7ece3a4cf8..52bc2d31972ef 100644 --- a/pandas/tests/extension/json/test_json.py +++ b/pandas/tests/extension/json/test_json.py @@ -404,12 +404,6 @@ def test_setitem_integer_with_missing_raises(self, data, idx, box_in_series): def test_setitem_scalar_key_sequence_raise(self, data): super().test_setitem_scalar_key_sequence_raise(data) - def test_setitem_with_expansion_dataframe_column(self, data, full_indexer, request): - if "full_slice" in request.node.name: - mark = pytest.mark.xfail(reason="slice is not iterable") - request.applymarker(mark) - super().test_setitem_with_expansion_dataframe_column(data, full_indexer) - @pytest.mark.xfail(reason="slice is not iterable") def test_setitem_frame_2d_values(self, data): super().test_setitem_frame_2d_values(data) @@ -421,10 +415,12 @@ def test_setitem_frame_2d_values(self, data): def test_setitem_mask_broadcast(self, data, setter): super().test_setitem_mask_broadcast(data, setter) - 
@pytest.mark.xfail( - reason="cannot set using a slice indexer with a different length" - ) - def test_setitem_slice(self, data, box_in_series): + def test_setitem_slice(self, data, box_in_series, request): + if box_in_series: + mark = pytest.mark.xfail( + reason="cannot set using a slice indexer with a different length" + ) + request.applymarker(mark) super().test_setitem_slice(data, box_in_series) @pytest.mark.xfail(reason="slice object is not iterable") @@ -435,10 +431,6 @@ def test_setitem_loc_iloc_slice(self, data): def test_setitem_slice_mismatch_length_raises(self, data): super().test_setitem_slice_mismatch_length_raises(data) - @pytest.mark.xfail(reason="slice object is not iterable") - def test_setitem_slice_array(self, data): - super().test_setitem_slice_array(data) - @pytest.mark.xfail(reason="Fail to raise") def test_setitem_invalid(self, data, invalid_scalar): super().test_setitem_invalid(data, invalid_scalar) From 9680a574cc01e2de354fcd6deb288a0cbaa7f185 Mon Sep 17 00:00:00 2001 From: Samaresh Kumar Singh Date: Mon, 1 Jun 2026 18:16:11 -0500 Subject: [PATCH 9/9] Address review feedback on the ExtensionArray.sort PR Simplified test_sort_unique_result to construct the StringArray directly rather than going through a DataFrame, since the bug is just about unique followed by sort. Also listed ExtensionArray.sort in the extensions reference page next to argsort so the new public method is documented. --- doc/source/reference/extensions.rst | 1 + pandas/tests/arrays/string_/test_string.py | 6 ++---- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/doc/source/reference/extensions.rst b/doc/source/reference/extensions.rst index c51989ca54730..843bf1af493ae 100644 --- a/doc/source/reference/extensions.rst +++ b/doc/source/reference/extensions.rst @@ -65,6 +65,7 @@ objects. 
api.extensions.ExtensionArray.repeat api.extensions.ExtensionArray.searchsorted api.extensions.ExtensionArray.shift + api.extensions.ExtensionArray.sort api.extensions.ExtensionArray.take api.extensions.ExtensionArray.unique api.extensions.ExtensionArray.dtype diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index 298e8c5269806..c2ec72a7f471a 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -654,10 +654,8 @@ def test_numpy_random_permute(dtype, box): def test_sort_unique_result(dtype): # https://github.com/pandas-dev/pandas/issues/64977 - entries = [{"name": "Bob", "age": 30}, {"name": "Alice", "age": 25}] - df = pd.DataFrame(entries) - with pd.option_context("mode.string_storage", dtype.storage): - unique_names = df["name"].astype(dtype).unique() + arr = pd.array(["Bob", "Alice", "Bob"], dtype=dtype) + unique_names = arr.unique() unique_names.sort() expected = pd.array(["Alice", "Bob"], dtype=dtype) tm.assert_extension_array_equal(unique_names, expected)