Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/reference/extensions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ objects.
api.extensions.ExtensionArray.repeat
api.extensions.ExtensionArray.searchsorted
api.extensions.ExtensionArray.shift
api.extensions.ExtensionArray.sort
api.extensions.ExtensionArray.take
api.extensions.ExtensionArray.unique
api.extensions.ExtensionArray.dtype
Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ Other enhancements
- :meth:`ExtensionArray.map` now calls :meth:`ExtensionArray._cast_pointwise_result` to retain the dtype backend, e.g. Arrow-backed arrays now preserve their Arrow dtype through ``map`` (:issue:`57189`, :issue:`62164`)
- :func:`read_csv` now supports ``dtype="complex64"`` and ``dtype="complex128"`` with the C engine, enabling round-tripping of complex-number columns written by :meth:`DataFrame.to_csv` (:issue:`9379`)
- Added :meth:`ExtensionArray.count` (:issue:`64450`)
- Added :meth:`ExtensionArray.sort` for in-place sorting of :class:`ExtensionArray` (:issue:`64977`)
- Added :meth:`Index.replace` method to support value replacement functionality similar to :meth:`Series.replace` (:issue:`19495`)
- Display formatting for float sequences in DataFrame cells now respects the ``display.precision`` option (:issue:`60503`).
- Improved the precision of float parsing in :func:`read_csv` (:issue:`64395`)
Expand Down
13 changes: 13 additions & 0 deletions pandas/core/arrays/_mixins.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@
ScalarIndexer,
SequenceIndexer,
Shape,
SortKind,
TakeIndexer,
npt,
)
Expand Down Expand Up @@ -231,6 +232,18 @@ def unique(self) -> Self:
new_data = unique(self._ndarray)
return self._from_backing_data(new_data)

def sort(
    self,
    *,
    ascending: bool = True,
    kind: SortKind = "quicksort",
    na_position: str = "last",
) -> None:
    """
    Sort the array in-place by reordering its backing ``_ndarray``.

    The sort order is obtained from ``self.argsort``; all keyword
    arguments are forwarded to it unchanged.

    Parameters
    ----------
    ascending : bool, default True
        Forwarded to ``argsort``.
    kind : SortKind, default 'quicksort'
        Forwarded to ``argsort``.
    na_position : str, default 'last'
        Forwarded to ``argsort``.

    Returns
    -------
    None
        The backing data is overwritten; nothing is returned.
    """
    order = self.argsort(ascending=ascending, kind=kind, na_position=na_position)
    backing = self._ndarray
    # The right-hand side is evaluated in full before the slice assignment
    # writes into the existing buffer (presumably a NumPy array, where
    # indexing with ``order`` yields a reordered copy -- confirm).
    backing[:] = backing[order]

@classmethod
def _concat_same_type(
cls,
Expand Down
13 changes: 13 additions & 0 deletions pandas/core/arrays/arrow/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -1391,6 +1391,19 @@ def argsort(
np_result = result.to_numpy()
return np_result.astype(np.intp, copy=False)

def sort(
    self,
    *,
    ascending: bool = True,
    kind: SortKind = "quicksort",
    na_position: str = "last",
) -> None:
    """
    Sort the array in-place.

    The sort order comes from ``self.argsort`` (all keyword arguments are
    forwarded unchanged). The reordered data is produced with ``self.take``
    and this object's ``_pa_array`` is then rebound to the result's
    ``_pa_array``.

    Parameters
    ----------
    ascending : bool, default True
        Forwarded to ``argsort``.
    kind : SortKind, default 'quicksort'
        Forwarded to ``argsort``.
    na_position : str, default 'last'
        Forwarded to ``argsort``.

    Returns
    -------
    None
    """
    order = self.argsort(ascending=ascending, kind=kind, na_position=na_position)
    # Rebind rather than write element-wise: the sorted data replaces the
    # attribute wholesale.
    self._pa_array = self.take(order)._pa_array

def _argmin_max(self, skipna: bool, method: str) -> int:
if self._pa_array.length() in (0, self._pa_array.null_count) or (
self._hasna and not skipna
Expand Down
42 changes: 42 additions & 0 deletions pandas/core/arrays/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1069,6 +1069,48 @@ def argsort(
mask=np.asarray(self.isna()),
)

def sort(
    self,
    *,
    ascending: bool = True,
    kind: SortKind = "quicksort",
    na_position: str = "last",
) -> None:
    """
    Sort the array in-place.

    Parameters
    ----------
    ascending : bool, default True
        Whether to sort in ascending order.
    kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, default 'quicksort'
        Sorting algorithm.
    na_position : {'first', 'last'}, default 'last'
        If 'first', put NaN values at the beginning.
        If 'last', put NaN values at the end.

    Returns
    -------
    None

    See Also
    --------
    ExtensionArray.argsort : Return the indices that would sort this array.

    Notes
    -----
    This implementation assigns ``self.take(self.argsort(...))`` back
    through ``self[:]``, so it relies on the array supporting slice
    assignment.

    Examples
    --------
    >>> arr = pd.array([3, 1, 2, 5, 4])
    >>> arr.sort()
    >>> arr
    <IntegerArray>
    [1, 2, 3, 4, 5]
    Length: 5, dtype: Int64
    """
    order = self.argsort(ascending=ascending, kind=kind, na_position=na_position)
    reordered = self.take(order)
    # Write the sorted values back over the whole array.
    self[:] = reordered

def argmin(self, skipna: bool = True) -> int:
"""
Return the index of minimum value.
Expand Down
10 changes: 10 additions & 0 deletions pandas/core/arrays/sparse/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ def tocsc(self, /) -> csc_array | csc_matrix: ...
Scalar,
ScalarIndexer,
SequenceIndexer,
SortKind,
npt,
)

Expand Down Expand Up @@ -614,6 +615,15 @@ def __setitem__(self, key, value) -> None:
msg = "SparseArray does not support item assignment via setitem"
raise TypeError(msg)

def sort(
    self,
    *,
    ascending: bool = True,
    kind: SortKind = "quicksort",
    na_position: str = "last",
) -> None:
    """
    Reject in-place sorting.

    The keyword arguments are accepted but never used.

    Raises
    ------
    NotImplementedError
        Always.
    """
    msg = "SparseArray does not support in-place sort"
    raise NotImplementedError(msg)

@classmethod
def _from_sequence(
cls, scalars, *, dtype: Dtype | None = None, copy: bool = False
Expand Down
9 changes: 9 additions & 0 deletions pandas/tests/arrays/string_/test_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -650,3 +650,12 @@ def test_numpy_random_permute(dtype, box):
result = rng.permutation(arr)
assert isinstance(result, np.ndarray)
assert sorted(result.tolist()) == ["a", "bb", "ccc"]


def test_sort_unique_result(dtype):
    """Sorting the result of ``unique()`` in place yields ordered values."""
    # https://github.com/pandas-dev/pandas/issues/64977
    result = pd.array(["Bob", "Alice", "Bob"], dtype=dtype).unique()
    result.sort()
    tm.assert_extension_array_equal(result, pd.array(["Alice", "Bob"], dtype=dtype))
26 changes: 26 additions & 0 deletions pandas/tests/extension/base/methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,32 @@ def test_sort_values_frame(self, data_for_sorting, ascending):
)
tm.assert_frame_equal(result, expected)

def test_sort_inplace(self, data_for_sorting):
    """``sort()`` returns None and reorders a copy of the fixture in place."""
    arr = data_for_sorting.copy()
    assert arr.sort() is None
    # Positions [2, 0, 1] of the fixture are the expected ascending order.
    tm.assert_extension_array_equal(arr, data_for_sorting.take([2, 0, 1]))

def test_sort_inplace_descending(self, data_for_sorting):
    """``sort(ascending=False)`` reorders a copy of the fixture in place."""
    arr = data_for_sorting.copy()
    arr.sort(ascending=False)
    # Fixtures with exactly two distinct values are expected to come back
    # in their original order; all others as positions [1, 0, 2].
    only_two_unique = pd.Series(data_for_sorting).nunique() == 2
    indexer = [0, 1, 2] if only_two_unique else [1, 0, 2]
    tm.assert_extension_array_equal(arr, data_for_sorting.take(indexer))

@pytest.mark.parametrize("na_position", ["first", "last"])
def test_sort_inplace_na_position(self, data_missing_for_sorting, na_position):
    """``sort(na_position=...)`` places the missing value as requested."""
    arr = data_missing_for_sorting.copy()
    arr.sort(na_position=na_position)
    # Expected order expressed as positions into the unsorted fixture.
    indexer = [2, 0, 1] if na_position == "last" else [1, 2, 0]
    tm.assert_extension_array_equal(arr, data_missing_for_sorting.take(indexer))

@pytest.mark.parametrize("ascending", [True, False])
def test_rank(self, data_for_sorting, ascending):
ser = pd.Series(data_for_sorting)
Expand Down
3 changes: 3 additions & 0 deletions pandas/tests/extension/json/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,9 @@ def __setitem__(self, key, value) -> None:
if isinstance(key, numbers.Integral):
self.data[key] = value
else:
if isinstance(key, slice):
key = range(*key.indices(len(self)))

if not isinstance(value, (type(self), abc.Sequence)):
# broadcast value
value = itertools.cycle([value])
Expand Down
20 changes: 6 additions & 14 deletions pandas/tests/extension/json/test_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -396,12 +396,6 @@ def test_setitem_integer_with_missing_raises(self, data, idx, box_in_series):
def test_setitem_scalar_key_sequence_raise(self, data):
super().test_setitem_scalar_key_sequence_raise(data)

def test_setitem_with_expansion_dataframe_column(self, data, full_indexer, request):
if "full_slice" in request.node.name:
mark = pytest.mark.xfail(reason="slice is not iterable")
request.applymarker(mark)
super().test_setitem_with_expansion_dataframe_column(data, full_indexer)

@pytest.mark.xfail(reason="slice is not iterable")
def test_setitem_frame_2d_values(self, data):
    # Runs the inherited test unchanged; the decorator above marks it as an
    # expected failure with the reason "slice is not iterable".
    super().test_setitem_frame_2d_values(data)
Expand All @@ -413,10 +407,12 @@ def test_setitem_frame_2d_values(self, data):
def test_setitem_mask_broadcast(self, data, setter):
super().test_setitem_mask_broadcast(data, setter)

@pytest.mark.xfail(
reason="cannot set using a slice indexer with a different length"
)
def test_setitem_slice(self, data, box_in_series):
def test_setitem_slice(self, data, box_in_series, request):
    """Run the inherited test, expecting failure only when ``box_in_series``."""
    if box_in_series:
        # Apply the xfail at run time so the other parametrization is
        # still required to pass.
        request.applymarker(
            pytest.mark.xfail(
                reason="cannot set using a slice indexer with a different length"
            )
        )
    super().test_setitem_slice(data, box_in_series)

@pytest.mark.xfail(reason="slice object is not iterable")
Expand All @@ -427,10 +423,6 @@ def test_setitem_loc_iloc_slice(self, data):
def test_setitem_slice_mismatch_length_raises(self, data):
super().test_setitem_slice_mismatch_length_raises(data)

@pytest.mark.xfail(reason="slice object is not iterable")
def test_setitem_slice_array(self, data):
super().test_setitem_slice_array(data)

@pytest.mark.xfail(reason="Fail to raise")
def test_setitem_invalid(self, data, invalid_scalar):
    # Runs the inherited test unchanged; the decorator above marks it as an
    # expected failure with the reason "Fail to raise".
    super().test_setitem_invalid(data, invalid_scalar)
Expand Down
13 changes: 13 additions & 0 deletions pandas/tests/extension/test_sparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,19 @@ def test_searchsorted(self, performance_warning, data_for_sorting, as_series):
with tm.assert_produces_warning(performance_warning, check_stacklevel=False):
super().test_searchsorted(data_for_sorting, as_series)

def test_sort_inplace(self, data_for_sorting):
    """Override: ``sort()`` on the sparse fixture raises NotImplementedError."""
    arr = data_for_sorting
    with pytest.raises(NotImplementedError):
        arr.sort()

def test_sort_inplace_descending(self, data_for_sorting):
    """Override: descending ``sort()`` also raises NotImplementedError."""
    arr = data_for_sorting
    with pytest.raises(NotImplementedError):
        arr.sort(ascending=False)

@pytest.mark.parametrize("na_position", ["first", "last"])
def test_sort_inplace_na_position(self, data_missing_for_sorting, na_position):
    """Override: ``sort()`` raises NotImplementedError for either NA position."""
    arr = data_missing_for_sorting
    with pytest.raises(NotImplementedError):
        arr.sort(na_position=na_position)

def test_shift_0_periods(self, data):
# GH#33856 shifting with periods=0 should return a copy, not same obj
result = data.shift(0)
Expand Down
Loading