From c5f0eb47243592700f3ea33892767df9e2f34d89 Mon Sep 17 00:00:00 2001
From: huhu-dsy
Date: Fri, 12 Dec 2025 14:39:52 +0800
Subject: [PATCH 1/2] 1

---
Review notes (free text below the three-dash line; not part of the commit):

* The subject "1" does not describe the change (negative integer keys for
  `Series.list[...]`).
* In the bounds-check loop, `arr.is_null()` is called inside
  `for i in range(len(arr))`, so the null mask is rebuilt for every row.
  Compute it once before the loop.
* `length_zero` is implied by `length_too_short`: `key < 0` means
  `abs(key) >= 1`, so a length of 0 is always `< abs(key)`. The `pc.or_`
  can be dropped.
* NOTE(review): the negative-key path reads `chunk.offsets`. Confirm this
  holds for every list dtype that `test_list_getitem(list_dtype)` is
  parametrized over (presumably including fixed-size lists -- verify).
* The new test only uses `key == -1` and only checks the "[0, 0)" message.
  Add a case that reaches the "[-len, len)" message, e.g. `.list[-2]` on
  `[[1]]`.

 pandas/core/arrays/arrow/accessors.py         | 43 +++++++++++++++++--
 .../series/accessors/test_list_accessor.py    | 20 +++++++++
 2 files changed, 59 insertions(+), 4 deletions(-)

diff --git a/pandas/core/arrays/arrow/accessors.py b/pandas/core/arrays/arrow/accessors.py
index 7f3da9be0c03d..8415eb8b92918 100644
--- a/pandas/core/arrays/arrow/accessors.py
+++ b/pandas/core/arrays/arrow/accessors.py
@@ -156,10 +156,45 @@ def __getitem__(self, key: int | slice) -> Series:
         from pandas import Series
 
         if isinstance(key, int):
-            # TODO: Support negative key but pyarrow does not allow
-            # element index to be an array.
-            # if key < 0:
-            #     key = pc.add(key, pc.list_value_length(self._pa_array))
+            if key < 0:
+                arr = self._pa_array
+                lengths = pc.list_value_length(arr)
+                not_null = pc.is_valid(arr)
+                length_zero = pc.equal(lengths, 0)
+                length_too_short = pc.less(lengths, abs(key))
+                should_error = pc.and_(not_null, pc.or_(length_zero, length_too_short))
+                if pc.any(should_error).as_py():
+                    for i in range(len(arr)):
+                        if not arr.is_null()[i].as_py():
+                            current_length = lengths[i].as_py()
+                            if current_length == 0:
+                                raise IndexError(f"Index {key} is out of bounds: should be in [0, 0)")
+                            if current_length < abs(key):
+                                raise IndexError(f"Index {key} is out of bounds: should be in [{-current_length}, {current_length})")
+                chunks = arr.chunks if isinstance(arr, pa.ChunkedArray) else [arr]
+                all_results = []
+                for chunk in chunks:
+                    if len(chunk) == 0:
+                        continue
+                    chunk_lengths = pc.list_value_length(chunk)
+                    chunk_offsets = chunk.offsets
+                    offsets = chunk_offsets.slice(0, len(chunk))
+                    indices = pc.add(pc.add(offsets, chunk_lengths), key)
+                    taken_values = chunk.values.take(indices)
+                    if chunk.null_count > 0:
+                        mask = chunk.is_null()
+                        null_scalar = pa.scalar(None, type=chunk.type.value_type)
+                        chunk_result = pc.if_else(mask, null_scalar, taken_values)
+                    else:
+                        chunk_result = taken_values
+                    all_results.append(chunk_result)
+                result_values = pa.concat_arrays(all_results) if all_results else pa.array([],type=arr.type.value_type)
+                return Series(
+                    result_values,
+                    dtype=ArrowDtype(result_values.type),
+                    index=self._data.index,
+                    name=self._data.name,
+                )
             element = pc.list_element(self._pa_array, key)
             return Series(
                 element,
diff --git a/pandas/tests/series/accessors/test_list_accessor.py b/pandas/tests/series/accessors/test_list_accessor.py
index 3541592e7c51e..18399d9880f0b 100644
--- a/pandas/tests/series/accessors/test_list_accessor.py
+++ b/pandas/tests/series/accessors/test_list_accessor.py
@@ -30,6 +30,26 @@ def test_list_getitem(list_dtype):
     tm.assert_series_equal(actual, expected)
 
 
+def test_list_getitem_negative_index():
+    ser = Series(
+        [[1, 2, 3], [4, None, 5], None],
+        dtype=ArrowDtype(pa.list_(pa.int64())),
+        name="a",
+    )
+    actual = ser.list[-1]
+    expected = Series([3, 5, None], dtype="int64[pyarrow]", name="a")
+    tm.assert_series_equal(actual, expected)
+
+    ser_empty = Series([[]], dtype=ArrowDtype(pa.list_(pa.int64())))
+    with pytest.raises(IndexError, match="Index -1 is out of bounds: should be in \\[0, 0\\)"):
+        ser_empty.list[-1]
+
+    ser_mixed = Series([[1], [1, 2], [1, 2, 3]], dtype=ArrowDtype(pa.list_(pa.int64())))
+    actual = ser_mixed.list[-1]
+    expected = Series([1, 2, 3], dtype="int64[pyarrow]")
+    tm.assert_series_equal(actual, expected)
+
+
 def test_list_getitem_index():
     # GH 58425
     ser = Series(

From ecda5731c64e4468a77a64b8a070103df434d344 Mon Sep 17 00:00:00 2001
From: huhu-dsy
Date: Fri, 12 Dec 2025 15:28:54 +0800
Subject: [PATCH 2/2] geshi

---
Review notes (free text below the three-dash line; not part of the commit):

* This commit only re-wraps long lines from patch 1/2; no logic changes.
  The subject "geshi" does not say so -- consider squashing it into 1/2 or
  renaming it.
* The wrapped f-string arguments of both `raise IndexError(` calls start at
  column 0, and `result_values = ...` is split in the middle of a
  conditional expression. Presumably done by hand to satisfy a line-length
  check; let the project's formatter place these instead.

 pandas/core/arrays/arrow/accessors.py               | 10 +++++++---
 pandas/tests/series/accessors/test_list_accessor.py |  3 ++-
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/pandas/core/arrays/arrow/accessors.py b/pandas/core/arrays/arrow/accessors.py
index 8415eb8b92918..e42020ad575bb 100644
--- a/pandas/core/arrays/arrow/accessors.py
+++ b/pandas/core/arrays/arrow/accessors.py
@@ -168,9 +168,11 @@ def __getitem__(self, key: int | slice) -> Series:
                         if not arr.is_null()[i].as_py():
                             current_length = lengths[i].as_py()
                             if current_length == 0:
-                                raise IndexError(f"Index {key} is out of bounds: should be in [0, 0)")
+                                raise IndexError(
+f"Index {key} is out of bounds: should be in [0, 0)")
                             if current_length < abs(key):
-                                raise IndexError(f"Index {key} is out of bounds: should be in [{-current_length}, {current_length})")
+                                raise IndexError(
+f"Index {key} is out of bounds: should be in [{-current_length}, {current_length})")
                 chunks = arr.chunks if isinstance(arr, pa.ChunkedArray) else [arr]
                 all_results = []
                 for chunk in chunks:
@@ -188,7 +190,9 @@ def __getitem__(self, key: int | slice) -> Series:
                     else:
                         chunk_result = taken_values
                     all_results.append(chunk_result)
-                result_values = pa.concat_arrays(all_results) if all_results else pa.array([],type=arr.type.value_type)
+                result_values = pa.concat_arrays(
+                    all_results) if all_results else pa.array([],
+                    type=arr.type.value_type)
                 return Series(
                     result_values,
                     dtype=ArrowDtype(result_values.type),
diff --git a/pandas/tests/series/accessors/test_list_accessor.py b/pandas/tests/series/accessors/test_list_accessor.py
index 18399d9880f0b..c47feb8594023 100644
--- a/pandas/tests/series/accessors/test_list_accessor.py
+++ b/pandas/tests/series/accessors/test_list_accessor.py
@@ -41,7 +41,8 @@ def test_list_getitem_negative_index():
     tm.assert_series_equal(actual, expected)
 
     ser_empty = Series([[]], dtype=ArrowDtype(pa.list_(pa.int64())))
-    with pytest.raises(IndexError, match="Index -1 is out of bounds: should be in \\[0, 0\\)"):
+    with pytest.raises(
+        IndexError, match="Index -1 is out of bounds: should be in \\[0, 0\\)"):
         ser_empty.list[-1]
 
     ser_mixed = Series([[1], [1, 2], [1, 2, 3]], dtype=ArrowDtype(pa.list_(pa.int64())))