From 35803bcd053f98eed65b770c1365c043b479013f Mon Sep 17 00:00:00 2001 From: lif <1835304752@qq.com> Date: Thu, 11 Dec 2025 18:23:30 +0800 Subject: [PATCH 1/3] BUG: Fix NA comparison inconsistency for object dtype Series Fix comp_method_OBJECT_ARRAY to return BooleanArray when input contains pd.NA values, ensuring NA is properly propagated in comparison results instead of returning False. Closes #63328 --- pandas/core/ops/array_ops.py | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py index 7b21772b443f6..d179a280d3e55 100644 --- a/pandas/core/ops/array_ops.py +++ b/pandas/core/ops/array_ops.py @@ -113,6 +113,9 @@ def fill_binop(left, right, fill_value): def comp_method_OBJECT_ARRAY(op, x, y): + from pandas._libs import missing as libmissing + from pandas.core.arrays import BooleanArray + if isinstance(y, list): # e.g. test_tuple_categories y = construct_1d_object_array_from_listlike(y) @@ -129,7 +132,31 @@ def comp_method_OBJECT_ARRAY(op, x, y): result = libops.vec_compare(x.ravel(), y.ravel(), op) else: result = libops.scalar_compare(x.ravel(), y, op) - return result.reshape(x.shape) + result = result.reshape(x.shape) + + # GH#63328: Check if there are pd.NA values in the input and return + # BooleanArray to properly propagate NA in comparisons + x_has_na = any(val is libmissing.NA for val in x.ravel()) + y_has_na = ( + is_scalar(y) and y is libmissing.NA + ) or ( + isinstance(y, np.ndarray) + and any(val is libmissing.NA for val in y.ravel()) + ) + + if x_has_na or y_has_na: + # Create a mask for NA values + mask = np.array([val is libmissing.NA for val in x.ravel()], dtype=bool) + if isinstance(y, np.ndarray): + mask = mask | np.array( + [val is libmissing.NA for val in y.ravel()], dtype=bool + ) + elif y is libmissing.NA: + mask = np.ones(x.shape, dtype=bool) + mask = mask.reshape(x.shape) + return BooleanArray(result, mask, copy=False) + + return result 
def _masked_arith_op(x: np.ndarray, y, op) -> np.ndarray: From 6a7648548dae3402e6391bba25c22f3b888cc3d2 Mon Sep 17 00:00:00 2001 From: lif <1835304752@qq.com> Date: Fri, 12 Dec 2025 11:05:26 +0800 Subject: [PATCH 2/3] BUG: Fix NA comparison inconsistency for object dtype Series MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix `comp_method_OBJECT_ARRAY` to return `BooleanArray` when input contains `pd.NA` values, ensuring NA is properly propagated in comparison results instead of returning `False`. Previously, when comparing an object dtype Series containing `pd.NA` with a scalar, the vectorized comparison would return `False` for NA positions instead of `<NA>`. Closes #63328 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- pandas/core/ops/array_ops.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py index d179a280d3e55..f383b2ce77195 100644 --- a/pandas/core/ops/array_ops.py +++ b/pandas/core/ops/array_ops.py @@ -114,6 +114,7 @@ def fill_binop(left, right, fill_value): def comp_method_OBJECT_ARRAY(op, x, y): from pandas._libs import missing as libmissing + from pandas.core.arrays import BooleanArray if isinstance(y, list): @@ -137,11 +138,8 @@ def comp_method_OBJECT_ARRAY(op, x, y): # GH#63328: Check if there are pd.NA values in the input and return # BooleanArray to properly propagate NA in comparisons x_has_na = any(val is libmissing.NA for val in x.ravel()) - y_has_na = ( - is_scalar(y) and y is libmissing.NA - ) or ( - isinstance(y, np.ndarray) - and any(val is libmissing.NA for val in y.ravel()) + y_has_na = (is_scalar(y) and y is libmissing.NA) or ( + isinstance(y, np.ndarray) and any(val is libmissing.NA for val in y.ravel()) ) if x_has_na or y_has_na: From 3c7b795fd0d957d31e18a2c87244fda5050a8721 Mon Sep 17 00:00:00 2001 From: majiayu000 <1835304752@qq.com> Date: Sun, 14 Dec 2025 
00:38:00 +0800 Subject: [PATCH 3/3] TST: Add test for NA comparison in object dtype Series MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add test_comparison_with_na_object_dtype to verify that pd.NA values are properly propagated in comparison results instead of returning False. Closes #63328 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- pandas/tests/series/test_arithmetic.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py index a77e55612e23d..7c0598b0cff1e 100644 --- a/pandas/tests/series/test_arithmetic.py +++ b/pandas/tests/series/test_arithmetic.py @@ -803,6 +803,20 @@ def test_compare_series_interval_keyword(self): expected = Series([True, False, False]) tm.assert_series_equal(result, expected) + @pytest.mark.parametrize("comparison_op", [operator.eq, operator.ne]) + def test_comparison_with_na_object_dtype(self, comparison_op): + # GH#63328 - NA comparison should propagate NA in results + ser = Series([1, 2, pd.NA]) + + result = comparison_op(ser, 3) + + if comparison_op is operator.eq: + expected = Series([False, False, pd.NA], dtype="boolean") + else: + expected = Series([True, True, pd.NA], dtype="boolean") + + tm.assert_series_equal(result, expected) + # ------------------------------------------------------------------ # Unsorted