From 26a4fc8a873ae66176cfbc15528cf25dc71c8aaa Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 12 Dec 2025 10:35:56 +0100 Subject: [PATCH] BUG: ensure we still honor copy=True in Series constructor in all cases --- pandas/core/series.py | 22 +++++++---- pandas/tests/copy_view/test_constructors.py | 41 +++++++++++++++++++-- 2 files changed, 53 insertions(+), 10 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 1ea8bbbaa0cfb..5d2c6654c63b3 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -265,8 +265,12 @@ class Series(base.IndexOpsMixin, NDFrame): # type: ignore[misc] See the :ref:`user guide <basics.dtypes>` for more usages. name : Hashable, default None The name to give to the Series. - copy : bool, default False - Copy input data. Only affects Series or 1d ndarray input. See examples. + copy : bool, default None + Copy input data. By default, will copy if the input data is a numpy or + pandas array. + Set to False to avoid copying, at your own risk (if you know the input + data won't be modified elsewhere). + Only affects Series or 1d ndarray input. See examples. 
See Also -------- @@ -397,6 +401,7 @@ def __init__( if copy is not False: if dtype is None or astype_is_view(data.dtype, pandas_dtype(dtype)): data = data.copy() + copy = False if copy is None: copy = False @@ -411,6 +416,7 @@ def __init__( Pandas4Warning, stacklevel=2, ) + allow_mgr = True name = ibase.maybe_extract_name(name, data, type(self)) @@ -436,9 +442,8 @@ def __init__( if isinstance(data, Index): if dtype is not None: data = data.astype(dtype) - - refs = data._references - copy = False + if not copy: + refs = data._references elif isinstance(data, np.ndarray): if len(data.dtype): @@ -454,8 +459,9 @@ def __init__( data = data._mgr.copy(deep=False) else: data = data.reindex(index) - copy = False data = data._mgr + if data._has_no_reference(0): + copy = False elif isinstance(data, Mapping): data, index = self._init_dict(data, index, dtype) dtype = None @@ -500,8 +506,10 @@ def __init__( # create/copy the manager if isinstance(data, SingleBlockManager): if dtype is not None: + if not astype_is_view(data.dtype, pandas_dtype(dtype)): + copy = False data = data.astype(dtype=dtype) - elif copy: + if copy: data = data.copy(deep=True) else: data = sanitize_array(data, index, dtype, copy) diff --git a/pandas/tests/copy_view/test_constructors.py b/pandas/tests/copy_view/test_constructors.py index 4e4df4ba3cf22..72fc814d25d45 100644 --- a/pandas/tests/copy_view/test_constructors.py +++ b/pandas/tests/copy_view/test_constructors.py @@ -47,6 +47,16 @@ def test_series_from_series(dtype): ser.iloc[0] = 0 assert result.iloc[0] == 1 + # forcing copy=False still gives a CoW shallow copy + result = Series(ser, dtype=dtype, copy=False) + assert np.shares_memory(get_array(ser), get_array(result)) + assert result._mgr.blocks[0].refs.has_reference() + + # forcing copy=True still results in an actual hard copy up front + result = Series(ser, dtype=dtype, copy=True) + assert not np.shares_memory(get_array(ser), get_array(result)) + assert ser._mgr._has_no_reference(0) + def 
test_series_from_series_with_reindex(): # Case: constructing a Series from another Series with specifying an index @@ -54,7 +64,7 @@ def test_series_from_series_with_reindex(): ser = Series([1, 2, 3], name="name") # passing an index that doesn't actually require a reindex of the values - # -> without CoW we get an actual mutating view + # -> still getting a CoW shallow copy for index in [ ser.index, ser.index.copy(), @@ -66,6 +76,11 @@ def test_series_from_series_with_reindex(): result.iloc[0] = 0 assert ser.iloc[0] == 1 + # forcing copy=True still results in an actual hard copy up front + result = Series(ser, index=index, copy=True) + assert not np.shares_memory(ser.values, result.values) + assert not result._mgr.blocks[0].refs.has_reference() + # ensure that if an actual reindex is needed, we don't have any refs # (mutating the result wouldn't trigger CoW) result = Series(ser, index=[0, 1, 2, 3]) @@ -87,6 +102,13 @@ def test_series_from_array(idx, dtype, arr): arr[0] = 100 tm.assert_series_equal(ser, ser_orig) + # if the user explicitly passes copy=False, we get an actual view + # not protected by CoW + ser = Series(arr, dtype=dtype, index=idx, copy=False) + assert np.shares_memory(get_array(ser), data) + arr[0] = 50 + assert ser.iloc[0] == 50 + @pytest.mark.parametrize("copy", [True, False, None]) def test_series_from_array_different_dtype(copy): @@ -112,9 +134,22 @@ def test_series_from_index(idx): ser.iloc[0] = ser.iloc[1] tm.assert_index_equal(idx, expected) + # forcing copy=False still gives a CoW shallow copy + ser = Series(idx, copy=False) + assert np.shares_memory(get_array(ser), get_array(idx)) + assert not ser._mgr._has_no_reference(0) + ser.iloc[0] = ser.iloc[1] + tm.assert_index_equal(idx, expected) + + # forcing copy=True still results in a copy + ser = Series(idx, copy=True) + assert not np.shares_memory(get_array(ser), get_array(idx)) + assert ser._mgr._has_no_reference(0) -def test_series_from_index_different_dtypes(): - idx = Index([1, 2, 3], 
dtype="int64") + +@pytest.mark.parametrize("copy", [True, False, None]) +def test_series_from_index_different_dtypes(copy): + idx = Index([1, 2, 3], dtype="int64", copy=copy) ser = Series(idx, dtype="int32") assert not np.shares_memory(get_array(ser), get_array(idx)) assert ser._mgr._has_no_reference(0)