From 8b681caafa4d9f763ab842fc6d259eea7548eff0 Mon Sep 17 00:00:00 2001 From: phofl Date: Sat, 1 Jan 2022 22:38:09 +0100 Subject: [PATCH 1/4] BUG: clip raising with na series bounds for datetimes or ea int --- doc/source/whatsnew/v1.4.0.rst | 1 + pandas/core/generic.py | 8 +++++++- pandas/tests/series/methods/test_clip.py | 16 ++++++++++++++++ 3 files changed, 24 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 4c3e53ddcfa26..95825c814d57e 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -749,6 +749,7 @@ Numeric - Bug in :class:`DataFrame` arithmetic ops with a subclass whose :meth:`_constructor` attribute is a callable other than the subclass itself (:issue:`43201`) - Bug in arithmetic operations involving :class:`RangeIndex` where the result would have the incorrect ``name`` (:issue:`43962`) - Bug in arithmetic operations involving :class:`Series` where the result could have the incorrect ``name`` when the operands having matching NA or matching tuple names (:issue:`44459`) +- Bug in :meth:`Series.clip` raising if bounds are a :class:`Series` with ``NA`` values for datetimes or nullable integer dtypes (:issue:`44785`) - Bug in division with ``IntegerDtype`` or ``BooleanDtype`` array and NA scalar incorrectly raising (:issue:`44685`) - Bug in multiplying a :class:`Series` with ``FloatingDtype`` with a timedelta-like scalar incorrectly raising (:issue:`44772`) - diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 1e25b0f4eb176..1fe41376647a7 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7381,7 +7381,13 @@ def _clip_with_one_bound(self, threshold, method, axis, inplace): # GH 40420 # Treat missing thresholds as no bounds, not clipping the values if is_list_like(threshold): - fill_value = np.inf if method.__name__ == "le" else -np.inf + method_name_le = method.__name__ == "le" + if is_datetime64_any_dtype(self.dtype): + fill_value = Timestamp.max if method_name_le else Timestamp.min + elif is_extension_array_dtype(self.dtype): + fill_value = self.max() if method_name_le else self.min() + else: + fill_value = np.inf if method_name_le else -np.inf threshold_inf = threshold.fillna(fill_value) else: threshold_inf = threshold diff --git a/pandas/tests/series/methods/test_clip.py b/pandas/tests/series/methods/test_clip.py index bc6d5aeb0a581..29c50b7bf5ca4 100644 --- a/pandas/tests/series/methods/test_clip.py +++ b/pandas/tests/series/methods/test_clip.py @@ -89,6 +89,14 @@ def test_clip_against_series(self): tm.assert_series_equal(s.clip(lower, upper), Series([1.0, 2.0, 3.5])) tm.assert_series_equal(s.clip(1.5, upper), Series([1.5, 1.5, 3.5])) + def test_clip_against_series_ea_int_dtype(self, any_int_ea_dtype): + # GH#44785 + ser = Series([1, 1], dtype=any_int_ea_dtype) + bounds = Series([pd.NA, 1], dtype=any_int_ea_dtype) + expected = ser.copy() + result = ser.clip(bounds) + tm.assert_series_equal(result, expected) + @pytest.mark.parametrize("inplace", [True, False]) @pytest.mark.parametrize("upper", [[1, 2, 3], np.asarray([1, 2, 3])]) def test_clip_against_list_like(self, inplace, upper): @@ -138,6 +146,14 @@ def test_clip_with_timestamps_and_oob_datetimes(self): tm.assert_series_equal(result, expected) + def test_clip_timestamp_and_na(self): + # GH#44785 + ser = Series([Timestamp("1970-01-01")] * 2) + bounds = Series([pd.NaT, Timestamp("1970-01-01")]) + expected = ser.copy() + result = ser.clip(bounds) + tm.assert_series_equal(result, expected) + def test_clip_pos_args_deprecation(self): # https://github.com/pandas-dev/pandas/issues/41485 ser = Series([1, 2, 3]) From 4441f446cf8bac4040760ebb37c465b29be9fc46 Mon Sep 17 00:00:00 2001 From: phofl Date: Sat, 1 Jan 2022 22:39:38 +0100 Subject: [PATCH 2/4] Adjust tests --- pandas/tests/series/methods/test_clip.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/pandas/tests/series/methods/test_clip.py b/pandas/tests/series/methods/test_clip.py index 29c50b7bf5ca4..000f590f3754d 100644 --- a/pandas/tests/series/methods/test_clip.py +++ b/pandas/tests/series/methods/test_clip.py @@ -89,10 +89,11 @@ def test_clip_against_series(self): tm.assert_series_equal(s.clip(lower, upper), Series([1.0, 2.0, 3.5])) tm.assert_series_equal(s.clip(1.5, upper), Series([1.5, 1.5, 3.5])) - def test_clip_against_series_ea_int_dtype(self, any_int_ea_dtype): + @pytest.mark.parametrize("bound_values", [[pd.NA, 1], [1, pd.NA]]) + def test_clip_against_series_ea_int_dtype(self, any_int_ea_dtype, bound_values): # GH#44785 ser = Series([1, 1], dtype=any_int_ea_dtype) - bounds = Series([pd.NA, 1], dtype=any_int_ea_dtype) + bounds = Series(bound_values, dtype=any_int_ea_dtype) expected = ser.copy() result = ser.clip(bounds) tm.assert_series_equal(result, expected) @@ -146,10 +147,14 @@ def test_clip_with_timestamps_and_oob_datetimes(self): tm.assert_series_equal(result, expected) - def test_clip_timestamp_and_na(self): + @pytest.mark.parametrize( + "bound_values", + [[pd.NaT, Timestamp("1970-01-01")], [Timestamp("1970-01-01"), pd.NaT]], + ) + def test_clip_timestamp_and_na(self, bound_values): # GH#44785 ser = Series([Timestamp("1970-01-01")] * 2) - bounds = Series([pd.NaT, Timestamp("1970-01-01")]) + bounds = Series(bound_values) expected = ser.copy() result = ser.clip(bounds) tm.assert_series_equal(result, expected) From 2a420a25dd6c7971318d1e448df3f2b0ddedc255 Mon Sep 17 00:00:00 2001 From: phofl Date: Sat, 1 Jan 2022 23:15:01 +0100 Subject: [PATCH 3/4] Add dim check --- pandas/core/generic.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 1fe41376647a7..e0eaea01e775e 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7382,9 +7382,9 @@ def _clip_with_one_bound(self, threshold, method, axis, inplace): # Treat missing thresholds as no bounds, not clipping the values if is_list_like(threshold): method_name_le = method.__name__ == "le" - if is_datetime64_any_dtype(self.dtype): + if self.ndim == 1 and is_datetime64_any_dtype(self.dtype): fill_value = Timestamp.max if method_name_le else Timestamp.min - elif is_extension_array_dtype(self.dtype): + elif self.ndim == 1 and is_extension_array_dtype(self.dtype): fill_value = self.max() if method_name_le else self.min() else: fill_value = np.inf if method_name_le else -np.inf From c09df12c35a35031c4c4e347f5b55c5996b481d6 Mon Sep 17 00:00:00 2001 From: phofl Date: Sat, 1 Jan 2022 23:20:37 +0100 Subject: [PATCH 4/4] Adjust test --- pandas/core/generic.py | 7 ++++--- pandas/tests/series/methods/test_clip.py | 15 ++++++++------- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index e0eaea01e775e..ba69847c55814 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7382,9 +7382,10 @@ def _clip_with_one_bound(self, threshold, method, axis, inplace): # Treat missing thresholds as no bounds, not clipping the values if is_list_like(threshold): method_name_le = method.__name__ == "le" - if self.ndim == 1 and is_datetime64_any_dtype(self.dtype): - fill_value = Timestamp.max if method_name_le else Timestamp.min - elif self.ndim == 1 and is_extension_array_dtype(self.dtype): + if self.ndim == 1 and ( + is_extension_array_dtype(self.dtype) + or is_datetime64_any_dtype(self.dtype) + ): fill_value = self.max() if method_name_le else self.min() else: fill_value = np.inf if method_name_le else -np.inf diff --git a/pandas/tests/series/methods/test_clip.py b/pandas/tests/series/methods/test_clip.py index 000f590f3754d..8a1f1bdaea091 100644 --- a/pandas/tests/series/methods/test_clip.py +++ b/pandas/tests/series/methods/test_clip.py @@ -147,15 +147,16 @@ def test_clip_with_timestamps_and_oob_datetimes(self): tm.assert_series_equal(result, expected) - @pytest.mark.parametrize( - "bound_values", - [[pd.NaT, Timestamp("1970-01-01")], [Timestamp("1970-01-01"), pd.NaT]], - ) - def test_clip_timestamp_and_na(self, bound_values): + def test_clip_timestamp_and_na(self, tz_naive_fixture): # GH#44785 - ser = Series([Timestamp("1970-01-01")] * 2) - bounds = Series(bound_values) + ser = Series([Timestamp("1970-01-01", tz=tz_naive_fixture)] * 2) expected = ser.copy() + + bounds = Series([pd.NaT, Timestamp("1970-01-01", tz=tz_naive_fixture)]) + result = ser.clip(bounds) + tm.assert_series_equal(result, expected) + + bounds = Series([Timestamp("1970-01-01", tz=tz_naive_fixture), pd.NaT]) result = ser.clip(bounds) tm.assert_series_equal(result, expected)