pandas-dev
diff --git a/‎.pre-commit-config.yaml‎
Lines changed: 4 additions & 4 deletions b/‎.pre-commit-config.yaml‎
Lines changed: 4 additions & 4 deletions
diff --git a/‎ci/code_checks.sh‎
Lines changed: 1 addition & 0 deletions b/‎ci/code_checks.sh‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎doc/source/whatsnew/v3.0.0.rst‎
Lines changed: 60 additions & 11 deletions b/‎doc/source/whatsnew/v3.0.0.rst‎
Lines changed: 60 additions & 11 deletions
diff --git a/‎pandas/_config/__init__.py‎
Lines changed: 2 additions & 2 deletions b/‎pandas/_config/__init__.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎pandas/_libs/hashtable_class_helper.pxi.in‎
Lines changed: 2 additions & 2 deletions b/‎pandas/_libs/hashtable_class_helper.pxi.in‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎pandas/_libs/index.pyx‎
Lines changed: 1 addition & 1 deletion b/‎pandas/_libs/index.pyx‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎pandas/_libs/lib.pyx‎
Lines changed: 0 additions & 6 deletions b/‎pandas/_libs/lib.pyx‎
Lines changed: 0 additions & 6 deletions
diff --git a/‎pandas/_libs/tslib.pyx‎
Lines changed: 1 addition & 1 deletion b/‎pandas/_libs/tslib.pyx‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎pandas/_libs/tslibs/conversion.pxd‎
Lines changed: 0 additions & 3 deletions b/‎pandas/_libs/tslibs/conversion.pxd‎
Lines changed: 0 additions & 3 deletions
diff --git a/‎pandas/_libs/tslibs/conversion.pyx‎
Lines changed: 1 addition & 1 deletion b/‎pandas/_libs/tslibs/conversion.pyx‎
Lines changed: 1 addition & 1 deletion
@@ -19,7 +19,7 @@ ci:
     skip: [pyright, mypy]
 repos:
 -   repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.14.3
+    rev: v0.14.7
     hooks:
     -   id: ruff
         args: [--exit-non-zero-on-fix]
@@ -71,7 +71,7 @@ repos:
     hooks:
     -   id: isort
 -   repo: https://github.com/asottile/pyupgrade
-    rev: v3.21.0
+    rev: v3.21.2
     hooks:
     -   id: pyupgrade
         args: [--py311-plus]
@@ -87,12 +87,12 @@ repos:
         types: [text]  # overwrite types: [rst]
         types_or: [python, rst]
 -   repo: https://github.com/sphinx-contrib/sphinx-lint
-    rev: v1.0.1
+    rev: v1.0.2
     hooks:
     - id: sphinx-lint
       args: ["--enable", "all", "--disable", "line-too-long"]
 -   repo: https://github.com/pre-commit/mirrors-clang-format
-    rev: v21.1.2
+    rev: v21.1.6
     hooks:
     - id: clang-format
       files: ^pandas/_libs/src|^pandas/_libs/include
 
@@ -72,6 +72,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Series.dt PR01" `# Accessors are implemented as classes, but we do not document the Parameters section` \
         -i "pandas.Period.freq GL08" \
         -i "pandas.Period.ordinal GL08" \
+        -i "pandas.errors.ChainedAssignmentError SA01" \
         -i "pandas.errors.IncompatibleFrequency SA01,SS06,EX01" \
         -i "pandas.api.extensions.ExtensionArray.value_counts EX01,RT03,SA01" \
         -i "pandas.api.typing.DataFrameGroupBy.plot PR02" \
 
@@ -117,6 +117,9 @@ process in more detail.
 
     `PDEP-7: Consistent copy/view semantics in pandas with Copy-on-Write <https://pandas.pydata.org/pdeps/0007-copy-on-write.html>`__
 
+Setting the option ``mode.copy_on_write`` no longer has any impact. The option is deprecated
+and will be removed in pandas 4.0.
+
 .. _whatsnew_300.enhancements.col:
 
 ``pd.col`` syntax can now be used in :meth:`DataFrame.assign` and :meth:`DataFrame.loc`
@@ -382,6 +385,8 @@ In cases with mixed-resolution inputs, the highest resolution is used:
 
 .. warning:: Many users will now get "M8[us]" dtype data in cases when they used to get "M8[ns]". For most use cases they should not notice a difference. One big exception is converting to integers, which will give integers 1000x smaller.
 
+Similarly, the :class:`Timedelta` constructor and :func:`to_timedelta` with a string input now default to a microsecond unit, using a nanosecond unit only in cases that actually have nanosecond precision.
+
 .. _whatsnew_300.api_breaking.concat_datetime_sorting:
 
 :func:`concat` no longer ignores ``sort`` when all objects have a :class:`DatetimeIndex`
@@ -548,29 +553,55 @@ small behavior differences as collateral:
 Changed treatment of NaN values in pyarrow and numpy-nullable floating dtypes
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-Previously, when dealing with a nullable dtype (e.g. ``Float64Dtype`` or ``int64[pyarrow]``), ``NaN`` was treated as interchangeable with :class:`NA` in some circumstances but not others. This was done to make adoption easier, but caused some confusion (:issue:`32265`). In 3.0, an option ``"mode.nan_is_na"`` (default ``True``) controls whether to treat ``NaN`` as equivalent to :class:`NA`.
+Previously, when dealing with a nullable dtype (e.g. ``Float64Dtype`` or ``int64[pyarrow]``),
+``NaN`` was treated as interchangeable with :class:`NA` in some circumstances but not others.
+This was done to make adoption easier, but caused some confusion (:issue:`32265`).
+In 3.0, this behaviour is made consistent: by default, ``NaN`` is treated as
+equivalent to :class:`NA` in all cases.
 
-With ``pd.set_option("mode.nan_is_na", True)`` (again, this is the default), ``NaN`` can be passed to constructors, ``__setitem__``, ``__contains__`` and be treated the same as :class:`NA`. The only change users will see is that arithmetic and ``np.ufunc`` operations that previously introduced ``NaN`` entries produce :class:`NA` entries instead:
+By default, ``NaN`` can be passed to constructors, ``__setitem__``, ``__contains__``
+and will be treated the same as :class:`NA`. The only change users will see is
+that arithmetic and ``np.ufunc`` operations that previously introduced ``NaN``
+entries produce :class:`NA` entries instead.
 
 *Old behavior:*
 
 .. code-block:: ipython
 
-    In [2]: ser = pd.Series([0, None], dtype=pd.Float64Dtype())
+    # NaN in input gets converted to NA
+    In [1]: ser = pd.Series([0, np.nan], dtype=pd.Float64Dtype())
+    In [2]: ser
+    Out[2]:
+    0     0.0
+    1    <NA>
+    dtype: Float64
+    # NaN produced by arithmetic (0/0) remained NaN
     In [3]: ser / 0
     Out[3]:
     0     NaN
     1    <NA>
     dtype: Float64
+    # the NaN value is not considered as missing
+    In [4]: (ser / 0).isna()
+    Out[4]:
+    0    False
+    1     True
+    dtype: bool
 
 *New behavior:*
 
 .. ipython:: python
 
-    ser = pd.Series([0, None], dtype=pd.Float64Dtype())
+    ser = pd.Series([0, np.nan], dtype=pd.Float64Dtype())
+    ser
     ser / 0
+    (ser / 0).isna()
 
-By contrast, with ``pd.set_option("mode.nan_is_na", False)``, ``NaN`` is always considered distinct and specifically as a floating-point value, so cannot be used with integer dtypes:
+In the future, the intention is to consider ``NaN`` and :class:`NA` as distinct
+values, and an option to control this behaviour is added in 3.0 through
+``pd.options.future.distinguish_nan_and_na``. When enabled, ``NaN`` is always
+considered distinct and specifically as a floating-point value. As a consequence,
+it cannot be used with integer dtypes.
 
 *Old behavior:*
 
@@ -584,13 +615,21 @@ By contrast, with ``pd.set_option("mode.nan_is_na", False)``, ``NaN`` is always
 
 .. ipython:: python
 
-    pd.set_option("mode.nan_is_na", False)
-    ser = pd.Series([1, np.nan], dtype=pd.Float64Dtype())
-    ser[1]
+    with pd.option_context("future.distinguish_nan_and_na", True):
+        ser = pd.Series([1, np.nan], dtype=pd.Float64Dtype())
+        print(ser[1])
+
+If we had passed ``pd.Int64Dtype()`` or ``"int64[pyarrow]"`` for the dtype in
+the latter example, this would raise, as a float ``NaN`` cannot be held by an
+integer dtype.
 
-If we had passed ``pd.Int64Dtype()`` or ``"int64[pyarrow]"`` for the dtype in the latter example, this would raise, as a float ``NaN`` cannot be held by an integer dtype.
+With ``"future.distinguish_nan_and_na"`` enabled, ``ser.to_numpy()`` (and
+``frame.values`` and ``np.asarray(obj)``) will convert to ``object`` dtype if
+:class:`NA` entries are present, where before they would coerce to
+``NaN``.  To retain a float numpy dtype, explicitly pass ``na_value=np.nan``
+to :meth:`Series.to_numpy`.
 
-With ``"mode.nan_is_na"`` set to ``False``, ``ser.to_numpy()`` (and ``frame.values`` and ``np.asarray(obj)``) will convert to ``object`` dtype if :class:`NA` entries are present, where before they would coerce to ``NaN``.  To retain a float numpy dtype, explicitly pass ``na_value=np.nan`` to :meth:`Series.to_numpy`.
+Note that the option is experimental and subject to change in future releases.
 
 The ``__module__`` attribute now points to public modules
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -749,11 +788,16 @@ Other API changes
   the dtype of the resulting Index (:issue:`60797`)
 - :class:`IncompatibleFrequency` now subclasses ``TypeError`` instead of ``ValueError``. As a result, joins with mismatched frequencies now cast to object like other non-comparable joins, and arithmetic with indexes with mismatched frequencies align (:issue:`55782`)
 - :class:`Series` "flex" methods like :meth:`Series.add` no longer allow passing a :class:`DataFrame` for ``other``; use the DataFrame reversed method instead (:issue:`46179`)
+- :func:`date_range` and :func:`timedelta_range` no longer default to ``unit="ns"``; instead, the unit is inferred from the ``start``, ``end``, and ``freq`` parameters. Explicitly specify a desired ``unit`` to override the inferred unit (:issue:`59031`)
 - :meth:`CategoricalIndex.append` no longer attempts to cast different-dtype indexes to the caller's dtype (:issue:`41626`)
 - :meth:`ExtensionDtype.construct_array_type` is now a regular method instead of a ``classmethod`` (:issue:`58663`)
 - Arithmetic operations between a :class:`Series`, :class:`Index`, or :class:`ExtensionArray` with a ``list`` now consistently wrap that list with an array equivalent to ``Series(my_list).array``. To do any other kind of type inference or casting, do so explicitly before operating (:issue:`62552`)
 - Comparison operations between :class:`Index` and :class:`Series` now consistently return :class:`Series` regardless of which object is on the left or right (:issue:`36759`)
 - Numpy functions like ``np.isinf`` that return a bool dtype when called on a :class:`Index` object now return a bool-dtype :class:`Index` instead of ``np.ndarray`` (:issue:`52676`)
+- Methods that can operate in-place (:meth:`~DataFrame.replace`, :meth:`~DataFrame.fillna`,
+  :meth:`~DataFrame.ffill`, :meth:`~DataFrame.bfill`, :meth:`~DataFrame.interpolate`,
+  :meth:`~DataFrame.where`, :meth:`~DataFrame.mask`, :meth:`~DataFrame.clip`) now return
+  the modified DataFrame or Series (``self``) instead of ``None`` when ``inplace=True`` (:issue:`63207`)
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_300.deprecations:
@@ -1183,9 +1227,11 @@ MultiIndex
 I/O
 ^^^
 - Bug in :class:`DataFrame` and :class:`Series` ``repr`` of :py:class:`collections.abc.Mapping` elements. (:issue:`57915`)
+- Bug in :meth:`DataFrame.to_hdf` and :func:`read_hdf` with ``timedelta64`` dtypes with non-nanosecond resolution failing to round-trip correctly (:issue:`63239`)
 - Fix bug in ``on_bad_lines`` callable when returning too many fields: now emits
   ``ParserWarning`` and truncates extra fields regardless of ``index_col`` (:issue:`61837`)
 - Bug in :func:`pandas.json_normalize` inconsistently handling non-dict items in ``data`` when ``max_level`` was set. The function will now raise a ``TypeError`` if ``data`` is a list containing non-dict items (:issue:`62829`)
+- Bug in :func:`pandas.json_normalize` raising ``TypeError`` when ``meta`` contained a non-string key (e.g., ``int``) and ``record_path`` was specified, which was inconsistent with the behavior when ``record_path`` was ``None`` (:issue:`63019`)
 - Bug in :meth:`.DataFrame.to_json` when ``"index"`` was a value in the :attr:`DataFrame.column` and :attr:`Index.name` was ``None``. Now, this will fail with a ``ValueError`` (:issue:`58925`)
 - Bug in :meth:`.io.common.is_fsspec_url` not recognizing chained fsspec URLs (:issue:`48978`)
 - Bug in :meth:`DataFrame._repr_html_` which ignored the ``"display.float_format"`` option (:issue:`59876`)
@@ -1239,6 +1285,7 @@ Plotting
 - Bug in :meth:`Series.plot` preventing a line and bar from being aligned on the same plot (:issue:`61161`)
 - Bug in :meth:`Series.plot` preventing a line and scatter plot from being aligned (:issue:`61005`)
 - Bug in :meth:`Series.plot` with ``kind="pie"`` with :class:`ArrowDtype` (:issue:`59192`)
+- Bug in plotting with a :class:`TimedeltaIndex` with non-nanosecond resolution displaying incorrect labels (:issue:`63237`)
 
 Groupby/resample/rolling
 ^^^^^^^^^^^^^^^^^^^^^^^^
@@ -1269,7 +1316,8 @@ Groupby/resample/rolling
 - Bug in :meth:`Rolling.apply` for ``method="table"`` where column order was not being respected due to the columns getting sorted by default. (:issue:`59666`)
 - Bug in :meth:`Rolling.apply` where the applied function could be called on fewer than ``min_period`` periods if ``method="table"``. (:issue:`58868`)
 - Bug in :meth:`Rolling.sem` computing incorrect results because it divided by ``sqrt((n - 1) * (n - ddof))`` instead of ``sqrt(n * (n - ddof))``. (:issue:`63180`)
-- Bug in :meth:`Rolling.skew` incorrectly computing skewness for windows following outliers due to numerical instability. The calculation now properly handles catastrophic cancellation by recomputing affected windows (:issue:`47461`)
+- Bug in :meth:`Rolling.skew` and in :meth:`Rolling.kurt` incorrectly computing skewness and kurtosis, respectively, for windows following outliers due to numerical instability. The calculation now properly handles catastrophic cancellation by recomputing affected windows (:issue:`47461`, :issue:`61416`)
+- Bug in :meth:`Rolling.skew` and in :meth:`Rolling.kurt` where results varied with input length despite identical data and window contents (:issue:`54380`)
 - Bug in :meth:`Series.resample` could raise when the date range ended shortly before a non-existent time. (:issue:`58380`)
 - Bug in :meth:`Series.resample` raising error when resampling non-nanosecond resolutions out of bounds for nanosecond precision (:issue:`57427`)
 - Bug in :meth:`Series.rolling.var` and :meth:`Series.rolling.std` computing incorrect results due to numerical instability. (:issue:`47721`, :issue:`52407`, :issue:`54518`, :issue:`55343`)
@@ -1307,6 +1355,7 @@ Sparse
 - Bug in :class:`SparseDtype` for equal comparison with na fill value. (:issue:`54770`)
 - Bug in :meth:`DataFrame.sparse.from_spmatrix` which hard coded an invalid ``fill_value`` for certain subtypes. (:issue:`59063`)
 - Bug in :meth:`DataFrame.sparse.to_dense` which ignored subclassing and always returned an instance of :class:`DataFrame` (:issue:`59913`)
+- Bug in ``SparseArray.cumsum`` for integer arrays raising a maximum recursion depth error (:issue:`62669`)
 
 ExtensionArray
 ^^^^^^^^^^^^^^
 
@@ -36,5 +36,5 @@ def using_string_dtype() -> bool:
 
 
 def is_nan_na() -> bool:
-    _mode_options = _global_config["mode"]
-    return _mode_options["nan_is_na"]
+    _mode_options = _global_config["future"]
+    return not _mode_options["distinguish_nan_and_na"]
@@ -1070,7 +1070,7 @@ cdef class StringHashTable(HashTable):
             val = values[i]
 
             if isinstance(val, str):
-                # GH#31499 if we have a np.str_ PyUnicode_AsUTF8 won't recognize
+                # GH#31499 if we have an np.str_ PyUnicode_AsUTF8 won't recognize
                 #  it as a str, even though isinstance does.
                 v = PyUnicode_AsUTF8(<str>val)
             else:
@@ -1108,7 +1108,7 @@ cdef class StringHashTable(HashTable):
             val = values[i]
 
             if isinstance(val, str):
-                # GH#31499 if we have a np.str_ PyUnicode_AsUTF8 won't recognize
+                # GH#31499 if we have an np.str_ PyUnicode_AsUTF8 won't recognize
                 #  it as a str, even though isinstance does.
                 v = PyUnicode_AsUTF8(<str>val)
             else:
 
@@ -58,7 +58,7 @@ cdef bint is_definitely_invalid_key(object val):
 
 cdef ndarray _get_bool_indexer(ndarray values, object val, ndarray mask = None):
     """
-    Return a ndarray[bool] of locations where val matches self.values.
+    Return an ndarray[bool] of locations where val matches self.values.
 
     If val is not NA, this is equivalent to `self.values == val`
     """
 
@@ -106,7 +106,6 @@ from pandas._libs.tslibs.nattype cimport (
 )
 from pandas._libs.tslibs.offsets cimport is_offset_object
 from pandas._libs.tslibs.period cimport is_period_object
-from pandas._libs.tslibs.timedeltas cimport convert_to_timedelta64
 from pandas._libs.tslibs.timezones cimport tz_compare
 
 # constants that will be compared to potentially arbitrarily large
@@ -2674,11 +2673,6 @@ def maybe_convert_objects(ndarray[object] objects,
         elif is_timedelta(val):
             if convert_non_numeric:
                 seen.timedelta_ = True
-                try:
-                    convert_to_timedelta64(val, "ns")
-                except OutOfBoundsTimedelta:
-                    seen.object_ = True
-                    break
                 break
             else:
                 seen.object_ = True
 
@@ -120,7 +120,7 @@ def format_array_from_datetime(
     NPY_DATETIMEUNIT reso=NPY_FR_ns,
 ) -> np.ndarray:
     """
-    return a np object array of the string formatted values
+    return an np object array of the string formatted values
 
     Parameters
     ----------
 
@@ -45,9 +45,6 @@ cdef int64_t get_datetime64_nanos(object val, NPY_DATETIMEUNIT reso) except? -1
 
 cpdef datetime localize_pydatetime(datetime dt, tzinfo tz)
 cdef int64_t cast_from_unit(object ts, str unit, NPY_DATETIMEUNIT out_reso=*) except? -1
-cdef (int64_t, int) precision_from_unit(
-    NPY_DATETIMEUNIT in_reso, NPY_DATETIMEUNIT out_reso=*
-)
 
 cdef maybe_localize_tso(_TSObject obj, tzinfo tz, NPY_DATETIMEUNIT reso)
 
 
@@ -276,7 +276,7 @@ cdef (int64_t, int) precision_from_unit(
 
 cdef int64_t get_datetime64_nanos(object val, NPY_DATETIMEUNIT reso) except? -1:
     """
-    Extract the value and unit from a np.datetime64 object, then convert the
+    Extract the value and unit from an np.datetime64 object, then convert the
     value to nanoseconds if necessary.
     """
     cdef: