Commit 8b0fc27

Merge remote-tracking branch 'upstream/main' into issue_61917
2 parents: b919b7e + 8476e0f

201 files changed: +4177 −2153 lines


.github/workflows/wheels.yml
Lines changed: 1 addition & 1 deletion

@@ -162,7 +162,7 @@ jobs:
         run: echo "sdist_name=$(cd ./dist && ls -d */)" >> "$GITHUB_ENV"

       - name: Build wheels
-        uses: pypa/cibuildwheel@v3.1.4
+        uses: pypa/cibuildwheel@v3.2.0
         with:
           package-dir: ./dist/${{ startsWith(matrix.buildplat[1], 'macosx') && env.sdist_name || needs.build_sdist.outputs.sdist_file }}
         env:

Dockerfile
Lines changed: 11 additions & 1 deletion

@@ -1,6 +1,9 @@
 FROM python:3.11.13
 WORKDIR /home/pandas

+# https://docs.docker.com/reference/dockerfile/#automatic-platform-args-in-the-global-scope
+ARG TARGETPLATFORM
+
 RUN apt-get update && \
     apt-get --no-install-recommends -y upgrade && \
     apt-get --no-install-recommends -y install \
@@ -13,7 +16,14 @@ RUN apt-get update && \
     rm -rf /var/lib/apt/lists/*

 COPY requirements-dev.txt /tmp
-RUN python -m pip install --no-cache-dir --upgrade pip && \
+
+RUN case "$TARGETPLATFORM" in \
+    linux/arm*) \
+        # Drop PyQt5 for ARM GH#61037
+        sed -i "/^pyqt5/Id" /tmp/requirements-dev.txt \
+        ;; \
+    esac && \
+    python -m pip install --no-cache-dir --upgrade pip && \
     python -m pip install --no-cache-dir -r /tmp/requirements-dev.txt
 RUN git config --global --add safe.directory /home/pandas
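
The sed invocation above deletes any requirements line that starts with "pyqt5", case-insensitively (the `I` flag). A rough Python equivalent of that filtering step, for illustration only and not part of the commit:

    import re

    def drop_pyqt5(requirements: str) -> str:
        # Mirrors `sed "/^pyqt5/Id"`: delete lines beginning with
        # "pyqt5" in any letter case, keep everything else.
        kept = [
            line
            for line in requirements.splitlines()
            if not re.match(r"pyqt5", line, flags=re.IGNORECASE)
        ]
        return "\n".join(kept)

    print(drop_pyqt5("numpy\nPyQt5>=5.15\npyqt5-sip\nscipy"))
    # numpy
    # scipy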

asv_bench/benchmarks/algorithms.py
Lines changed: 2 additions & 2 deletions

@@ -199,8 +199,8 @@ class SortIntegerArray:
     params = [10**3, 10**5]

     def setup(self, N):
-        data = np.arange(N, dtype=float)
-        data[40] = np.nan
+        data = np.arange(N, dtype=float).astype(object)
+        data[40] = pd.NA
         self.array = pd.array(data, dtype="Int64")

     def time_argsort(self, N):
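
This benchmark change, like the two that follow, builds nullable-dtype test data from object arrays holding pd.NA instead of float arrays holding np.nan, so the masked dtype's own missing-value marker is placed explicitly. A minimal sketch of the resulting construction, assuming a recent pandas:

    import numpy as np
    import pandas as pd

    # Object-dtype data can hold pd.NA, the missing-value marker used
    # natively by the nullable (masked) dtypes such as Int64.
    data = np.arange(10, dtype=float).astype(object)
    data[4] = pd.NA

    arr = pd.array(data, dtype="Int64")  # IntegerArray with a real mask
    arr.isna()  # True only at position 4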

asv_bench/benchmarks/frame_methods.py
Lines changed: 3 additions & 0 deletions

@@ -4,6 +4,7 @@
 import numpy as np

 from pandas import (
+    NA,
     DataFrame,
     Index,
     MultiIndex,
@@ -445,6 +446,8 @@ def setup(self, inplace, dtype):
         values[::2] = np.nan
         if dtype == "Int64":
             values = values.round()
+            values = values.astype(object)
+            values[::2] = NA
         self.df = DataFrame(values, dtype=dtype)
         self.fill_values = self.df.iloc[self.df.first_valid_index()].to_dict()

asv_bench/benchmarks/groupby.py
Lines changed: 4 additions & 0 deletions

@@ -689,6 +689,10 @@ def setup(self, dtype, method, with_nans):
         null_vals = vals.astype(float, copy=True)
         null_vals[::2, :] = np.nan
         null_vals[::3, :] = np.nan
+        if dtype in ["Int64", "Float64"]:
+            null_vals = null_vals.astype(object)
+            null_vals[::2, :] = NA
+            null_vals[::3, :] = NA
         df = DataFrame(null_vals, columns=list("abcde"), dtype=dtype)
         df["key"] = keys
         self.df = df

doc/source/getting_started/comparison/comparison_with_sql.rst
Lines changed: 36 additions & 0 deletions

@@ -270,6 +270,42 @@ column with another DataFrame's index.
     indexed_df2 = df2.set_index("key")
     pd.merge(df1, indexed_df2, left_on="key", right_index=True)

+:meth:`~pandas.merge` also supports joining on multiple columns by passing a list of column names.
+
+.. code-block:: sql
+
+    SELECT *
+    FROM df1_multi
+    INNER JOIN df2_multi
+      ON df1_multi.key1 = df2_multi.key1
+      AND df1_multi.key2 = df2_multi.key2;
+
+.. ipython:: python
+
+    df1_multi = pd.DataFrame({
+        "key1": ["A", "B", "C", "D"],
+        "key2": [1, 2, 3, 4],
+        "value": np.random.randn(4)
+    })
+    df2_multi = pd.DataFrame({
+        "key1": ["B", "D", "D", "E"],
+        "key2": [2, 4, 4, 5],
+        "value": np.random.randn(4)
+    })
+    pd.merge(df1_multi, df2_multi, on=["key1", "key2"])
+
+If the columns have different names between DataFrames, on can be replaced with left_on and
+right_on.
+
+.. ipython:: python
+
+    df2_multi = pd.DataFrame({
+        "key_1": ["B", "D", "D", "E"],
+        "key_2": [2, 4, 4, 5],
+        "value": np.random.randn(4)
+    })
+    pd.merge(df1_multi, df2_multi, left_on=["key1", "key2"], right_on=["key_1", "key_2"])
+
 LEFT OUTER JOIN
 ~~~~~~~~~~~~~~~

doc/source/reference/missing_value.rst
Lines changed: 0 additions & 2 deletions

@@ -11,14 +11,12 @@ NA is the way to represent missing values for nullable dtypes (see below):

 .. autosummary::
    :toctree: api/
-   :template: autosummary/class_without_autosummary.rst

    NA

 NaT is the missing value for timedelta and datetime data (see below):

 .. autosummary::
    :toctree: api/
-   :template: autosummary/class_without_autosummary.rst

    NaT

doc/source/user_guide/scale.rst
Lines changed: 23 additions & 18 deletions

@@ -164,35 +164,35 @@ files. Each file in the directory represents a different year of the entire data
 .. ipython:: python
    :okwarning:

-    import pathlib
+    import glob
+    import tempfile

     N = 12
     starts = [f"20{i:>02d}-01-01" for i in range(N)]
     ends = [f"20{i:>02d}-12-13" for i in range(N)]

-    pathlib.Path("data/timeseries").mkdir(exist_ok=True)
+    tmpdir = tempfile.TemporaryDirectory(ignore_cleanup_errors=True)

     for i, (start, end) in enumerate(zip(starts, ends)):
         ts = make_timeseries(start=start, end=end, freq="1min", seed=i)
-        ts.to_parquet(f"data/timeseries/ts-{i:0>2d}.parquet")
+        ts.to_parquet(f"{tmpdir.name}/ts-{i:0>2d}.parquet")


 ::

-    data
-    └── timeseries
-        ├── ts-00.parquet
-        ├── ts-01.parquet
-        ├── ts-02.parquet
-        ├── ts-03.parquet
-        ├── ts-04.parquet
-        ├── ts-05.parquet
-        ├── ts-06.parquet
-        ├── ts-07.parquet
-        ├── ts-08.parquet
-        ├── ts-09.parquet
-        ├── ts-10.parquet
-        └── ts-11.parquet
+    tmpdir
+    ├── ts-00.parquet
+    ├── ts-01.parquet
+    ├── ts-02.parquet
+    ├── ts-03.parquet
+    ├── ts-04.parquet
+    ├── ts-05.parquet
+    ├── ts-06.parquet
+    ├── ts-07.parquet
+    ├── ts-08.parquet
+    ├── ts-09.parquet
+    ├── ts-10.parquet
+    └── ts-11.parquet

 Now we'll implement an out-of-core :meth:`pandas.Series.value_counts`. The peak memory usage of this
 workflow is the single largest chunk, plus a small series storing the unique value
@@ -202,13 +202,18 @@ work for arbitrary-sized datasets.
 .. ipython:: python

     %%time
-    files = pathlib.Path("data/timeseries/").glob("ts*.parquet")
+    files = glob.iglob(f"{tmpdir.name}/ts*.parquet")
     counts = pd.Series(dtype=int)
     for path in files:
         df = pd.read_parquet(path)
         counts = counts.add(df["name"].value_counts(), fill_value=0)
     counts.astype(int)

+.. ipython:: python
+   :suppress:
+
+   tmpdir.cleanup()
+
 Some readers, like :meth:`pandas.read_csv`, offer parameters to control the
 ``chunksize`` when reading a single file.
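
The closing sentence of this section mentions chunked reading; a minimal sketch of that pattern, using a hypothetical file name (not part of the commit):

    import pandas as pd

    # read_csv with chunksize yields DataFrames of at most that many rows,
    # so only one chunk needs to be in memory at a time.
    counts = pd.Series(dtype=int)
    for chunk in pd.read_csv("timeseries.csv", chunksize=100_000):
        counts = counts.add(chunk["name"].value_counts(), fill_value=0)
    counts = counts.astype(int)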

doc/source/user_guide/text.rst
Lines changed: 1 addition & 1 deletion

@@ -75,7 +75,7 @@ or convert from existing pandas data:

 .. ipython:: python

-    s1 = pd.Series([1, 2, np.nan], dtype="Int64")
+    s1 = pd.Series([1, 2, pd.NA], dtype="Int64")
     s1
     s2 = s1.astype("string")
     s2
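
As in the benchmark changes above, pd.NA is the native missing-value marker for the nullable Int64 dtype, so the documentation example now constructs the missing entry with it directly. A quick sketch of what the snippet produces, assuming a recent pandas:

    import pandas as pd

    s1 = pd.Series([1, 2, pd.NA], dtype="Int64")
    s2 = s1.astype("string")
    # The missing value survives the cast: s2 has StringDtype and
    # still reports exactly one NA.
    s2.isna().sum()  # 1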

doc/source/whatsnew/v0.21.0.rst
Lines changed: 6 additions & 11 deletions

@@ -635,22 +635,17 @@ Previous behavior:

 New behavior:

-.. code-block:: ipython
+.. ipython:: python

-   In [1]: pi = pd.period_range('2017-01', periods=12, freq='M')
+   pi = pd.period_range('2017-01', periods=12, freq='M')

-   In [2]: s = pd.Series(np.arange(12), index=pi)
+   s = pd.Series(np.arange(12), index=pi)

-   In [3]: resampled = s.resample('2Q').mean()
+   resampled = s.resample('2Q').mean()

-   In [4]: resampled
-   Out[4]:
-   2017Q1    2.5
-   2017Q3    8.5
-   Freq: 2Q-DEC, dtype: float64
+   resampled

-   In [5]: resampled.index
-   Out[5]: PeriodIndex(['2017Q1', '2017Q3'], dtype='period[2Q-DEC]')
+   resampled.index

 Upsampling and calling ``.ohlc()`` previously returned a ``Series``, basically identical to calling ``.asfreq()``. OHLC upsampling now returns a DataFrame with columns ``open``, ``high``, ``low`` and ``close`` (:issue:`13083`). This is consistent with downsampling and ``DatetimeIndex`` behavior.
