From d090db7fae2bfa720399e8502c148e8526b116ad Mon Sep 17 00:00:00 2001
From: Jixun Sun <160219251+AnonToky@users.noreply.github.com>
Date: Sun, 2 Nov 2025 19:28:20 +0800
Subject: [PATCH 1/3] Add tests for groupby dropna=False behavior

Add tests to ensure groupby with dropna=False preserves NaN groups in both DataFrame and Series.
---
 tests/groupby/test_groupby_dropna.py | 41 ++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)
 create mode 100644 tests/groupby/test_groupby_dropna.py

diff --git a/tests/groupby/test_groupby_dropna.py b/tests/groupby/test_groupby_dropna.py
new file mode 100644
index 0000000000000..6c3921b06ee8d
--- /dev/null
+++ b/tests/groupby/test_groupby_dropna.py
@@ -0,0 +1,41 @@
+import numpy as np
+import pandas as pd
+import pandas._testing as tm
+
+def test_groupby_dataframe_dropna_false_preserves_nan_group():
+    # Ensure DataFrame.groupby(..., dropna=False) preserves NA entries as a single group
+    # Tests-only addition to lock current behavior (GHxxxx)
+    data = {"group": ["g1", np.nan, "g1", "g2", np.nan], "val": [0, 1, 2, 3, 4]}
+    df = pd.DataFrame(data)
+
+    gb_keepna = df.groupby("group", dropna=False)
+    result = gb_keepna.indices
+
+    # expected: g1 -> [0,2], g2 -> [3], NaN -> [1,4]
+    expected = {
+        "g1": np.array([0, 2], dtype=np.intp),
+        "g2": np.array([3], dtype=np.intp),
+        np.nan: np.array([1, 4], dtype=np.intp),
+    }
+
+    # Compare group indices allowing for np.nan key
+    for res_vals, exp_vals in zip(result.values(), expected.values()):
+        tm.assert_numpy_array_equal(res_vals, exp_vals)
+    # check there is an NaN key present
+    assert any(pd.isna(k) for k in result.keys())
+
+
+def test_groupby_series_dropna_false_preserves_nan_group():
+    # Verify Series.groupby(..., dropna=False) also preserves NA groups
+    s = pd.Series([1, 2, 3, 4], index=["a", np.nan, "a", np.nan], name="s")
+    gb = s.groupby(level=0, dropna=False)
+    res = gb.indices
+
+    expected = {
+        "a": np.array([0, 2], dtype=np.intp),
+        np.nan: np.array([1, 3], dtype=np.intp),
+    }
+
+    for res_vals, exp_vals in zip(res.values(), expected.values()):
+        tm.assert_numpy_array_equal(res_vals, exp_vals)
+    assert any(pd.isna(k) for k in res.keys())

From 877ecd97271db54bee06eda8d48da5c79ddb0047 Mon Sep 17 00:00:00 2001
From: Jixun Sun <160219251+AnonToky@users.noreply.github.com>
Date: Sun, 2 Nov 2025 19:29:38 +0800
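Subject: [PATCH 2/3] Delete tests/groupby/test_groupby_dropna.py

Remove tests/groupby/test_groupby_dropna.py, the file added in PATCH 1/3; no other files are touched.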
---
 tests/groupby/test_groupby_dropna.py | 41 ----------------------------
 1 file changed, 41 deletions(-)
 delete mode 100644 tests/groupby/test_groupby_dropna.py

diff --git a/tests/groupby/test_groupby_dropna.py b/tests/groupby/test_groupby_dropna.py
deleted file mode 100644
index 6c3921b06ee8d..0000000000000
--- a/tests/groupby/test_groupby_dropna.py
+++ /dev/null
@@ -1,41 +0,0 @@
-import numpy as np
-import pandas as pd
-import pandas._testing as tm
-
-def test_groupby_dataframe_dropna_false_preserves_nan_group():
-    # Ensure DataFrame.groupby(..., dropna=False) preserves NA entries as a single group
-    # Tests-only addition to lock current behavior (GHxxxx)
-    data = {"group": ["g1", np.nan, "g1", "g2", np.nan], "val": [0, 1, 2, 3, 4]}
-    df = pd.DataFrame(data)
-
-    gb_keepna = df.groupby("group", dropna=False)
-    result = gb_keepna.indices
-
-    # expected: g1 -> [0,2], g2 -> [3], NaN -> [1,4]
-    expected = {
-        "g1": np.array([0, 2], dtype=np.intp),
-        "g2": np.array([3], dtype=np.intp),
-        np.nan: np.array([1, 4], dtype=np.intp),
-    }
-
-    # Compare group indices allowing for np.nan key
-    for res_vals, exp_vals in zip(result.values(), expected.values()):
-        tm.assert_numpy_array_equal(res_vals, exp_vals)
-    # check there is an NaN key present
-    assert any(pd.isna(k) for k in result.keys())
-
-
-def test_groupby_series_dropna_false_preserves_nan_group():
-    # Verify Series.groupby(..., dropna=False) also preserves NA groups
-    s = pd.Series([1, 2, 3, 4], index=["a", np.nan, "a", np.nan], name="s")
-    gb = s.groupby(level=0, dropna=False)
-    res = gb.indices
-
-    expected = {
-        "a": np.array([0, 2], dtype=np.intp),
-        np.nan: np.array([1, 3], dtype=np.intp),
-    }
-
-    for res_vals, exp_vals in zip(res.values(), expected.values()):
-        tm.assert_numpy_array_equal(res_vals, exp_vals)
-    assert any(pd.isna(k) for k in res.keys())

From 32b6ee877960791f8935725f88bc1c1e3d22290c Mon Sep 17 00:00:00 2001
From: Jixun Sun <160219251+AnonToky@users.noreply.github.com>
Date: Tue, 25 Nov 2025 20:14:33 +0800
Subject: [PATCH 3/3] Refactor tests to use HDFStore with tmp_path

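Replace the ensure_clean_store helper with HDFStore(tmp_path / setup_path)
in pandas/tests/io/pytables/test_errors.py, add the tmp_path fixture to the
affected test signatures, and remove the ensure_clean_store import.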
---
 pandas/tests/io/pytables/test_errors.py | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/pandas/tests/io/pytables/test_errors.py b/pandas/tests/io/pytables/test_errors.py
index 37e6eeb05deec..c444090ebfb3c 100644
--- a/pandas/tests/io/pytables/test_errors.py
+++ b/pandas/tests/io/pytables/test_errors.py
@@ -14,7 +14,6 @@
     date_range,
     read_hdf,
 )
-from pandas.tests.io.pytables.common import ensure_clean_store
 
 from pandas.io.pytables import (
     Term,
@@ -24,14 +23,14 @@
 pytestmark = [pytest.mark.single_cpu]
 
 
-def test_pass_spec_to_storer(setup_path):
+def test_pass_spec_to_storer(tmp_path, setup_path):
     df = DataFrame(
         1.1 * np.arange(120).reshape((30, 4)),
         columns=Index(list("ABCD"), dtype=object),
         index=Index([f"i-{i}" for i in range(30)], dtype=object),
     )
 
-    with ensure_clean_store(setup_path) as store:
+    with HDFStore(tmp_path / setup_path) as store:
         store.put("df", df)
         msg = (
             "cannot pass a column specification when reading a Fixed format "
@@ -47,19 +46,19 @@ def test_pass_spec_to_storer(setup_path):
             store.select("df", where=["columns=A"])
 
 
-def test_table_index_incompatible_dtypes(setup_path):
+def test_table_index_incompatible_dtypes(tmp_path, setup_path):
     df1 = DataFrame({"a": [1, 2, 3]})
     df2 = DataFrame({"a": [4, 5, 6]}, index=date_range("1/1/2000", periods=3))
 
-    with ensure_clean_store(setup_path) as store:
+    with HDFStore(tmp_path / setup_path) as store:
         store.put("frame", df1, format="table")
         msg = re.escape("incompatible kind in col [integer - datetime64[ns]]")
         with pytest.raises(TypeError, match=msg):
             store.put("frame", df2, format="table", append=True)
 
 
-def test_unimplemented_dtypes_table_columns(setup_path):
-    with ensure_clean_store(setup_path) as store:
+def test_unimplemented_dtypes_table_columns(tmp_path, setup_path):
+    with HDFStore(tmp_path / setup_path) as store:
         dtypes = [("date", datetime.date(2001, 1, 2))]
 
         # currently not supported dtypes ####
@@ -85,7 +84,7 @@ def test_unimplemented_dtypes_table_columns(setup_path):
     df["datetime1"] = datetime.date(2001, 1, 2)
     df = df._consolidate()
 
-    with ensure_clean_store(setup_path) as store:
+    with HDFStore(tmp_path / setup_path) as store:
         # this fails because we have a date in the object block......
         msg = "|".join(
             [
@@ -101,7 +100,7 @@ def test_unimplemented_dtypes_table_columns(setup_path):
 
 
 def test_invalid_terms(tmp_path, setup_path):
-    with ensure_clean_store(setup_path) as store:
+    with HDFStore(tmp_path / setup_path) as store:
         df = DataFrame(
             np.random.default_rng(2).standard_normal((10, 4)),
             columns=Index(list("ABCD"), dtype=object),
@@ -162,14 +161,14 @@ def test_invalid_terms(tmp_path, setup_path):
         read_hdf(path, "dfq", where="A>0 or C>0")
 
 
-def test_append_with_diff_col_name_types_raises_value_error(setup_path):
+def test_append_with_diff_col_name_types_raises_value_error(tmp_path, setup_path):
     df = DataFrame(np.random.default_rng(2).standard_normal((10, 1)))
     df2 = DataFrame({"a": np.random.default_rng(2).standard_normal(10)})
     df3 = DataFrame({(1, 2): np.random.default_rng(2).standard_normal(10)})
     df4 = DataFrame({("1", 2): np.random.default_rng(2).standard_normal(10)})
     df5 = DataFrame({("1", 2, object): np.random.default_rng(2).standard_normal(10)})
 
-    with ensure_clean_store(setup_path) as store:
+    with HDFStore(tmp_path / setup_path) as store:
         name = "df_diff_valerror"
         store.append(name, df)