From 67df8a3b859f8ac94308edc2abd1b4f212218d39 Mon Sep 17 00:00:00 2001 From: JatinP <99117056+Jatinprajapati7869@users.noreply.github.com> Date: Wed, 3 Dec 2025 20:49:00 +0530 Subject: [PATCH 1/4] Refactor tests to use tmp_path for file storage Refactor tests to use tmp_path instead of setup_path for temporary file storage. --- pandas/tests/io/pytables/test_select.py | 167 ++++++++++++++---------- 1 file changed, 96 insertions(+), 71 deletions(-) diff --git a/pandas/tests/io/pytables/test_select.py b/pandas/tests/io/pytables/test_select.py index 73548ad3eb283..fb4ed075d198f 100644 --- a/pandas/tests/io/pytables/test_select.py +++ b/pandas/tests/io/pytables/test_select.py @@ -1,6 +1,8 @@ import numpy as np import pytest +pytest.importorskip("tables") + from pandas._libs.tslibs import Timestamp from pandas.compat import PY312 @@ -20,7 +22,6 @@ ) from pandas.tests.io.pytables.common import ( _maybe_remove, - ensure_clean_store, ) from pandas.io.pytables import Term @@ -28,7 +29,7 @@ pytestmark = [pytest.mark.single_cpu] -def test_select_columns_in_where(setup_path): +def test_select_columns_in_where(tmp_path): # GH 6169 # recreate multi-indexes when columns is passed # in the `where` argument @@ -44,8 +45,8 @@ def test_select_columns_in_where(setup_path): index=index, columns=["A", "B", "C"], ) - - with ensure_clean_store(setup_path) as store: + path = tmp_path / "file.h5" + with HDFStore(path) as store: store.put("df", df, format="table") expected = df[["A"]] @@ -55,19 +56,21 @@ def test_select_columns_in_where(setup_path): # With a Series s = Series(np.random.default_rng(2).standard_normal(10), index=index, name="A") - with ensure_clean_store(setup_path) as store: + path = tmp_path / "file.h6" + with HDFStore(path) as store: store.put("s", s, format="table") tm.assert_series_equal(store.select("s", where="columns=['A']"), s) -def test_select_with_dups(setup_path): +def test_select_with_dups(tmp_path): # single dtypes df = DataFrame( np.random.default_rng(2).standard_normal((10, 4)), columns=["A", "A", "B", "B"] ) - df.index = date_range("20130101 9:30", periods=10, freq="min", unit="ns") + df.index = date_range("20130101 9:30", periods=10, freq="min") - with ensure_clean_store(setup_path) as store: + path = tmp_path / "file.h7" + with HDFStore(path) as store: store.append("df", df) result = store.select("df") @@ -96,9 +99,9 @@ def test_select_with_dups(setup_path): ], axis=1, ) - df.index = date_range("20130101 9:30", periods=10, freq="min", unit="ns") - - with ensure_clean_store(setup_path) as store: + df.index = date_range("20130101 9:30", periods=10, freq="min") + path = tmp_path / "file.h8" + with HDFStore(path) as store: store.append("df", df) result = store.select("df") @@ -118,7 +121,8 @@ def test_select_with_dups(setup_path): tm.assert_frame_equal(result, expected, by_blocks=True) # duplicates on both index and columns - with ensure_clean_store(setup_path) as store: + path = tmp_path / "file.h9" + with HDFStore(path) as store: store.append("df", df) store.append("df", df) @@ -128,13 +132,14 @@ def test_select_with_dups(setup_path): tm.assert_frame_equal(result, expected, by_blocks=True) -def test_select(setup_path): - with ensure_clean_store(setup_path) as store: +def test_select(tmp_path): + path = tmp_path / "file.h10" + with HDFStore(path) as store: # select with columns= df = DataFrame( np.random.default_rng(2).standard_normal((10, 4)), columns=Index(list("ABCD")), - index=date_range("2000-01-01", periods=10, freq="B", unit="ns"), + index=date_range("2000-01-01", periods=10, freq="B"), ) _maybe_remove(store, "df") store.append("df", df) @@ -169,12 +174,13 @@ def test_select(setup_path): tm.assert_frame_equal(expected, result) -def test_select_dtypes(setup_path, request): - with ensure_clean_store(setup_path) as store: +def test_select_dtypes(tmp_path, request): + path = tmp_path / "file.h11" + with HDFStore(path) as store: # with a Timestamp data column (GH #2637) df = DataFrame( { - "ts": bdate_range("2012-01-01", periods=300, unit="ns"), + "ts": bdate_range("2012-01-01", periods=300), "A": np.random.default_rng(2).standard_normal(300), } ) @@ -232,7 +238,8 @@ def test_select_dtypes(setup_path, request): expected = df.reindex(index=list(df.index)[0:10], columns=["A"]) tm.assert_frame_equal(expected, result) - with ensure_clean_store(setup_path) as store: + path = tmp_path / "file.h12" + with HDFStore(path) as store: # floats w/o NaN df = DataFrame({"cols": range(11), "values": range(11)}, dtype="float64") df["cols"] = (df["cols"] + 10).apply(str) @@ -251,7 +258,7 @@ def test_select_dtypes(setup_path, request): tm.assert_frame_equal(expected, result) # https://github.com/PyTables/PyTables/issues/282 - # bug in selection when 0th row has an np.nan and an index + # bug in selection when 0th row has a np.nan and an index # store.append('df3',df,data_columns=True) # result = store.select( # 'df3', where='values>2.0') @@ -270,7 +277,8 @@ def test_select_dtypes(setup_path, request): # test selection with comparison against numpy scalar # GH 11283 - with ensure_clean_store(setup_path) as store: + path = tmp_path / "file.h13" + with HDFStore(path) as store: df = DataFrame( 1.1 * np.arange(120).reshape((30, 4)), columns=Index(list("ABCD")), @@ -292,11 +300,12 @@ def test_select_dtypes(setup_path, request): tm.assert_frame_equal(expected, result) -def test_select_with_many_inputs(setup_path): - with ensure_clean_store(setup_path) as store: +def test_select_with_many_inputs(tmp_path): + path = tmp_path / "file.h14" + with HDFStore(path) as store: df = DataFrame( { - "ts": bdate_range("2012-01-01", periods=300, unit="ns"), + "ts": bdate_range("2012-01-01", periods=300), "A": np.random.default_rng(2).standard_normal(300), "B": range(300), "users": ["a"] * 50 @@ -340,13 +349,14 @@ def test_select_with_many_inputs(setup_path): assert len(result) == 100 -def test_select_iterator(tmp_path, setup_path): +def test_select_iterator(tmp_path): # single table - with ensure_clean_store(setup_path) as store: + path = tmp_path / "file.h15" + with HDFStore(path) as store: df = DataFrame( np.random.default_rng(2).standard_normal((10, 4)), columns=Index(list("ABCD")), - index=date_range("2000-01-01", periods=10, freq="B", unit="ns"), + index=date_range("2000-01-01", periods=10, freq="B"), ) _maybe_remove(store, "df") store.append("df", df) @@ -366,12 +376,12 @@ def test_select_iterator(tmp_path, setup_path): result = concat(results) tm.assert_frame_equal(result, expected) - path = tmp_path / setup_path + path = tmp_path / "file.h15" df = DataFrame( np.random.default_rng(2).standard_normal((10, 4)), columns=Index(list("ABCD")), - index=date_range("2000-01-01", periods=10, freq="B", unit="ns"), + index=date_range("2000-01-01", periods=10, freq="B"), ) df.to_hdf(path, key="df_non_table") @@ -382,12 +392,12 @@ def test_select_iterator(tmp_path, setup_path): with pytest.raises(TypeError, match=msg): read_hdf(path, "df_non_table", iterator=True) - path = tmp_path / setup_path + path = tmp_path / "file.h15" df = DataFrame( np.random.default_rng(2).standard_normal((10, 4)), columns=Index(list("ABCD")), - index=date_range("2000-01-01", periods=10, freq="B", unit="ns"), + index=date_range("2000-01-01", periods=10, freq="B"), ) df.to_hdf(path, key="df", format="table") @@ -400,11 +410,12 @@ def test_select_iterator(tmp_path, setup_path): # multiple - with ensure_clean_store(setup_path) as store: + path = tmp_path / "file.h16" + with HDFStore(path) as store: df1 = DataFrame( np.random.default_rng(2).standard_normal((10, 4)), columns=Index(list("ABCD")), - index=date_range("2000-01-01", periods=10, freq="B", unit="ns"), + index=date_range("2000-01-01", periods=10, freq="B"), ) store.append("df1", df1, data_columns=True) df2 = df1.copy().rename(columns="{}_2".format) @@ -422,17 +433,18 @@ def test_select_iterator(tmp_path, setup_path): tm.assert_frame_equal(expected, result) -def test_select_iterator_complete_8014(setup_path): +def test_select_iterator_complete_8014(tmp_path): # GH 8014 # using iterator and where clause chunksize = 1e4 # no iterator - with ensure_clean_store(setup_path) as store: + path = tmp_path / "file.h17" + with HDFStore(path) as store: expected = DataFrame( np.random.default_rng(2).standard_normal((100064, 4)), columns=Index(list("ABCD")), - index=date_range("2000-01-01", periods=100064, freq="s", unit="ns"), + index=date_range("2000-01-01", periods=100064, freq="s"), ) _maybe_remove(store, "df") store.append("df", expected) @@ -463,11 +475,12 @@ def test_select_iterator_complete_8014(setup_path): tm.assert_frame_equal(expected, result) # with iterator, full range - with ensure_clean_store(setup_path) as store: + path = tmp_path / "file.h18" + with HDFStore(path) as store: expected = DataFrame( np.random.default_rng(2).standard_normal((100064, 4)), columns=Index(list("ABCD")), - index=date_range("2000-01-01", periods=100064, freq="s", unit="ns"), + index=date_range("2000-01-01", periods=100064, freq="s"), ) _maybe_remove(store, "df") store.append("df", expected) @@ -499,17 +512,18 @@ def test_select_iterator_complete_8014(setup_path): tm.assert_frame_equal(expected, result) -def test_select_iterator_non_complete_8014(setup_path): +def test_select_iterator_non_complete_8014(tmp_path): # GH 8014 # using iterator and where clause chunksize = 1e4 # with iterator, non complete range - with ensure_clean_store(setup_path) as store: + path = tmp_path / "file.h19" + with HDFStore(path) as store: expected = DataFrame( np.random.default_rng(2).standard_normal((100064, 4)), columns=Index(list("ABCD")), - index=date_range("2000-01-01", periods=100064, freq="s", unit="ns"), + index=date_range("2000-01-01", periods=100064, freq="s"), ) _maybe_remove(store, "df") store.append("df", expected) @@ -539,11 +553,12 @@ def test_select_iterator_non_complete_8014(setup_path): tm.assert_frame_equal(rexpected, result) # with iterator, empty where - with ensure_clean_store(setup_path) as store: + path = tmp_path / "file.h20" + with HDFStore(path) as store: expected = DataFrame( np.random.default_rng(2).standard_normal((100064, 4)), columns=Index(list("ABCD")), - index=date_range("2000-01-01", periods=100064, freq="s", unit="ns"), + index=date_range("2000-01-01", periods=100064, freq="s"), ) _maybe_remove(store, "df") store.append("df", expected) @@ -556,18 +571,19 @@ def test_select_iterator_non_complete_8014(setup_path): assert 0 == len(results) -def test_select_iterator_many_empty_frames(setup_path): +def test_select_iterator_many_empty_frames(tmp_path): # GH 8014 # using iterator and where clause can return many empty # frames. chunksize = 10_000 # with iterator, range limited to the first chunk - with ensure_clean_store(setup_path) as store: + path = tmp_path / "file.h21" + with HDFStore(path) as store: expected = DataFrame( np.random.default_rng(2).standard_normal((100064, 4)), columns=Index(list("ABCD")), - index=date_range("2000-01-01", periods=100064, freq="s", unit="ns"), + index=date_range("2000-01-01", periods=100064, freq="s"), ) _maybe_remove(store, "df") store.append("df", expected) @@ -615,14 +631,15 @@ def test_select_iterator_many_empty_frames(setup_path): assert len(results) == 0 -def test_frame_select(setup_path, request): +def test_frame_select(tmp_path, request): df = DataFrame( np.random.default_rng(2).standard_normal((10, 4)), columns=Index(list("ABCD")), - index=date_range("2000-01-01", periods=10, freq="B", unit="ns"), + index=date_range("2000-01-01", periods=10, freq="B"), ) - with ensure_clean_store(setup_path) as store: + path = tmp_path / "file.h22" + with HDFStore(path) as store: store.put("frame", df, format="table") date = df.index[len(df) // 2] @@ -651,7 +668,7 @@ def test_frame_select(setup_path, request): df = DataFrame( np.random.default_rng(2).standard_normal((10, 4)), columns=Index(list("ABCD")), - index=date_range("2000-01-01", periods=10, freq="B", unit="ns"), + index=date_range("2000-01-01", periods=10, freq="B"), ) store.append("df_time", df) msg = "day is out of range for month: 0" @@ -664,18 +681,19 @@ def test_frame_select(setup_path, request): # store.select('frame', [crit1, crit2]) -def test_frame_select_complex(setup_path): +def test_frame_select_complex(tmp_path): # select via complex criteria df = DataFrame( np.random.default_rng(2).standard_normal((10, 4)), columns=Index(list("ABCD")), - index=date_range("2000-01-01", periods=10, freq="B", unit="ns"), + index=date_range("2000-01-01", periods=10, freq="B"), ) df["string"] = "foo" df.loc[df.index[0:4], "string"] = "bar" - with ensure_clean_store(setup_path) as store: + path = tmp_path / "file.h23" + with HDFStore(path) as store: store.put("df", df, format="table", data_columns=["string"]) # empty @@ -781,16 +799,17 @@ def test_frame_select_complex2(tmp_path): tm.assert_frame_equal(result, expected) -def test_invalid_filtering(setup_path): +def test_invalid_filtering(tmp_path): # can't use more than one filter (atm) df = DataFrame( np.random.default_rng(2).standard_normal((10, 4)), columns=Index(list("ABCD")), - index=date_range("2000-01-01", periods=10, freq="B", unit="ns"), + index=date_range("2000-01-01", periods=10, freq="B"), ) - with ensure_clean_store(setup_path) as store: + path = tmp_path / "file.h24" + with HDFStore(path) as store: store.put("df", df, format="table") msg = "unable to collapse Joint Filters" @@ -803,13 +822,14 @@ def test_invalid_filtering(setup_path): store.select("df", "columns=['A','B'] & columns=['C']") -def test_string_select(setup_path): +def test_string_select(tmp_path): # GH 2973 - with ensure_clean_store(setup_path) as store: + path = tmp_path / "file.h25" + with HDFStore(path) as store: df = DataFrame( np.random.default_rng(2).standard_normal((10, 4)), columns=Index(list("ABCD")), - index=date_range("2000-01-01", periods=10, freq="B", unit="ns"), + index=date_range("2000-01-01", periods=10, freq="B"), ) # test string ==/!= @@ -849,16 +869,17 @@ def test_string_select(setup_path): tm.assert_frame_equal(result, expected) -def test_select_as_multiple(setup_path): +def test_select_as_multiple(tmp_path): df1 = DataFrame( np.random.default_rng(2).standard_normal((10, 4)), columns=Index(list("ABCD")), - index=date_range("2000-01-01", periods=10, freq="B", unit="ns"), + index=date_range("2000-01-01", periods=10, freq="B"), ) df2 = df1.copy().rename(columns="{}_2".format) df2["foo"] = "bar" - with ensure_clean_store(setup_path) as store: + path = tmp_path / "file.h26" + with HDFStore(path) as store: msg = "keys must be a list/tuple" # no tables stored with pytest.raises(TypeError, match=msg): @@ -924,8 +945,9 @@ def test_select_as_multiple(setup_path): ) -def test_nan_selection_bug_4858(setup_path): - with ensure_clean_store(setup_path) as store: +def test_nan_selection_bug_4858(tmp_path): + path = tmp_path / "file.h27" + with HDFStore(path) as store: df = DataFrame({"cols": range(6), "values": range(6)}, dtype="float64") df["cols"] = (df["cols"] + 10).apply(str) df.iloc[0] = np.nan @@ -941,7 +963,7 @@ def test_nan_selection_bug_4858(setup_path): tm.assert_frame_equal(result, expected) -def test_query_with_nested_special_character(setup_path): +def test_query_with_nested_special_character(tmp_path): df = DataFrame( { "a": ["a", "a", "c", "b", "test & test", "c", "b", "e"], @@ -949,17 +971,19 @@ def test_query_with_nested_special_character(setup_path): } ) expected = df[df.a == "test & test"] - with ensure_clean_store(setup_path) as store: + path = tmp_path / "file.h28" + with HDFStore(path) as store: store.append("test", df, format="table", data_columns=True) result = store.select("test", 'a = "test & test"') tm.assert_frame_equal(expected, result) -def test_query_long_float_literal(setup_path): +def test_query_long_float_literal(tmp_path): # GH 14241 df = DataFrame({"A": [1000000000.0009, 1000000000.0011, 1000000000.0015]}) - with ensure_clean_store(setup_path) as store: + path = tmp_path / "file.h29" + with HDFStore(path) as store: store.append("test", df, format="table", data_columns=True) cutoff = 1000000000.0006 @@ -977,19 +1001,20 @@ def test_query_long_float_literal(setup_path): tm.assert_frame_equal(expected, result) -def test_query_compare_column_type(setup_path): +def test_query_compare_column_type(tmp_path): # GH 15492 df = DataFrame( { "date": ["2014-01-01", "2014-01-02"], - "real_date": date_range("2014-01-01", periods=2, unit="ns"), + "real_date": date_range("2014-01-01", periods=2), "float": [1.1, 1.2], "int": [1, 2], }, columns=["date", "real_date", "float", "int"], ) - with ensure_clean_store(setup_path) as store: + path = tmp_path / "file.h30" + with HDFStore(path) as store: store.append("test", df, format="table", data_columns=True) ts = Timestamp("2014-01-01") # noqa: F841 From 871c6f1232a4a5b44c71a5ed8c8261edb613d524 Mon Sep 17 00:00:00 2001 From: JatinP <99117056+Jatinprajapati7869@users.noreply.github.com> Date: Wed, 3 Dec 2025 21:10:37 +0530 Subject: [PATCH 2/4] Update test_select.py --- pandas/tests/io/pytables/test_select.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/io/pytables/test_select.py b/pandas/tests/io/pytables/test_select.py index fb4ed075d198f..6a4137291b011 100644 --- a/pandas/tests/io/pytables/test_select.py +++ b/pandas/tests/io/pytables/test_select.py @@ -1,7 +1,6 @@ import numpy as np import pytest -pytest.importorskip("tables") from pandas._libs.tslibs import Timestamp from pandas.compat import PY312 From 16d66d9dab663d21b9ae62f77b74eca9217f1f94 Mon Sep 17 00:00:00 2001 From: JatinP <99117056+Jatinprajapati7869@users.noreply.github.com> Date: Wed, 3 Dec 2025 21:10:54 +0530 Subject: [PATCH 3/4] Update test_select.py --- pandas/tests/io/pytables/test_select.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/io/pytables/test_select.py b/pandas/tests/io/pytables/test_select.py index 6a4137291b011..d34a91ec35f51 100644 --- a/pandas/tests/io/pytables/test_select.py +++ b/pandas/tests/io/pytables/test_select.py @@ -1,7 +1,6 @@ import numpy as np import pytest - from pandas._libs.tslibs import Timestamp from pandas.compat import PY312 From 873bee8747f86a4660d7391a04e004d3c277b7dd Mon Sep 17 00:00:00 2001 From: JatinP <99117056+Jatinprajapati7869@users.noreply.github.com> Date: Wed, 3 Dec 2025 21:16:23 +0530 Subject: [PATCH 4/4] Update index to include nanoseconds in date_range --- pandas/tests/io/pytables/test_select.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/pandas/tests/io/pytables/test_select.py b/pandas/tests/io/pytables/test_select.py index d34a91ec35f51..842eed3ac6aa3 100644 --- a/pandas/tests/io/pytables/test_select.py +++ b/pandas/tests/io/pytables/test_select.py @@ -137,7 +137,7 @@ def test_select(tmp_path): df = DataFrame( np.random.default_rng(2).standard_normal((10, 4)), columns=Index(list("ABCD")), - index=date_range("2000-01-01", periods=10, freq="B"), + index=date_range("2000-01-01", periods=10, freq="B", unit="ns"), ) _maybe_remove(store, "df") store.append("df", df) @@ -354,7 +354,7 @@ def test_select_iterator(tmp_path): df = DataFrame( np.random.default_rng(2).standard_normal((10, 4)), columns=Index(list("ABCD")), - index=date_range("2000-01-01", periods=10, freq="B"), + index=date_range("2000-01-01", periods=10, freq="B", unit="ns"), ) _maybe_remove(store, "df") store.append("df", df) @@ -379,7 +379,7 @@ def test_select_iterator(tmp_path): df = DataFrame( np.random.default_rng(2).standard_normal((10, 4)), columns=Index(list("ABCD")), - index=date_range("2000-01-01", periods=10, freq="B"), + index=date_range("2000-01-01", periods=10, freq="B", unit="ns"), ) df.to_hdf(path, key="df_non_table") @@ -395,7 +395,7 @@ def test_select_iterator(tmp_path): df = DataFrame( np.random.default_rng(2).standard_normal((10, 4)), columns=Index(list("ABCD")), - index=date_range("2000-01-01", periods=10, freq="B"), + index=date_range("2000-01-01", periods=10, freq="B", unit="ns"), ) df.to_hdf(path, key="df", format="table") @@ -413,7 +413,7 @@ def test_select_iterator(tmp_path): df1 = DataFrame( np.random.default_rng(2).standard_normal((10, 4)), columns=Index(list("ABCD")), - index=date_range("2000-01-01", periods=10, freq="B"), + index=date_range("2000-01-01", periods=10, freq="B", unit="ns"), ) store.append("df1", df1, data_columns=True) df2 = df1.copy().rename(columns="{}_2".format) @@ -633,7 +633,7 @@ def test_frame_select(tmp_path, request): df = DataFrame( np.random.default_rng(2).standard_normal((10, 4)), columns=Index(list("ABCD")), - index=date_range("2000-01-01", periods=10, freq="B"), + index=date_range("2000-01-01", periods=10, freq="B", unit="ns"), ) path = tmp_path / "file.h22" @@ -666,7 +666,7 @@ def test_frame_select(tmp_path, request): df = DataFrame( np.random.default_rng(2).standard_normal((10, 4)), columns=Index(list("ABCD")), - index=date_range("2000-01-01", periods=10, freq="B"), + index=date_range("2000-01-01", periods=10, freq="B", unit="ns"), ) store.append("df_time", df) msg = "day is out of range for month: 0" @@ -685,7 +685,7 @@ def test_frame_select_complex(tmp_path): df = DataFrame( np.random.default_rng(2).standard_normal((10, 4)), columns=Index(list("ABCD")), - index=date_range("2000-01-01", periods=10, freq="B"), + index=date_range("2000-01-01", periods=10, freq="B", unit="ns"), ) df["string"] = "foo" df.loc[df.index[0:4], "string"] = "bar" @@ -803,7 +803,7 @@ def test_invalid_filtering(tmp_path): df = DataFrame( np.random.default_rng(2).standard_normal((10, 4)), columns=Index(list("ABCD")), - index=date_range("2000-01-01", periods=10, freq="B"), + index=date_range("2000-01-01", periods=10, freq="B", unit="ns"), ) path = tmp_path / "file.h24" @@ -827,7 +827,7 @@ def test_string_select(tmp_path): df = DataFrame( np.random.default_rng(2).standard_normal((10, 4)), columns=Index(list("ABCD")), - index=date_range("2000-01-01", periods=10, freq="B"), + index=date_range("2000-01-01", periods=10, freq="B", unit="ns"), ) # test string ==/!= @@ -871,7 +871,7 @@ def test_select_as_multiple(tmp_path): df1 = DataFrame( np.random.default_rng(2).standard_normal((10, 4)), columns=Index(list("ABCD")), - index=date_range("2000-01-01", periods=10, freq="B"), + index=date_range("2000-01-01", periods=10, freq="B", unit="ns"), ) df2 = df1.copy().rename(columns="{}_2".format) df2["foo"] = "bar"