15 | 15 | import pandas._testing as tm |
16 | 16 |
17 | 17 |
| 18 | +@pytest.fixture |
| 19 | +def dirpath(datapath): |
| 20 | + return datapath("io", "sas", "data") |
| 21 | + |
| 22 | + |
| 23 | +@pytest.fixture(params=[(1, range(1, 16)), (2, [16])]) |
| 24 | +def data_test_ix(request, dirpath): |
| 25 | + i, test_ix = request.param |
| 26 | + fname = os.path.join(dirpath, f"test_sas7bdat_{i}.csv") |
| 27 | + df = pd.read_csv(fname) |
| 28 | + epoch = datetime(1960, 1, 1) |
| 29 | + t1 = pd.to_timedelta(df["Column4"], unit="d") |
| 30 | + df["Column4"] = epoch + t1 |
| 31 | + t2 = pd.to_timedelta(df["Column12"], unit="d") |
| 32 | + df["Column12"] = epoch + t2 |
| 33 | + for k in range(df.shape[1]): |
| 34 | + col = df.iloc[:, k] |
| 35 | + if col.dtype == np.int64: |
| 36 | + df.iloc[:, k] = df.iloc[:, k].astype(np.float64) |
| 37 | + return df, test_ix |
| 38 | + |
| 39 | + |
18 | 40 | # https://github.com/cython/cython/issues/1720 |
19 | 41 | @pytest.mark.filterwarnings("ignore:can't resolve package:ImportWarning") |
20 | 42 | class TestSAS7BDAT: |
21 | | - @pytest.fixture(autouse=True) |
22 | | - def setup_method(self, datapath): |
23 | | - self.dirpath = datapath("io", "sas", "data") |
24 | | - self.data = [] |
25 | | - self.test_ix = [list(range(1, 16)), [16]] |
26 | | - for j in 1, 2: |
27 | | - fname = os.path.join(self.dirpath, f"test_sas7bdat_{j}.csv") |
28 | | - df = pd.read_csv(fname) |
29 | | - epoch = datetime(1960, 1, 1) |
30 | | - t1 = pd.to_timedelta(df["Column4"], unit="d") |
31 | | - df["Column4"] = epoch + t1 |
32 | | - t2 = pd.to_timedelta(df["Column12"], unit="d") |
33 | | - df["Column12"] = epoch + t2 |
34 | | - for k in range(df.shape[1]): |
35 | | - col = df.iloc[:, k] |
36 | | - if col.dtype == np.int64: |
37 | | - df.iloc[:, k] = df.iloc[:, k].astype(np.float64) |
38 | | - self.data.append(df) |
39 | | - |
40 | 43 | @pytest.mark.slow |
41 | | - def test_from_file(self): |
42 | | - for j in 0, 1: |
43 | | - df0 = self.data[j] |
44 | | - for k in self.test_ix[j]: |
45 | | - fname = os.path.join(self.dirpath, f"test{k}.sas7bdat") |
46 | | - df = pd.read_sas(fname, encoding="utf-8") |
47 | | - tm.assert_frame_equal(df, df0) |
| 44 | + def test_from_file(self, dirpath, data_test_ix): |
| 45 | + df0, test_ix = data_test_ix |
| 46 | + for k in test_ix: |
| 47 | + fname = os.path.join(dirpath, f"test{k}.sas7bdat") |
| 48 | + df = pd.read_sas(fname, encoding="utf-8") |
| 49 | + tm.assert_frame_equal(df, df0) |
48 | 50 |
49 | 51 | @pytest.mark.slow |
50 | | - def test_from_buffer(self): |
51 | | - for j in 0, 1: |
52 | | - df0 = self.data[j] |
53 | | - for k in self.test_ix[j]: |
54 | | - fname = os.path.join(self.dirpath, f"test{k}.sas7bdat") |
55 | | - with open(fname, "rb") as f: |
56 | | - byts = f.read() |
57 | | - buf = io.BytesIO(byts) |
58 | | - with pd.read_sas( |
59 | | - buf, format="sas7bdat", iterator=True, encoding="utf-8" |
60 | | - ) as rdr: |
61 | | - df = rdr.read() |
62 | | - tm.assert_frame_equal(df, df0, check_exact=False) |
| 52 | + def test_from_buffer(self, dirpath, data_test_ix): |
| 53 | + df0, test_ix = data_test_ix |
| 54 | + for k in test_ix: |
| 55 | + fname = os.path.join(dirpath, f"test{k}.sas7bdat") |
| 56 | + with open(fname, "rb") as f: |
| 57 | + byts = f.read() |
| 58 | + buf = io.BytesIO(byts) |
| 59 | + with pd.read_sas( |
| 60 | + buf, format="sas7bdat", iterator=True, encoding="utf-8" |
| 61 | + ) as rdr: |
| 62 | + df = rdr.read() |
| 63 | + tm.assert_frame_equal(df, df0, check_exact=False) |
63 | 64 |
64 | 65 | @pytest.mark.slow |
65 | | - def test_from_iterator(self): |
66 | | - for j in 0, 1: |
67 | | - df0 = self.data[j] |
68 | | - for k in self.test_ix[j]: |
69 | | - fname = os.path.join(self.dirpath, f"test{k}.sas7bdat") |
70 | | - with pd.read_sas(fname, iterator=True, encoding="utf-8") as rdr: |
71 | | - df = rdr.read(2) |
72 | | - tm.assert_frame_equal(df, df0.iloc[0:2, :]) |
73 | | - df = rdr.read(3) |
74 | | - tm.assert_frame_equal(df, df0.iloc[2:5, :]) |
| 66 | + def test_from_iterator(self, dirpath, data_test_ix): |
| 67 | + df0, test_ix = data_test_ix |
| 68 | + for k in test_ix: |
| 69 | + fname = os.path.join(dirpath, f"test{k}.sas7bdat") |
| 70 | + with pd.read_sas(fname, iterator=True, encoding="utf-8") as rdr: |
| 71 | + df = rdr.read(2) |
| 72 | + tm.assert_frame_equal(df, df0.iloc[0:2, :]) |
| 73 | + df = rdr.read(3) |
| 74 | + tm.assert_frame_equal(df, df0.iloc[2:5, :]) |
75 | 75 |
76 | 76 | @pytest.mark.slow |
77 | | - def test_path_pathlib(self): |
78 | | - for j in 0, 1: |
79 | | - df0 = self.data[j] |
80 | | - for k in self.test_ix[j]: |
81 | | - fname = Path(os.path.join(self.dirpath, f"test{k}.sas7bdat")) |
82 | | - df = pd.read_sas(fname, encoding="utf-8") |
83 | | - tm.assert_frame_equal(df, df0) |
| 77 | + def test_path_pathlib(self, dirpath, data_test_ix): |
| 78 | + df0, test_ix = data_test_ix |
| 79 | + for k in test_ix: |
| 80 | + fname = Path(os.path.join(dirpath, f"test{k}.sas7bdat")) |
| 81 | + df = pd.read_sas(fname, encoding="utf-8") |
| 82 | + tm.assert_frame_equal(df, df0) |
84 | 83 |
85 | 84 | @td.skip_if_no("py.path") |
86 | 85 | @pytest.mark.slow |
87 | | - def test_path_localpath(self): |
| 86 | + def test_path_localpath(self, dirpath, data_test_ix): |
88 | 87 | from py.path import local as LocalPath |
89 | 88 |
90 | | - for j in 0, 1: |
91 | | - df0 = self.data[j] |
92 | | - for k in self.test_ix[j]: |
93 | | - fname = LocalPath(os.path.join(self.dirpath, f"test{k}.sas7bdat")) |
94 | | - df = pd.read_sas(fname, encoding="utf-8") |
95 | | - tm.assert_frame_equal(df, df0) |
| 89 | + df0, test_ix = data_test_ix |
| 90 | + for k in test_ix: |
| 91 | + fname = LocalPath(os.path.join(dirpath, f"test{k}.sas7bdat")) |
| 92 | + df = pd.read_sas(fname, encoding="utf-8") |
| 93 | + tm.assert_frame_equal(df, df0) |
96 | 94 |
97 | 95 | @pytest.mark.slow |
98 | | - def test_iterator_loop(self): |
| 96 | + @pytest.mark.parametrize("chunksize", (3, 5, 10, 11)) |
| 97 | + @pytest.mark.parametrize("k", range(1, 17)) |
| 98 | + def test_iterator_loop(self, dirpath, k, chunksize): |
99 | 99 | # github #13654 |
100 | | - for j in 0, 1: |
101 | | - for k in self.test_ix[j]: |
102 | | - for chunksize in (3, 5, 10, 11): |
103 | | - fname = os.path.join(self.dirpath, f"test{k}.sas7bdat") |
104 | | - with pd.read_sas( |
105 | | - fname, chunksize=chunksize, encoding="utf-8" |
106 | | - ) as rdr: |
107 | | - y = 0 |
108 | | - for x in rdr: |
109 | | - y += x.shape[0] |
110 | | - assert y == rdr.row_count |
111 | | - |
112 | | - def test_iterator_read_too_much(self): |
| 100 | + fname = os.path.join(dirpath, f"test{k}.sas7bdat") |
| 101 | + with pd.read_sas(fname, chunksize=chunksize, encoding="utf-8") as rdr: |
| 102 | + y = 0 |
| 103 | + for x in rdr: |
| 104 | + y += x.shape[0] |
| 105 | + assert y == rdr.row_count |
| 106 | + |
| 107 | + def test_iterator_read_too_much(self, dirpath): |
113 | 108 | # github #14734 |
114 | | - k = self.test_ix[0][0] |
115 | | - fname = os.path.join(self.dirpath, f"test{k}.sas7bdat") |
| 109 | + fname = os.path.join(dirpath, "test1.sas7bdat") |
116 | 110 | with pd.read_sas( |
117 | 111 | fname, format="sas7bdat", iterator=True, encoding="utf-8" |
118 | 112 | ) as rdr: |
@@ -183,19 +177,17 @@ def test_date_time(datapath): |
183 | 177 | tm.assert_frame_equal(df, df0) |
184 | 178 |
185 | 179 |
186 | | -def test_compact_numerical_values(datapath): |
| 180 | +@pytest.mark.parametrize("column", ["WGT", "CYL"]) |
| 181 | +def test_compact_numerical_values(datapath, column): |
187 | 182 | # Regression test for #21616 |
188 | 183 | fname = datapath("io", "sas", "data", "cars.sas7bdat") |
189 | 184 | df = pd.read_sas(fname, encoding="latin-1") |
190 | 185 | # The two columns CYL and WGT in cars.sas7bdat have column |
191 | 186 | # width < 8 and only contain integral values. |
192 | 187 | # Test that pandas doesn't corrupt the numbers by adding |
193 | 188 | # decimals. |
194 | | - result = df["WGT"] |
195 | | - expected = df["WGT"].round() |
196 | | - tm.assert_series_equal(result, expected, check_exact=True) |
197 | | - result = df["CYL"] |
198 | | - expected = df["CYL"].round() |
| 189 | + result = df[column] |
| 190 | + expected = df[column].round() |
199 | 191 | tm.assert_series_equal(result, expected, check_exact=True) |
200 | 192 |
201 | 193 |
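For readers unfamiliar with the parametrized-fixture pattern the new code relies on, here is a minimal, self-contained sketch (the names `data_and_indices` and `test_runs_once_per_param` are illustrative only, not part of the pandas test suite): `pytest.fixture(params=...)` makes every test that requests the fixture run once per parameter, and the fixture reads the active parameter from `request.param`. This is the same mechanism `data_test_ix` uses above to replace the old `setup_method` loop over `self.data` and `self.test_ix`.

```python
import pytest


@pytest.fixture(params=[(1, range(1, 4)), (2, [4])])
def data_and_indices(request):
    # Each test requesting this fixture is collected once per entry in
    # ``params``; the active entry is exposed as ``request.param``.
    ident, indices = request.param
    return ident, list(indices)


def test_runs_once_per_param(data_and_indices):
    ident, indices = data_and_indices
    # Collected twice: once with ident == 1, once with ident == 2.
    assert all(isinstance(i, int) for i in indices)
```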