15 | 15 | import pandas._testing as tm |
16 | 16 |
17 | 17 |
| 18 | +@pytest.fixture |
| 19 | +def dirpath(datapath): |
| 20 | + return datapath("io", "sas", "data") |
| 21 | + |
| 22 | + |
| 23 | +@pytest.fixture(params=[(1, range(1, 16)), (2, [16])]) |
| 24 | +def data_test_ix(request, dirpath): |
| 25 | + i, test_ix = request.param |
| 26 | + fname = os.path.join(dirpath, f"test_sas7bdat_{i}.csv") |
| 27 | + df = pd.read_csv(fname) |
| 28 | + epoch = datetime(1960, 1, 1) |
| 29 | + t1 = pd.to_timedelta(df["Column4"], unit="d") |
| 30 | + df["Column4"] = epoch + t1 |
| 31 | + t2 = pd.to_timedelta(df["Column12"], unit="d") |
| 32 | + df["Column12"] = epoch + t2 |
| 33 | + for k in range(df.shape[1]): |
| 34 | + col = df.iloc[:, k] |
| 35 | + if col.dtype == np.int64: |
| 36 | + df.iloc[:, k] = df.iloc[:, k].astype(np.float64) |
| 37 | + return df, test_ix |
| 38 | + |
| 39 | + |
18 | 40 | # https://github.com/cython/cython/issues/1720 |
19 | 41 | @pytest.mark.filterwarnings("ignore:can't resolve package:ImportWarning") |
20 | 42 | class TestSAS7BDAT: |
21 | | - @pytest.fixture(autouse=True) |
22 | | - def setup_method(self, datapath): |
23 | | - self.dirpath = datapath("io", "sas", "data") |
24 | | - self.data = [] |
25 | | - self.test_ix = [list(range(1, 16)), [16]] |
26 | | - for j in 1, 2: |
27 | | - fname = os.path.join(self.dirpath, f"test_sas7bdat_{j}.csv") |
28 | | - df = pd.read_csv(fname) |
29 | | - epoch = datetime(1960, 1, 1) |
30 | | - t1 = pd.to_timedelta(df["Column4"], unit="d") |
31 | | - df["Column4"] = epoch + t1 |
32 | | - t2 = pd.to_timedelta(df["Column12"], unit="d") |
33 | | - df["Column12"] = epoch + t2 |
34 | | - for k in range(df.shape[1]): |
35 | | - col = df.iloc[:, k] |
36 | | - if col.dtype == np.int64: |
37 | | - df.iloc[:, k] = df.iloc[:, k].astype(np.float64) |
38 | | - self.data.append(df) |
39 | | - |
40 | 43 | @pytest.mark.slow |
41 | | - def test_from_file(self): |
42 | | - for j in 0, 1: |
43 | | - df0 = self.data[j] |
44 | | - for k in self.test_ix[j]: |
45 | | - fname = os.path.join(self.dirpath, f"test{k}.sas7bdat") |
46 | | - df = pd.read_sas(fname, encoding="utf-8") |
47 | | - tm.assert_frame_equal(df, df0) |
| 44 | + def test_from_file(self, dirpath, data_test_ix): |
| 45 | + df0, test_ix = data_test_ix |
| 46 | + for k in test_ix: |
| 47 | + fname = os.path.join(dirpath, f"test{k}.sas7bdat") |
| 48 | + df = pd.read_sas(fname, encoding="utf-8") |
| 49 | + tm.assert_frame_equal(df, df0) |
48 | 50 |
49 | 51 | @pytest.mark.slow |
50 | | - def test_from_buffer(self): |
51 | | - for j in 0, 1: |
52 | | - df0 = self.data[j] |
53 | | - for k in self.test_ix[j]: |
54 | | - fname = os.path.join(self.dirpath, f"test{k}.sas7bdat") |
55 | | - with open(fname, "rb") as f: |
56 | | - byts = f.read() |
57 | | - buf = io.BytesIO(byts) |
58 | | - with pd.read_sas( |
59 | | - buf, format="sas7bdat", iterator=True, encoding="utf-8" |
60 | | - ) as rdr: |
61 | | - df = rdr.read() |
62 | | - tm.assert_frame_equal(df, df0, check_exact=False) |
| 52 | + def test_from_buffer(self, dirpath, data_test_ix): |
| 53 | + df0, test_ix = data_test_ix |
| 54 | + for k in test_ix: |
| 55 | + fname = os.path.join(dirpath, f"test{k}.sas7bdat") |
| 56 | + with open(fname, "rb") as f: |
| 57 | + byts = f.read() |
| 58 | + buf = io.BytesIO(byts) |
| 59 | + with pd.read_sas( |
| 60 | + buf, format="sas7bdat", iterator=True, encoding="utf-8" |
| 61 | + ) as rdr: |
| 62 | + df = rdr.read() |
| 63 | + tm.assert_frame_equal(df, df0, check_exact=False) |
63 | 64 |
64 | 65 | @pytest.mark.slow |
65 | | - def test_from_iterator(self): |
66 | | - for j in 0, 1: |
67 | | - df0 = self.data[j] |
68 | | - for k in self.test_ix[j]: |
69 | | - fname = os.path.join(self.dirpath, f"test{k}.sas7bdat") |
70 | | - with pd.read_sas(fname, iterator=True, encoding="utf-8") as rdr: |
71 | | - df = rdr.read(2) |
72 | | - tm.assert_frame_equal(df, df0.iloc[0:2, :]) |
73 | | - df = rdr.read(3) |
74 | | - tm.assert_frame_equal(df, df0.iloc[2:5, :]) |
| 66 | + def test_from_iterator(self, dirpath, data_test_ix): |
| 67 | + df0, test_ix = data_test_ix |
| 68 | + for k in test_ix: |
| 69 | + fname = os.path.join(dirpath, f"test{k}.sas7bdat") |
| 70 | + with pd.read_sas(fname, iterator=True, encoding="utf-8") as rdr: |
| 71 | + df = rdr.read(2) |
| 72 | + tm.assert_frame_equal(df, df0.iloc[0:2, :]) |
| 73 | + df = rdr.read(3) |
| 74 | + tm.assert_frame_equal(df, df0.iloc[2:5, :]) |
75 | 75 |
76 | 76 | @pytest.mark.slow |
77 | | - def test_path_pathlib(self): |
78 | | - for j in 0, 1: |
79 | | - df0 = self.data[j] |
80 | | - for k in self.test_ix[j]: |
81 | | - fname = Path(os.path.join(self.dirpath, f"test{k}.sas7bdat")) |
82 | | - df = pd.read_sas(fname, encoding="utf-8") |
83 | | - tm.assert_frame_equal(df, df0) |
| 77 | + def test_path_pathlib(self, dirpath, data_test_ix): |
| 78 | + df0, test_ix = data_test_ix |
| 79 | + for k in test_ix: |
| 80 | + fname = Path(os.path.join(dirpath, f"test{k}.sas7bdat")) |
| 81 | + df = pd.read_sas(fname, encoding="utf-8") |
| 82 | + tm.assert_frame_equal(df, df0) |
84 | 83 |
85 | 84 | @td.skip_if_no("py.path") |
86 | 85 | @pytest.mark.slow |
87 | | - def test_path_localpath(self): |
| 86 | + def test_path_localpath(self, dirpath, data_test_ix): |
88 | 87 | from py.path import local as LocalPath |
89 | 88 |
90 | | - for j in 0, 1: |
91 | | - df0 = self.data[j] |
92 | | - for k in self.test_ix[j]: |
93 | | - fname = LocalPath(os.path.join(self.dirpath, f"test{k}.sas7bdat")) |
94 | | - df = pd.read_sas(fname, encoding="utf-8") |
95 | | - tm.assert_frame_equal(df, df0) |
| 89 | + df0, test_ix = data_test_ix |
| 90 | + for k in test_ix: |
| 91 | + fname = LocalPath(os.path.join(dirpath, f"test{k}.sas7bdat")) |
| 92 | + df = pd.read_sas(fname, encoding="utf-8") |
| 93 | + tm.assert_frame_equal(df, df0) |
96 | 94 |
97 | 95 | @pytest.mark.slow |
98 | | - def test_iterator_loop(self): |
| 96 | + @pytest.mark.parametrize("chunksize", (3, 5, 10, 11)) |
| 97 | + @pytest.mark.parametrize("k", range(1, 17)) |
| 98 | + def test_iterator_loop(self, dirpath, k, chunksize): |
99 | 99 | # github #13654 |
100 | | - for j in 0, 1: |
101 | | - for k in self.test_ix[j]: |
102 | | - for chunksize in (3, 5, 10, 11): |
103 | | - fname = os.path.join(self.dirpath, f"test{k}.sas7bdat") |
104 | | - with pd.read_sas( |
105 | | - fname, chunksize=chunksize, encoding="utf-8" |
106 | | - ) as rdr: |
107 | | - y = 0 |
108 | | - for x in rdr: |
109 | | - y += x.shape[0] |
110 | | - assert y == rdr.row_count |
111 | | - |
112 | | - def test_iterator_read_too_much(self): |
| 100 | + fname = os.path.join(dirpath, f"test{k}.sas7bdat") |
| 101 | + with pd.read_sas(fname, chunksize=chunksize, encoding="utf-8") as rdr: |
| 102 | + y = 0 |
| 103 | + for x in rdr: |
| 104 | + y += x.shape[0] |
| 105 | + assert y == rdr.row_count |
| 106 | + |
| 107 | + def test_iterator_read_too_much(self, dirpath): |
113 | 108 | # github #14734 |
114 | | - k = self.test_ix[0][0] |
115 | | - fname = os.path.join(self.dirpath, f"test{k}.sas7bdat") |
| 109 | + fname = os.path.join(dirpath, "test1.sas7bdat") |
116 | 110 | with pd.read_sas( |
117 | 111 | fname, format="sas7bdat", iterator=True, encoding="utf-8" |
118 | 112 | ) as rdr: |
@@ -183,19 +177,17 @@ def test_date_time(datapath): |
183 | 177 | tm.assert_frame_equal(df, df0) |
184 | 178 |
185 | 179 |
186 | | -def test_compact_numerical_values(datapath): |
| 180 | +@pytest.mark.parametrize("column", ["WGT", "CYL"]) |
| 181 | +def test_compact_numerical_values(datapath, column): |
187 | 182 | # Regression test for #21616 |
188 | 183 | fname = datapath("io", "sas", "data", "cars.sas7bdat") |
189 | 184 | df = pd.read_sas(fname, encoding="latin-1") |
190 | 185 | # The two columns CYL and WGT in cars.sas7bdat have column |
191 | 186 | # width < 8 and only contain integral values. |
192 | 187 | # Test that pandas doesn't corrupt the numbers by adding |
193 | 188 | # decimals. |
194 | | - result = df["WGT"] |
195 | | - expected = df["WGT"].round() |
196 | | - tm.assert_series_equal(result, expected, check_exact=True) |
197 | | - result = df["CYL"] |
198 | | - expected = df["CYL"].round() |
| 189 | + result = df[column] |
| 190 | + expected = df[column].round() |
199 | 191 | tm.assert_series_equal(result, expected, check_exact=True) |
200 | 192 |
201 | 193 |
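For readers unfamiliar with the parametrized-fixture pattern the new code relies on, here is a minimal, self-contained sketch (the names `data_and_indices` and `test_runs_once_per_param` are illustrative only, not part of the pandas test suite): `pytest.fixture(params=...)` makes every test that requests the fixture run once per parameter, and the fixture reads the active parameter from `request.param`. This is the same mechanism `data_test_ix` uses above to replace the old `setup_method` loop over `self.data` and `self.test_ix`.

```python
import pytest


@pytest.fixture(params=[(1, range(1, 4)), (2, [4])])
def data_and_indices(request):
    # Each test requesting this fixture is collected once per entry in
    # ``params``; the active entry is exposed as ``request.param``.
    ident, indices = request.param
    return ident, list(indices)


def test_runs_once_per_param(data_and_indices):
    ident, indices = data_and_indices
    # Collected twice: once with ident == 1, once with ident == 2.
    assert all(isinstance(i, int) for i in indices)
```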