1- import os
2-
31import numpy as np
42import pytest
53
@@ -24,145 +22,154 @@ def numeric_as_float(data):
2422
2523class TestXport :
2624 @pytest .fixture (autouse = True )
27- def setup_method (self , datapath ):
28- self .dirpath = datapath ("io" , "sas" , "data" )
29- self .file01 = os .path .join (self .dirpath , "DEMO_G.xpt" )
30- self .file02 = os .path .join (self .dirpath , "SSHSV1_A.xpt" )
31- self .file03 = os .path .join (self .dirpath , "DRXFCD_G.xpt" )
32- self .file04 = os .path .join (self .dirpath , "paxraw_d_short.xpt" )
33- self .file05 = os .path .join (self .dirpath , "DEMO_PUF.cpt" )
34-
25+ def setup_method (self ):
3526 with td .file_leak_context ():
3627 yield
3728
29+ @pytest .fixture
30+ def file01 (self , datapath ):
31+ return datapath ("io" , "sas" , "data" , "DEMO_G.xpt" )
32+
33+ @pytest .fixture
34+ def file02 (self , datapath ):
35+ return datapath ("io" , "sas" , "data" , "SSHSV1_A.xpt" )
36+
37+ @pytest .fixture
38+ def file03 (self , datapath ):
39+ return datapath ("io" , "sas" , "data" , "DRXFCD_G.xpt" )
40+
41+ @pytest .fixture
42+ def file04 (self , datapath ):
43+ return datapath ("io" , "sas" , "data" , "paxraw_d_short.xpt" )
44+
45+ @pytest .fixture
46+ def file05 (self , datapath ):
47+ return datapath ("io" , "sas" , "data" , "DEMO_PUF.cpt" )
48+
3849 @pytest .mark .slow
39- def test1_basic (self ):
50+ def test1_basic (self , file01 ):
4051 # Tests with DEMO_G.xpt (all numeric file)
4152
4253 # Compare to this
43- data_csv = pd .read_csv (self . file01 .replace (".xpt" , ".csv" ))
54+ data_csv = pd .read_csv (file01 .replace (".xpt" , ".csv" ))
4455 numeric_as_float (data_csv )
4556
4657 # Read full file
47- data = read_sas (self . file01 , format = "xport" )
58+ data = read_sas (file01 , format = "xport" )
4859 tm .assert_frame_equal (data , data_csv )
4960 num_rows = data .shape [0 ]
5061
5162 # Test reading beyond end of file
52- with read_sas (self . file01 , format = "xport" , iterator = True ) as reader :
63+ with read_sas (file01 , format = "xport" , iterator = True ) as reader :
5364 data = reader .read (num_rows + 100 )
5465 assert data .shape [0 ] == num_rows
5566
5667 # Test incremental read with `read` method.
57- with read_sas (self . file01 , format = "xport" , iterator = True ) as reader :
68+ with read_sas (file01 , format = "xport" , iterator = True ) as reader :
5869 data = reader .read (10 )
5970 tm .assert_frame_equal (data , data_csv .iloc [0 :10 , :])
6071
6172 # Test incremental read with `get_chunk` method.
62- with read_sas (self . file01 , format = "xport" , chunksize = 10 ) as reader :
73+ with read_sas (file01 , format = "xport" , chunksize = 10 ) as reader :
6374 data = reader .get_chunk ()
6475 tm .assert_frame_equal (data , data_csv .iloc [0 :10 , :])
6576
6677 # Test read in loop
6778 m = 0
68- with read_sas (self . file01 , format = "xport" , chunksize = 100 ) as reader :
79+ with read_sas (file01 , format = "xport" , chunksize = 100 ) as reader :
6980 for x in reader :
7081 m += x .shape [0 ]
7182 assert m == num_rows
7283
7384 # Read full file with `read_sas` method
74- data = read_sas (self . file01 )
85+ data = read_sas (file01 )
7586 tm .assert_frame_equal (data , data_csv )
7687
77- def test1_index (self ):
88+ def test1_index (self , file01 ):
7889 # Tests with DEMO_G.xpt using index (all numeric file)
7990
8091 # Compare to this
81- data_csv = pd .read_csv (self . file01 .replace (".xpt" , ".csv" ))
92+ data_csv = pd .read_csv (file01 .replace (".xpt" , ".csv" ))
8293 data_csv = data_csv .set_index ("SEQN" )
8394 numeric_as_float (data_csv )
8495
8596 # Read full file
86- data = read_sas (self . file01 , index = "SEQN" , format = "xport" )
97+ data = read_sas (file01 , index = "SEQN" , format = "xport" )
8798 tm .assert_frame_equal (data , data_csv , check_index_type = False )
8899
89100 # Test incremental read with `read` method.
90- with read_sas (
91- self .file01 , index = "SEQN" , format = "xport" , iterator = True
92- ) as reader :
101+ with read_sas (file01 , index = "SEQN" , format = "xport" , iterator = True ) as reader :
93102 data = reader .read (10 )
94103 tm .assert_frame_equal (data , data_csv .iloc [0 :10 , :], check_index_type = False )
95104
96105 # Test incremental read with `get_chunk` method.
97- with read_sas (
98- self .file01 , index = "SEQN" , format = "xport" , chunksize = 10
99- ) as reader :
106+ with read_sas (file01 , index = "SEQN" , format = "xport" , chunksize = 10 ) as reader :
100107 data = reader .get_chunk ()
101108 tm .assert_frame_equal (data , data_csv .iloc [0 :10 , :], check_index_type = False )
102109
103- def test1_incremental (self ):
110+ def test1_incremental (self , file01 ):
104111 # Test with DEMO_G.xpt, reading full file incrementally
105112
106- data_csv = pd .read_csv (self . file01 .replace (".xpt" , ".csv" ))
113+ data_csv = pd .read_csv (file01 .replace (".xpt" , ".csv" ))
107114 data_csv = data_csv .set_index ("SEQN" )
108115 numeric_as_float (data_csv )
109116
110- with read_sas (self . file01 , index = "SEQN" , chunksize = 1000 ) as reader :
117+ with read_sas (file01 , index = "SEQN" , chunksize = 1000 ) as reader :
111118 all_data = list (reader )
112119 data = pd .concat (all_data , axis = 0 )
113120
114121 tm .assert_frame_equal (data , data_csv , check_index_type = False )
115122
116- def test2 (self ):
123+ def test2 (self , file02 ):
117124 # Test with SSHSV1_A.xpt
118125
119126 # Compare to this
120- data_csv = pd .read_csv (self . file02 .replace (".xpt" , ".csv" ))
127+ data_csv = pd .read_csv (file02 .replace (".xpt" , ".csv" ))
121128 numeric_as_float (data_csv )
122129
123- data = read_sas (self . file02 )
130+ data = read_sas (file02 )
124131 tm .assert_frame_equal (data , data_csv )
125132
126- def test2_binary (self ):
133+ def test2_binary (self , file02 ):
127134 # Test with SSHSV1_A.xpt, read as a binary file
128135
129136 # Compare to this
130- data_csv = pd .read_csv (self . file02 .replace (".xpt" , ".csv" ))
137+ data_csv = pd .read_csv (file02 .replace (".xpt" , ".csv" ))
131138 numeric_as_float (data_csv )
132139
133- with open (self . file02 , "rb" ) as fd :
140+ with open (file02 , "rb" ) as fd :
134141 with td .file_leak_context ():
135142 # GH#35693 ensure that if we pass an open file, we
136143 # dont incorrectly close it in read_sas
137144 data = read_sas (fd , format = "xport" )
138145
139146 tm .assert_frame_equal (data , data_csv )
140147
141- def test_multiple_types (self ):
148+ def test_multiple_types (self , file03 ):
142149 # Test with DRXFCD_G.xpt (contains text and numeric variables)
143150
144151 # Compare to this
145- data_csv = pd .read_csv (self . file03 .replace (".xpt" , ".csv" ))
152+ data_csv = pd .read_csv (file03 .replace (".xpt" , ".csv" ))
146153
147- data = read_sas (self . file03 , encoding = "utf-8" )
154+ data = read_sas (file03 , encoding = "utf-8" )
148155 tm .assert_frame_equal (data , data_csv )
149156
150- def test_truncated_float_support (self ):
157+ def test_truncated_float_support (self , file04 ):
151158 # Test with paxraw_d_short.xpt, a shortened version of:
152159 # http://wwwn.cdc.gov/Nchs/Nhanes/2005-2006/PAXRAW_D.ZIP
153160 # This file has truncated floats (5 bytes in this case).
154161
155162 # GH 11713
156163
157- data_csv = pd .read_csv (self . file04 .replace (".xpt" , ".csv" ))
164+ data_csv = pd .read_csv (file04 .replace (".xpt" , ".csv" ))
158165
159- data = read_sas (self . file04 , format = "xport" )
166+ data = read_sas (file04 , format = "xport" )
160167 tm .assert_frame_equal (data .astype ("int64" ), data_csv )
161168
162- def test_cport_header_found_raises (self ):
169+ def test_cport_header_found_raises (self , file05 ):
163170 # Test with DEMO_PUF.cpt, the beginning of puf2019_1_fall.xpt
164171 # from https://www.cms.gov/files/zip/puf2019.zip
165172 # (despite the extension, it's a cpt file)
166173 msg = "Header record indicates a CPORT file, which is not readable."
167174 with pytest .raises (ValueError , match = msg ):
168- read_sas (self . file05 , format = "xport" )
175+ read_sas (file05 , format = "xport" )
0 commit comments