@@ -70,12 +70,20 @@ def get_writer(engine_name):
7070 except KeyError :
7171 raise ValueError ("No Excel writer '%s'" % engine_name )
7272
73-
74- excel_doc_common = """
73+ def read_excel (io , sheetname = 0 , header = 0 , skiprows = None , skip_footer = 0 ,
74+ index_col = None , parse_cols = None , parse_dates = False ,
75+ date_parser = None , na_values = None , thousands = None ,
76+ convert_float = True , has_index_names = None , converters = None ,
77+ engine = None , ** kwds ):
78+ """
7579 Read an Excel table into a pandas DataFrame
7680
7781 Parameters
78- ----------%(io)s
82+ ----------
83+ io : string, file-like object, pandas ExcelFile, or xlrd workbook.
84+ The string could be a URL. Valid URL schemes include http, ftp, s3,
85+ and file. For file URLs, a host is expected. For instance, a local
86+ file could be file://localhost/path/to/workbook.xlsx
7987 sheetname : string, int, mixed list of strings/ints, or None, default 0
8088
8189 Strings are used for sheet names, Integers are used in zero-indexed sheet
@@ -122,18 +130,24 @@ def get_writer(engine_name):
122130 na_values : list-like, default None
123131 List of additional strings to recognize as NA/NaN
124132 thousands : str, default None
125- Thousands separator
133+ Thousands separator for parsing string columns to numeric. Note that
134+ this parameter is only necessary for columns stored as TEXT in Excel,
135+ any numeric columns will automatically be parsed, regardless of display
136+ format.
126137 keep_default_na : bool, default True
127138 If na_values are specified and keep_default_na is False the default NaN
128139 values are overridden, otherwise they're appended to the defaults
129140 verbose : boolean, default False
130- Indicate number of NA values placed in non-numeric columns%(eng)s
141+ Indicate number of NA values placed in non-numeric columns
142+ engine : string, default None
143+ If io is not a buffer or path, this must be set to identify io.
144+ Acceptable values are None or xlrd
131145 convert_float : boolean, default True
132146 convert integral floats to int (i.e., 1.0 --> 1). If False, all numeric
133147 data will be read in as floats: Excel stores all numbers as floats
134148 internally
135149 has_index_names : boolean, default None
136- DEPCRECATED : for version 0.17+ index names will be automatically inferred
150+ DEPRECATED : for version 0.17+ index names will be automatically inferred
137151 based on index_col. To read Excel output from 0.16.2 and prior that
138152 had saved index names, use True.
139153
@@ -144,28 +158,21 @@ def get_writer(engine_name):
144158 for more information on when a Dict of Dataframes is returned.
145159
146160 """
147- read_excel_kwargs = dict ()
148- read_excel_kwargs ['io' ] = """
149- io : string, file-like object, or xlrd workbook.
150- The string could be a URL. Valid URL schemes include http, ftp, s3,
151- and file. For file URLs, a host is expected. For instance, a local
152- file could be file://localhost/path/to/workbook.xlsx"""
153- read_excel_kwargs ['eng' ] = """
154- engine: string, default None
155- If io is not a buffer or path, this must be set to identify io.
156- Acceptable values are None or xlrd"""
157-
158- @Appender (excel_doc_common % read_excel_kwargs )
159- def read_excel (io , sheetname = 0 , ** kwds ):
160- engine = kwds .pop ('engine' , None )
161161
162- return ExcelFile (io , engine = engine ).parse (sheetname = sheetname , ** kwds )
162+ if not isinstance (io , ExcelFile ):
163+ io = ExcelFile (io , engine = engine )
163164
165+ return io ._parse_excel (
166+ sheetname = sheetname , header = header , skiprows = skiprows ,
167+ index_col = index_col , parse_cols = parse_cols , parse_dates = parse_dates ,
168+ date_parser = date_parser , na_values = na_values , thousands = thousands ,
169+ convert_float = convert_float , has_index_names = has_index_names ,
170+ skip_footer = skip_footer , converters = converters , ** kwds )
164171
165172class ExcelFile (object ):
166173 """
167174 Class for parsing tabular excel sheets into DataFrame objects.
168- Uses xlrd. See ExcelFile.parse for more documentation
175+ Uses xlrd. See read_excel for more documentation
169176
170177 Parameters
171178 ----------
@@ -207,23 +214,16 @@ def __init__(self, io, **kwds):
207214 raise ValueError ('Must explicitly set engine if not passing in'
208215 ' buffer or path for io.' )
209216
210- @Appender (excel_doc_common % dict (io = '' , eng = '' ))
211217 def parse (self , sheetname = 0 , header = 0 , skiprows = None , skip_footer = 0 ,
212218 index_col = None , parse_cols = None , parse_dates = False ,
213- date_parser = None , na_values = None , thousands = None , chunksize = None ,
219+ date_parser = None , na_values = None , thousands = None ,
214220 convert_float = True , has_index_names = None , converters = None , ** kwds ):
221+ """
222+ Parse specified sheet(s) into a DataFrame
215223
216- skipfooter = kwds .pop ('skipfooter' , None )
217- if skipfooter is not None :
218- skip_footer = skipfooter
219-
220- _validate_header_arg (header )
221- if has_index_names is not None :
222- warn ("\n The has_index_names argument is deprecated; index names "
223- "will be automatically inferred based on index_col.\n "
224- "This argmument is still necessary if reading Excel output "
225- "from 0.16.2 or prior with index names." , FutureWarning ,
226- stacklevel = 3 )
224+ Equivalent to read_excel(ExcelFile, ...). See the read_excel
225+ docstring for more info on accepted parameters.
226+ """
227227
228228 return self ._parse_excel (sheetname = sheetname , header = header ,
229229 skiprows = skiprows ,
@@ -232,7 +232,7 @@ def parse(self, sheetname=0, header=0, skiprows=None, skip_footer=0,
232232 parse_cols = parse_cols ,
233233 parse_dates = parse_dates ,
234234 date_parser = date_parser , na_values = na_values ,
235- thousands = thousands , chunksize = chunksize ,
235+ thousands = thousands ,
236236 skip_footer = skip_footer ,
237237 convert_float = convert_float ,
238238 converters = converters ,
@@ -274,8 +274,25 @@ def _excel2num(x):
274274 def _parse_excel (self , sheetname = 0 , header = 0 , skiprows = None , skip_footer = 0 ,
275275 index_col = None , has_index_names = None , parse_cols = None ,
276276 parse_dates = False , date_parser = None , na_values = None ,
277- thousands = None , chunksize = None , convert_float = True ,
277+ thousands = None , convert_float = True ,
278278 verbose = False , ** kwds ):
279+
280+ skipfooter = kwds .pop ('skipfooter' , None )
281+ if skipfooter is not None :
282+ skip_footer = skipfooter
283+
284+ _validate_header_arg (header )
285+ if has_index_names is not None :
286+ warn ("\n The has_index_names argument is deprecated; index names "
287+ "will be automatically inferred based on index_col.\n "
288+ "This argmument is still necessary if reading Excel output "
289+ "from 0.16.2 or prior with index names." , FutureWarning ,
290+ stacklevel = 3 )
291+
292+ if 'chunksize' in kwds :
293+ raise NotImplementedError ("Reading an Excel file in chunks "
294+ "is not implemented" )
295+
279296 import xlrd
280297 from xlrd import (xldate , XL_CELL_DATE ,
281298 XL_CELL_ERROR , XL_CELL_BOOLEAN ,
@@ -416,7 +433,6 @@ def _parse_cell(cell_contents,cell_typ):
416433 date_parser = date_parser ,
417434 skiprows = skiprows ,
418435 skip_footer = skip_footer ,
419- chunksize = chunksize ,
420436 ** kwds )
421437
422438 output [asheetname ] = parser .read ()
0 commit comments