@@ -640,7 +640,7 @@ def get_handle(
640640 .. versionchanged:: 1.4.0 Zstandard support.
641641
642642 memory_map : bool, default False
643- See parsers._parser_params for more information.
643+ See parsers._parser_params for more information. Only used by read_csv.
644644 is_text : bool, default True
645645 Whether the type of the content passed to the file/buffer is string or
646646 bytes. This is not the same as `"b" not in mode`. If a string content is
@@ -659,6 +659,8 @@ def get_handle(
659659 # Windows does not default to utf-8. Set to utf-8 for a consistent behavior
660660 encoding = encoding or "utf-8"
661661
662+ errors = errors or "strict"
663+
662664 # read_csv does not know whether the buffer is opened in binary/text mode
663665 if _is_binary_mode (path_or_buf , mode ) and "b" not in mode :
664666 mode += "b"
@@ -681,6 +683,7 @@ def get_handle(
681683 handles : list [BaseBuffer ]
682684
683685 # memory mapping needs to be the first step
686+ # only used for read_csv
684687 handle , memory_map , handles = _maybe_memory_map (
685688 handle ,
686689 memory_map ,
@@ -1064,7 +1067,7 @@ def closed(self):
10641067 return self .fp is None
10651068
10661069
1067- class _MMapWrapper (abc .Iterator ):
1070+ class _CSVMMapWrapper (abc .Iterator ):
10681071 """
10691072 Wrapper for the Python's mmap class so that it can be properly read in
10701073 by Python's csv.reader class.
@@ -1079,7 +1082,7 @@ class _MMapWrapper(abc.Iterator):
10791082
10801083 def __init__ (
10811084 self ,
1082- f : IO ,
1085+ f : ReadBuffer [ bytes ] ,
10831086 encoding : str = "utf-8" ,
10841087 errors : str = "strict" ,
10851088 decode : bool = True ,
@@ -1089,19 +1092,21 @@ def __init__(
10891092 self .decoder = codecs .getincrementaldecoder (encoding )(errors = errors )
10901093 self .decode = decode
10911094
1095+ # needed for compression libraries and TextIOWrapper
10921096 self .attributes = {}
10931097 for attribute in ("seekable" , "readable" ):
10941098 if not hasattr (f , attribute ):
10951099 continue
10961100 self .attributes [attribute ] = getattr (f , attribute )()
1101+
10971102 self .mmap = mmap .mmap (f .fileno (), 0 , access = mmap .ACCESS_READ )
10981103
10991104 def __getattr__ (self , name : str ):
11001105 if name in self .attributes :
11011106 return lambda : self .attributes [name ]
11021107 return getattr (self .mmap , name )
11031108
1104- def __iter__ (self ) -> _MMapWrapper :
1109+ def __iter__ (self ) -> _CSVMMapWrapper :
11051110 return self
11061111
11071112 def read (self , size : int = - 1 ) -> str | bytes :
@@ -1196,7 +1201,7 @@ def _maybe_memory_map(
11961201 memory_map : bool ,
11971202 encoding : str ,
11981203 mode : str ,
1199- errors : str | None ,
1204+ errors : str ,
12001205 decode : bool ,
12011206) -> tuple [str | BaseBuffer , bool , list [BaseBuffer ]]:
12021207 """Try to memory map file/buffer."""
@@ -1207,25 +1212,22 @@ def _maybe_memory_map(
12071212
12081213 # need to open the file first
12091214 if isinstance (handle , str ):
1210- if encoding and "b" not in mode :
1211- # Encoding
1212- handle = open (handle , mode , encoding = encoding , errors = errors , newline = "" )
1213- else :
1214- # Binary mode
1215- handle = open (handle , mode )
1215+ handle = open (handle , "rb" )
12161216 handles .append (handle )
12171217
12181218 # error: Argument 1 to "_MMapWrapper" has incompatible type "Union[IO[Any],
12191219 # RawIOBase, BufferedIOBase, TextIOBase, mmap]"; expected "IO[Any]"
12201220 try :
1221+ # open mmap, add the *-able methods (seekable/readable), and convert to string
12211222 wrapped = cast (
12221223 BaseBuffer ,
1223- _MMapWrapper (handle , encoding , errors , decode ), # type: ignore[arg-type]
1224+ _CSVMMapWrapper (handle , encoding , errors , decode ), # type: ignore[arg-type]
12241225 )
12251226 finally :
12261227 for handle in reversed (handles ):
12271228 # error: "BaseBuffer" has no attribute "close"
12281229 handle .close () # type: ignore[attr-defined]
1230+ handles = []
12291231 handles .append (wrapped )
12301232
12311233 return wrapped , memory_map , handles
0 commit comments