22
33import bz2
44import codecs
5- from contextlib import closing , contextmanager
65import csv
76import gzip
7+ from http .client import HTTPException # noqa
88import lzma
99import mmap
1010import os
11+ from urllib .error import URLError # noqa
12+ from urllib .parse import ( # noqa
13+ urlencode , urljoin , urlparse as parse_url , uses_netloc , uses_params ,
14+ uses_relative )
15+ from urllib .request import pathname2url , urlopen
1116import zipfile
1217
1318import pandas .compat as compat
14- from pandas .compat import BytesIO , StringIO , string_types , text_type
19+ from pandas .compat import BytesIO , string_types , text_type
1520from pandas .errors import ( # noqa
1621 AbstractMethodError , DtypeWarning , EmptyDataError , ParserError ,
1722 ParserWarning )
1823
19- from pandas .core .dtypes .common import is_file_like , is_number
20-
21- from pandas .io .formats .printing import pprint_thing
24+ from pandas .core .dtypes .common import is_file_like
2225
2326# gh-12665: Alias for now and remove later.
2427CParserError = ParserError
3134 '-nan' , '' }
3235
3336
34- if compat .PY3 :
35- from urllib .request import urlopen , pathname2url
36- _urlopen = urlopen
37- from urllib .parse import urlparse as parse_url
38- from urllib .parse import (uses_relative , uses_netloc , uses_params ,
39- urlencode , urljoin )
40- from urllib .error import URLError
41- from http .client import HTTPException # noqa
42- else :
43- from urllib2 import urlopen as _urlopen
44- from urllib import urlencode , pathname2url # noqa
45- from urlparse import urlparse as parse_url
46- from urlparse import uses_relative , uses_netloc , uses_params , urljoin
47- from urllib2 import URLError # noqa
48- from httplib import HTTPException # noqa
49- from contextlib import contextmanager , closing # noqa
50- from functools import wraps # noqa
51-
52- # @wraps(_urlopen)
53- @contextmanager
54- def urlopen (* args , ** kwargs ):
55- with closing (_urlopen (* args , ** kwargs )) as f :
56- yield f
57-
58-
5937_VALID_URLS = set (uses_relative + uses_netloc + uses_params )
6038_VALID_URLS .discard ('' )
6139
@@ -72,10 +50,6 @@ def __next__(self):
7250 raise AbstractMethodError (self )
7351
7452
75- if not compat .PY3 :
76- BaseIterator .next = lambda self : self .__next__ ()
77-
78-
7953def _is_url (url ):
8054 """Check to see if a URL has a valid protocol.
8155
@@ -189,7 +163,8 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
189163 ----------
190164 filepath_or_buffer : a url, filepath (str, py.path.local or pathlib.Path),
191165 or buffer
192- encoding : the encoding to use to decode py3 bytes, default is 'utf-8'
166+ compression : {{'gzip', 'bz2', 'zip', 'xz', None}}, optional
167+ encoding : the encoding to use to decode bytes, default is 'utf-8'
193168 mode : str, optional
194169
195170 Returns
@@ -202,7 +177,7 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
202177 filepath_or_buffer = _stringify_path (filepath_or_buffer )
203178
204179 if _is_url (filepath_or_buffer ):
205- req = _urlopen (filepath_or_buffer )
180+ req = urlopen (filepath_or_buffer )
206181 content_encoding = req .headers .get ('Content-Encoding' , None )
207182 if content_encoding == 'gzip' :
208183 # Override compression based on Content-Encoding header
@@ -361,10 +336,6 @@ def _get_handle(path_or_buf, mode, encoding=None, compression=None,
361336
362337 if compression :
363338
364- if compat .PY2 and not is_path and encoding :
365- msg = 'compression with encoding is not yet supported in Python 2'
366- raise ValueError (msg )
367-
368339 # GZ Compression
369340 if compression == 'gzip' :
370341 if is_path :
@@ -376,11 +347,6 @@ def _get_handle(path_or_buf, mode, encoding=None, compression=None,
376347 elif compression == 'bz2' :
377348 if is_path :
378349 f = bz2 .BZ2File (path_or_buf , mode )
379- elif compat .PY2 :
380- # Python 2's bz2 module can't take file objects, so have to
381- # run through decompress manually
382- f = StringIO (bz2 .decompress (path_or_buf .read ()))
383- path_or_buf .close ()
384350 else :
385351 f = bz2 .BZ2File (path_or_buf )
386352
@@ -415,24 +381,19 @@ def _get_handle(path_or_buf, mode, encoding=None, compression=None,
415381 handles .append (f )
416382
417383 elif is_path :
418- if compat .PY2 :
419- # Python 2
420- mode = "wb" if mode == "w" else mode
421- f = open (path_or_buf , mode )
422- elif encoding :
423- # Python 3 and encoding
384+ if encoding :
385+ # Encoding
424386 f = open (path_or_buf , mode , encoding = encoding , newline = "" )
425387 elif is_text :
426- # Python 3 and no explicit encoding
388+ # No explicit encoding
427389 f = open (path_or_buf , mode , errors = 'replace' , newline = "" )
428390 else :
429- # Python 3 and binary mode
391+ # Binary mode
430392 f = open (path_or_buf , mode )
431393 handles .append (f )
432394
433- # in Python 3, convert BytesIO or fileobjects passed with an encoding
434- if (compat .PY3 and is_text and
435- (compression or isinstance (f , need_text_wrapping ))):
395+ # Convert BytesIO or file objects passed with an encoding
396+ if is_text and (compression or isinstance (f , need_text_wrapping )):
436397 from io import TextIOWrapper
437398 f = TextIOWrapper (f , encoding = encoding , newline = '' )
438399 handles .append (f )
@@ -499,11 +460,9 @@ def __iter__(self):
499460 def __next__ (self ):
500461 newline = self .mmap .readline ()
501462
502- # readline returns bytes, not str, in Python 3,
503- # but Python's CSV reader expects str, so convert
504- # the output to str before continuing
505- if compat .PY3 :
506- newline = compat .bytes_to_str (newline )
463+ # readline returns bytes, not str, but Python's CSV reader
464+ # expects str, so convert the output to str before continuing
465+ newline = compat .bytes_to_str (newline )
507466
508467 # mmap doesn't raise if reading past the allocated
509468 # data but instead returns an empty string, so raise
@@ -513,14 +472,10 @@ def __next__(self):
513472 return newline
514473
515474
516- if not compat .PY3 :
517- MMapWrapper .next = lambda self : self .__next__ ()
518-
519-
520475class UTF8Recoder (BaseIterator ):
521476
522477 """
523- Iterator that reads an encoded stream and reencodes the input to UTF-8
478+ Iterator that reads an encoded stream and re-encodes the input to UTF-8
524479 """
525480
526481 def __init__ (self , f , encoding ):
@@ -536,82 +491,12 @@ def next(self):
536491 return next (self .reader ).encode ("utf-8" )
537492
538493
539- if compat .PY3 : # pragma: no cover
540- def UnicodeReader (f , dialect = csv .excel , encoding = "utf-8" , ** kwds ):
541- # ignore encoding
542- return csv .reader (f , dialect = dialect , ** kwds )
543-
544- def UnicodeWriter (f , dialect = csv .excel , encoding = "utf-8" , ** kwds ):
545- return csv .writer (f , dialect = dialect , ** kwds )
546- else :
547- class UnicodeReader (BaseIterator ):
548-
549- """
550- A CSV reader which will iterate over lines in the CSV file "f",
551- which is encoded in the given encoding.
552-
553- On Python 3, this is replaced (below) by csv.reader, which handles
554- unicode.
555- """
556-
557- def __init__ (self , f , dialect = csv .excel , encoding = "utf-8" , ** kwds ):
558- f = UTF8Recoder (f , encoding )
559- self .reader = csv .reader (f , dialect = dialect , ** kwds )
560-
561- def __next__ (self ):
562- row = next (self .reader )
563- return [compat .text_type (s , "utf-8" ) for s in row ]
564-
565- class UnicodeWriter (object ):
566-
567- """
568- A CSV writer which will write rows to CSV file "f",
569- which is encoded in the given encoding.
570- """
571-
572- def __init__ (self , f , dialect = csv .excel , encoding = "utf-8" , ** kwds ):
573- # Redirect output to a queue
574- self .queue = StringIO ()
575- self .writer = csv .writer (self .queue , dialect = dialect , ** kwds )
576- self .stream = f
577- self .encoder = codecs .getincrementalencoder (encoding )()
578- self .quoting = kwds .get ("quoting" , None )
579-
580- def writerow (self , row ):
581- def _check_as_is (x ):
582- return (self .quoting == csv .QUOTE_NONNUMERIC and
583- is_number (x )) or isinstance (x , str )
584-
585- row = [x if _check_as_is (x )
586- else pprint_thing (x ).encode ("utf-8" ) for x in row ]
587-
588- self .writer .writerow ([s for s in row ])
589- # Fetch UTF-8 output from the queue ...
590- data = self .queue .getvalue ()
591- data = data .decode ("utf-8" )
592- # ... and re-encode it into the target encoding
593- data = self .encoder .encode (data )
594- # write to the target stream
595- self .stream .write (data )
596- # empty queue
597- self .queue .truncate (0 )
598-
599- def writerows (self , rows ):
600- def _check_as_is (x ):
601- return (self .quoting == csv .QUOTE_NONNUMERIC and
602- is_number (x )) or isinstance (x , str )
603-
604- for i , row in enumerate (rows ):
605- rows [i ] = [x if _check_as_is (x )
606- else pprint_thing (x ).encode ("utf-8" ) for x in row ]
607-
608- self .writer .writerows ([[s for s in row ] for row in rows ])
609- # Fetch UTF-8 output from the queue ...
610- data = self .queue .getvalue ()
611- data = data .decode ("utf-8" )
612- # ... and re-encode it into the target encoding
613- data = self .encoder .encode (data )
614- # write to the target stream
615- self .stream .write (data )
616- # empty queue
617- self .queue .truncate (0 )
# Keeping these functions for now because they provide a necessary
# convenience for "dropping" the "encoding" argument from our I/O
# arguments when creating a Unicode I/O object.
def UnicodeReader(f, dialect=csv.excel, encoding="utf-8", **kwds):
    """Return a ``csv.reader`` over *f*; *encoding* is accepted and ignored."""
    return csv.reader(f, dialect=dialect, **kwds)
499+
500+
def UnicodeWriter(f, dialect=csv.excel, encoding="utf-8", **kwds):
    """Return a ``csv.writer`` over *f*; *encoding* is accepted and ignored."""
    return csv.writer(f, dialect=dialect, **kwds)
0 commit comments