@@ -57,7 +57,7 @@ cdef dict _parse_code_table = {'y': 0,
5757 ' u' : 22 }
5858
5959
60- def array_strptime (ndarray[object] values , object fmt , bint exact = True , errors = ' raise' ):
60+ def array_strptime (ndarray[object] values , str fmt , bint exact = True , errors = ' raise' ):
6161 """
6262 Calculates the datetime structs represented by the passed array of strings
6363
@@ -349,7 +349,7 @@ def array_strptime(ndarray[object] values, object fmt, bint exact=True, errors='
349349
350350
351351"""
352- _getlang, LocaleTime, TimeRE, _calc_julian_from_U_or_W are vendored
352+ TimeRE, _calc_julian_from_U_or_W are vendored
353353from the standard library, see
354354https://github.com/python/cpython/blob/master/Lib/_strptime.py
355355The original module-level docstring follows.
@@ -364,161 +364,14 @@ FUNCTIONS:
364364 strptime -- Calculates the time struct represented by the passed-in string
365365"""
366366
367-
368- def _getlang ():
369- """ Figure out what language is being used for the locale"""
370- return locale.getlocale(locale.LC_TIME)
371-
372-
373- class LocaleTime :
374- """
375- Stores and handles locale-specific information related to time.
376-
377- ATTRIBUTES:
378- f_weekday -- full weekday names (7-item list)
379- a_weekday -- abbreviated weekday names (7-item list)
380- f_month -- full month names (13-item list; dummy value in [0], which
381- is added by code)
382- a_month -- abbreviated month names (13-item list, dummy value in
383- [0], which is added by code)
384- am_pm -- AM/PM representation (2-item list)
385- LC_date_time -- format string for date/time representation (string)
386- LC_date -- format string for date representation (string)
387- LC_time -- format string for time representation (string)
388- timezone -- daylight- and non-daylight-savings timezone representation
389- (2-item list of sets)
390- lang -- Language used by instance (2-item tuple)
391- """
392-
393- def __init__ (self ):
394- """
395- Set all attributes.
396-
397- Order of methods called matters for dependency reasons.
398-
399- The locale language is set at the offset and then checked again before
400- exiting. This is to make sure that the attributes were not set with a
401- mix of information from more than one locale. This would most likely
402- happen when using threads where one thread calls a locale-dependent
403- function while another thread changes the locale while the function in
404- the other thread is still running. Proper coding would call for
405- locks to prevent changing the locale while locale-dependent code is
406- running. The check here is done in case someone does not think about
407- doing this.
408-
409- Only other possible issue is if someone changed the timezone and did
410- not call tz.tzset . That is an issue for the programmer, though,
411- since changing the timezone is worthless without that call.
412- """
413- self .lang = _getlang()
414- self .__calc_weekday()
415- self .__calc_month()
416- self .__calc_am_pm()
417- self .__calc_timezone()
418- self .__calc_date_time()
419- if _getlang() != self .lang:
420- raise ValueError (" locale changed during initialization" )
421-
422- def __pad (self , seq , front ):
423- # Add '' to seq to either the front (is True), else the back.
424- seq = list (seq)
425- if front:
426- seq.insert(0 , ' ' )
427- else :
428- seq.append(' ' )
429- return seq
430-
431- def __calc_weekday (self ):
432- # Set self.a_weekday and self.f_weekday using the calendar
433- # module.
434- a_weekday = [calendar.day_abbr[i].lower() for i in range (7 )]
435- f_weekday = [calendar.day_name[i].lower() for i in range (7 )]
436- self .a_weekday = a_weekday
437- self .f_weekday = f_weekday
438-
439- def __calc_month (self ):
440- # Set self.f_month and self.a_month using the calendar module.
441- a_month = [calendar.month_abbr[i].lower() for i in range (13 )]
442- f_month = [calendar.month_name[i].lower() for i in range (13 )]
443- self .a_month = a_month
444- self .f_month = f_month
445-
446- def __calc_am_pm (self ):
447- # Set self.am_pm by using time.strftime().
448-
449- # The magic date (1999,3,17,hour,44,55,2,76,0) is not really that
450- # magical; just happened to have used it everywhere else where a
451- # static date was needed.
452- am_pm = []
453- for hour in (01 , 22 ):
454- time_tuple = time.struct_time(
455- (1999 , 3 , 17 , hour, 44 , 55 , 2 , 76 , 0 ))
456- am_pm.append(time.strftime(" %p " , time_tuple).lower())
457- self .am_pm = am_pm
458-
459- def __calc_date_time (self ):
460- # Set self.date_time, self.date, & self.time by using
461- # time.strftime().
462-
463- # Use (1999,3,17,22,44,55,2,76,0) for magic date because the amount of
464- # overloaded numbers is minimized. The order in which searches for
465- # values within the format string is very important; it eliminates
466- # possible ambiguity for what something represents.
467- time_tuple = time.struct_time((1999 , 3 , 17 , 22 , 44 , 55 , 2 , 76 , 0 ))
468- date_time = [None , None , None ]
469- date_time[0 ] = time.strftime(" %c " , time_tuple).lower()
470- date_time[1 ] = time.strftime(" %x " , time_tuple).lower()
471- date_time[2 ] = time.strftime(" %X " , time_tuple).lower()
472- replacement_pairs = [(' %' , ' %% ' ), (self .f_weekday[2 ], ' %A ' ),
473- (self .f_month[3 ], ' %B ' ),
474- (self .a_weekday[2 ], ' %a ' ),
475- (self .a_month[3 ], ' %b ' ), (self .am_pm[1 ], ' %p ' ),
476- (' 1999' , ' %Y ' ), (' 99' , ' %y ' ), (' 22' , ' %H ' ),
477- (' 44' , ' %M ' ), (' 55' , ' %S ' ), (' 76' , ' %j ' ),
478- (' 17' , ' %d ' ), (' 03' , ' %m ' ), (' 3' , ' %m ' ),
479- # '3' needed for when no leading zero.
480- (' 2' , ' %w ' ), (' 10' , ' %I ' )]
481- replacement_pairs.extend([(tz, " %Z " ) for tz_values in self .timezone
482- for tz in tz_values])
483- for offset, directive in ((0 , ' %c ' ), (1 , ' %x ' ), (2 , ' %X ' )):
484- current_format = date_time[offset]
485- for old, new in replacement_pairs:
486- # Must deal with possible lack of locale info
487- # manifesting itself as the empty string (e.g., Swedish's
488- # lack of AM/PM info) or a platform returning a tuple of empty
489- # strings (e.g., MacOS 9 having timezone as ('','')).
490- if old:
491- current_format = current_format.replace(old, new)
492- # If %W is used, then Sunday, 2005-01-03 will fall on week 0 since
493- # 2005-01-03 occurs before the first Monday of the year. Otherwise
494- # %U is used.
495- time_tuple = time.struct_time((1999 , 1 , 3 , 1 , 1 , 1 , 6 , 3 , 0 ))
496- if ' 00' in time.strftime(directive, time_tuple):
497- U_W = ' %W '
498- else :
499- U_W = ' %U '
500- date_time[offset] = current_format.replace(' 11' , U_W)
501- self .LC_date_time = date_time[0 ]
502- self .LC_date = date_time[1 ]
503- self .LC_time = date_time[2 ]
504-
505- def __calc_timezone (self ):
506- # Set self.timezone by using time.tzname.
507- # Do not worry about possibility of time.tzname[0] == timetzname[1]
508- # and time.daylight; handle that in strptime .
509- try :
510- time.tzset()
511- except AttributeError :
512- pass
513- no_saving = frozenset ([" utc" , " gmt" , time.tzname[0 ].lower()])
514- if time.daylight:
515- has_saving = frozenset ([time.tzname[1 ].lower()])
516- else :
517- has_saving = frozenset ()
518- self .timezone = (no_saving, has_saving)
367+ from _strptime import (
368+ LocaleTime,
369+ TimeRE as _TimeRE,
370+ _getlang,
371+ )
519372
520373
521- class TimeRE (dict ):
374+ class TimeRE (_TimeRE ):
522375 """
523376 Handle conversion from format directives to regexes.
524377
@@ -532,100 +385,20 @@ class TimeRE(dict):
532385
533386 Order of execution is important for dependency reasons.
534387 """
535- if locale_time:
536- self .locale_time = locale_time
537- else :
538- self .locale_time = LocaleTime()
539388 self ._Z = None
540- base = super ()
541- base.__init__ ({
542- # The " \d" part of the regex is to make %c from ANSI C work
543- ' d' : r " ( ?P<d> 3[0-1 ]| [1-2 ]\d | 0[1-9 ]| [1-9 ]| [1-9 ]) " ,
544- ' f' : r " ( ?P<f> [0-9 ]{1,9} ) " ,
545- ' G' : r " ( ?P<G> \d\d\d\d ) " ,
546- ' H' : r " ( ?P<H> 2[0-3 ]| [0-1 ]\d | \d ) " ,
547- ' I' : r " ( ?P<I> 1[0-2 ]| 0[1-9 ]| [1-9 ]) " ,
548- ' j' : (r " ( ?P<j> 36[0-6 ]| 3[0-5 ]\d | [1-2 ]\d\d | 0[1-9 ]\d | 00[1-9 ]| "
549- r"[1-9 ]\d | 0[1-9 ]| [1-9 ]) " ),
550- ' m' : r " ( ?P<m> 1[0-2 ]| 0[1-9 ]| [1-9 ]) " ,
551- ' M' : r " ( ?P<M> [0-5 ]\d | \d ) " ,
552- ' S' : r " ( ?P<S> 6[0-1 ]| [0-5 ]\d | \d ) " ,
553- ' u' : r " ( ?P<u> [1-7 ]) " ,
554- ' U' : r " ( ?P<U> 5[0-3 ]| [0-4 ]\d | \d ) " ,
555- ' V' : r " ( ?P<V> 5[0-3 ]| 0[1-9 ]| [1-4 ]\d | \d ) " ,
556- ' w' : r " ( ?P<w> [0-6 ]) " ,
557- # W is set below by using 'U'
558- ' y' : r " ( ?P<y> \d\d ) " ,
559- # TODO: Does 'Y' need to worry about having less or more than
560- # 4 digits?
561- ' Y' : r " ( ?P<Y> \d\d\d\d ) " ,
562- ' z' : r " ( ?P<z> [+- ]\d\d :? [0-5 ]\d ( :? [0-5 ]\d ( \. \d {1,6} ) ? ) ? | Z) " ,
563- ' A' : self .__seqToRE(self .locale_time.f_weekday, ' A' ),
564- ' a' : self .__seqToRE(self .locale_time.a_weekday, ' a' ),
565- ' B' : self .__seqToRE(self .locale_time.f_month[1 :], ' B' ),
566- ' b' : self .__seqToRE(self .locale_time.a_month[1 :], ' b' ),
567- ' p' : self .__seqToRE(self .locale_time.am_pm, ' p' ),
568- # 'Z' key is generated lazily via __getitem__
569- ' %' : ' %' })
570- base.__setitem__ (' W' , base.__getitem__ (' U' ).replace(' U' , ' W' ))
571- base.__setitem__ (' c' , self .pattern(self .locale_time.LC_date_time))
572- base.__setitem__ (' x' , self .pattern(self .locale_time.LC_date))
573- base.__setitem__ (' X' , self .pattern(self .locale_time.LC_time))
389+ super ().__init__(locale_time = locale_time)
574390
575391 def __getitem__ (self , key ):
576392 if key == " Z" :
577393 # lazy computation
578394 if self ._Z is None :
579395 self ._Z = self .__seqToRE(pytz.all_timezones, ' Z' )
396 # Note: matching %Z against pytz.all_timezones is the key difference vs
397 # the stdlib _strptime.TimeRE. test_to_datetime_parse_tzname_or_tzoffset
398 # with fmt='%Y-%m-%d %H:%M:%S %Z' fails with the stdlib version.
580399 return self ._Z
581400 return super ().__getitem__(key)
582401
583- def __seqToRE (self , to_convert , directive ):
584- """
585- Convert a list to a regex string for matching a directive.
586-
587- Want possible matching values to be from longest to shortest. This
588- prevents the possibility of a match occurring for a value that also
589- a substring of a larger value that should have matched (e.g., 'abc'
590- matching when 'abcdef' should have been the match).
591- """
592- to_convert = sorted (to_convert, key = len , reverse = True )
593- for value in to_convert:
594- if value != ' ' :
595- break
596- else :
597- return ' '
598- regex = ' |' .join(re.escape(stuff) for stuff in to_convert)
599- regex = f" (?P<{directive}>{regex})"
600- return regex
601-
602- def pattern (self , format ):
603- """
604- Return regex pattern for the format string.
605-
606- Need to make sure that any characters that might be interpreted as
607- regex syntax are escaped.
608- """
609- processed_format = ' '
610- # The sub() call escapes all characters that might be misconstrued
611- # as regex syntax. Cannot use re.escape since we have to deal with
612- # format directives (%m, etc.).
613- regex_chars = re.compile(r " ( [\\ .^$*+? \(\) {} \[\] | ]) " )
614- format = regex_chars.sub(r " \\ \1 " , format)
615- whitespace_replacement = re.compile(r ' \s + ' )
616- format = whitespace_replacement.sub(r ' \\ s+ ' , format)
617- while ' %' in format:
618- directive_index = format.index(' %' ) + 1
619- processed_format = (f" {processed_format}"
620- f" {format[:directive_index -1]}"
621- f" {self[format[directive_index]]}" )
622- format = format[directive_index + 1 :]
623- return f" {processed_format}{format}"
624-
625- def compile (self , format ):
626- """ Return a compiled re object for the format string."""
627- return re.compile(self .pattern(format), re.IGNORECASE)
628-
629402
630403_cache_lock = _thread_allocate_lock()
631404# DO NOT modify _TimeRE_cache or _regex_cache without acquiring the cache lock
0 commit comments