1"""Strptime-related classes and functions.
2
3CLASSES:
4    LocaleTime -- Discovers and stores locale-specific time information
5    TimeRE -- Creates regexes for pattern matching a string of text containing
6                time information
7
8FUNCTIONS:
9    _getlang -- Figure out what language is being used for the locale
    _strptime -- Calculates the time fields represented by the passed-in string
11
12"""
13import time
14import locale
15import calendar
16from re import compile as re_compile
17from re import IGNORECASE
18from re import escape as re_escape
19from datetime import (date as datetime_date,
20                      timedelta as datetime_timedelta,
21                      timezone as datetime_timezone)
22from _thread import allocate_lock as _thread_allocate_lock
23
# Empty export list: a star-import of this module binds no names.
__all__ = []
25
def _getlang():
    """Return the current LC_TIME locale setting as reported by the locale
    module."""
    category = locale.LC_TIME
    return locale.getlocale(category)
29
class LocaleTime(object):
    """Snapshot of the locale-specific data needed to parse time strings.

    All names are stored lower-cased.

    ATTRIBUTES:
        f_weekday -- full weekday names (7-item list)
        a_weekday -- abbreviated weekday names (7-item list)
        f_month -- full month names (13-item list; dummy value in [0] so
                    that the index equals the month number)
        a_month -- abbreviated month names (13-item list; dummy value in
                    [0])
        am_pm -- AM/PM representation (2-item list)
        LC_date_time -- format string for date/time representation (string)
        LC_date -- format string for date representation (string)
        LC_time -- format string for time representation (string)
        timezone -- non-daylight- and daylight-savings timezone names
                    (2-item tuple of frozensets)
        tzname -- value of time.tzname when the instance was built
        daylight -- value of time.daylight when the instance was built
        lang -- value of _getlang() when the instance was built
    """

    def __init__(self):
        """Compute every attribute.

        The helpers must run in the order given: the date/time format
        calculation reads the names, AM/PM strings and timezone names
        produced by the earlier ones.

        The locale is sampled at the outset and compared again at the end
        so that an instance is never built from a mix of two locales, e.g.
        when another thread switches the locale while this constructor is
        running.  Callers should really hold a lock around locale changes;
        this check is a safety net for when they do not.  The timezone
        settings are verified the same way.

        Raises ValueError if the locale or the timezone settings changed
        while the attributes were being computed.
        """
        self.lang = _getlang()
        steps = (self.__calc_weekday, self.__calc_month, self.__calc_am_pm,
                 self.__calc_timezone, self.__calc_date_time)
        for step in steps:
            step()
        if self.lang != _getlang():
            raise ValueError("locale changed during initialization")
        tz_unchanged = (time.tzname == self.tzname and
                        time.daylight == self.daylight)
        if not tz_unchanged:
            raise ValueError("timezone changed during initialization")

    def __calc_weekday(self):
        # Lower-cased weekday names taken from the calendar module; index 0
        # is the first entry calendar reports.
        self.a_weekday = [name.lower() for name in calendar.day_abbr]
        self.f_weekday = [name.lower() for name in calendar.day_name]

    def __calc_month(self):
        # Lower-cased month names taken from the calendar module, including
        # calendar's own entry at index 0.
        self.a_month = [name.lower() for name in calendar.month_abbr]
        self.f_month = [name.lower() for name in calendar.month_name]

    def __calc_am_pm(self):
        # Ask strftime() for "%p" at one morning hour and one evening hour
        # of the module's usual static date (1999-03-17).
        self.am_pm = [
            time.strftime(
                "%p", time.struct_time((1999, 3, 17, hour, 44, 55, 2, 76, 0))
            ).lower()
            for hour in (1, 22)
        ]

    def __calc_date_time(self):
        # Derive LC_date_time, LC_date and LC_time by rendering a known date
        # with %c, %x and %X and then substituting each recognizable piece
        # of the output with the directive that produced it.

        # 1999-03-17 22:44:55 keeps the number of values that could stand
        # for more than one field to a minimum.
        sample = time.struct_time((1999, 3, 17, 22, 44, 55, 2, 76, 0))
        # 1999-01-03 is a Sunday that precedes the first Monday of the
        # year, so it is week 00 for %W but not for %U.
        week_probe = time.struct_time((1999, 1, 3, 1, 1, 1, 6, 3, 0))

        # The order of the substitutions is significant: it resolves what
        # an ambiguous piece of text stands for.
        substitutions = [
            ('%', '%%'),
            (self.f_weekday[2], '%A'),
            (self.f_month[3], '%B'),
            (self.a_weekday[2], '%a'),
            (self.a_month[3], '%b'),
            (self.am_pm[1], '%p'),
            ('1999', '%Y'),
            ('99', '%y'),
            ('22', '%H'),
            ('44', '%M'),
            ('55', '%S'),
            ('76', '%j'),
            ('17', '%d'),
            ('03', '%m'),
            ('3', '%m'),  # month rendered without a leading zero
            ('2', '%w'),
            ('10', '%I'),
        ]
        for zone_names in self.timezone:
            for zone in zone_names:
                substitutions.append((zone, "%Z"))

        formats = []
        for directive in ('%c', '%x', '%X'):
            template = time.strftime(directive, sample).lower()
            for text, replacement in substitutions:
                # Locale data may be missing and show up as an empty string
                # (e.g. no AM/PM strings, or empty timezone names); an
                # empty search string must never be substituted.
                if text:
                    template = template.replace(text, replacement)
            if '00' in time.strftime(directive, week_probe):
                week_directive = '%W'
            else:
                week_directive = '%U'
            # Any '11' still present is taken to be the week number of the
            # sample date.
            formats.append(template.replace('11', week_directive))
        self.LC_date_time, self.LC_date, self.LC_time = formats

    def __calc_timezone(self):
        # Record time.tzname / time.daylight and build self.timezone.
        # The case of tzname[0] == tzname[1] with daylight set is left for
        # strptime to sort out.
        try:
            time.tzset()
        except AttributeError:
            # time.tzset is not available on every platform.
            pass
        self.tzname = time.tzname
        self.daylight = time.daylight
        standard_names = frozenset({"utc", "gmt", self.tzname[0].lower()})
        dst_names = (frozenset({self.tzname[1].lower()}) if self.daylight
                     else frozenset())
        self.timezone = (standard_names, dst_names)
168
169
class TimeRE(dict):
    """Map each format directive letter to the regex text that matches it."""

    def __init__(self, locale_time=None):
        """Fill in the directive -> regex table.

        locale_time supplies the locale-dependent names and formats; a new
        LocaleTime is created when it is omitted.

        The entries are created in dependency order: the locale-defined
        composite directives (%c, %x, %X) come last because their patterns
        are assembled from the simpler entries.
        """
        self.locale_time = locale_time or LocaleTime()
        names = self.locale_time
        table = {
            # The " [1-9]" alternative accepts a space-padded day so that
            # the %c output of ANSI C can be parsed.
            'd': r"(?P<d>3[0-1]|[1-2]\d|0[1-9]|[1-9]| [1-9])",
            'f': r"(?P<f>[0-9]{1,6})",
            'H': r"(?P<H>2[0-3]|[0-1]\d|\d)",
            'I': r"(?P<I>1[0-2]|0[1-9]|[1-9])",
            'G': r"(?P<G>\d\d\d\d)",
            'j': r"(?P<j>36[0-6]|3[0-5]\d|[1-2]\d\d|0[1-9]\d|00[1-9]|[1-9]\d|0[1-9]|[1-9])",
            'm': r"(?P<m>1[0-2]|0[1-9]|[1-9])",
            'M': r"(?P<M>[0-5]\d|\d)",
            'S': r"(?P<S>6[0-1]|[0-5]\d|\d)",
            'U': r"(?P<U>5[0-3]|[0-4]\d|\d)",
            'w': r"(?P<w>[0-6])",
            'u': r"(?P<u>[1-7])",
            'V': r"(?P<V>5[0-3]|0[1-9]|[1-4]\d|\d)",
            # 'W' is derived from 'U' once the table exists.
            'y': r"(?P<y>\d\d)",
            # XXX: should 'Y' cope with fewer or more than 4 digits?
            'Y': r"(?P<Y>\d\d\d\d)",
            'z': r"(?P<z>[+-]\d\d:?[0-5]\d(:?[0-5]\d(\.\d{1,6})?)?|Z)",
            'A': self.__seqToRE(names.f_weekday, 'A'),
            'a': self.__seqToRE(names.a_weekday, 'a'),
            # Index 0 of the month lists is a dummy entry.
            'B': self.__seqToRE(names.f_month[1:], 'B'),
            'b': self.__seqToRE(names.a_month[1:], 'b'),
            'p': self.__seqToRE(names.am_pm, 'p'),
            'Z': self.__seqToRE((zone for zone_group in names.timezone
                                 for zone in zone_group),
                                'Z'),
            '%': '%',
        }
        parent = super()
        parent.__init__(table)
        parent.__setitem__('W', table['U'].replace('U', 'W'))
        composites = (('c', names.LC_date_time),
                      ('x', names.LC_date),
                      ('X', names.LC_time))
        for letter, layout in composites:
            parent.__setitem__(letter, self.pattern(layout))

    def __seqToRE(self, to_convert, directive):
        """Return a named-group regex matching any string in to_convert.

        The alternatives are ordered from longest to shortest so that a
        value which is a prefix of a longer value (e.g. 'abc' vs. 'abcdef')
        cannot match first.  An empty string is returned when there is
        nothing but empty strings to match.
        """
        candidates = list(to_convert)
        candidates.sort(key=len, reverse=True)
        if all(candidate == '' for candidate in candidates):
            return ''
        alternatives = '|'.join(map(re_escape, candidates))
        return '(?P<%s>%s)' % (directive, alternatives)

    def pattern(self, format):
        """Return the regex text for the format string.

        Characters that have a meaning in regex syntax are escaped, each
        run of whitespace is turned into \\s+, and every directive is
        replaced by its entry in this mapping.

        Raises KeyError for an unknown directive and IndexError when the
        format ends with a lone '%'.
        """
        # re.escape cannot be used here: it would also escape the '%' of
        # the format directives.
        escaped = re_compile(r"([\\.^$*+?\(\){}\[\]|])").sub(r"\\\1", format)
        remaining = re_compile(r'\s+').sub(r'\\s+', escaped)
        pieces = []
        while True:
            literal, marker, rest = remaining.partition('%')
            if not marker:
                break
            pieces.append(literal)
            pieces.append(self[rest[0]])
            remaining = rest[1:]
        pieces.append(remaining)
        return ''.join(pieces)

    def compile(self, format):
        """Return a case-insensitive compiled regex for the format string."""
        regex_source = self.pattern(format)
        return re_compile(regex_source, IGNORECASE)
264
# Lock protecting the two module-level caches defined below.
_cache_lock = _thread_allocate_lock()
# DO NOT modify _TimeRE_cache or _regex_cache without acquiring the cache lock
# first!
# Directive-to-regex table built when the module is imported; _strptime()
# replaces it when the locale or the timezone settings have changed.
_TimeRE_cache = TimeRE()
_CACHE_MAX_SIZE = 5 # _strptime() empties _regex_cache once it holds more entries than this
_regex_cache = {}  # format string -> compiled regex
271
def _calc_julian_from_U_or_W(year, week_of_year, day_of_week, week_starts_Mon):
    """Return the day of the year for a year, week number and weekday.

    day_of_week uses 0 for Monday through 6 for Sunday.  week_starts_Mon is
    true when the week number counts weeks starting on Monday (%W) and
    false when they start on Sunday (%U).

    The result is 1-based and can be zero or negative when week 0 is
    combined with a weekday that falls before January 1st.
    """
    jan1_weekday = datetime_date(year, 1, 1).weekday()
    if not week_starts_Mon:
        # For %U simply renumber the weekdays so that Sunday becomes the
        # first day of the week.
        jan1_weekday = (jan1_weekday + 1) % 7
        day_of_week = (day_of_week + 1) % 7
    if week_of_year == 0:
        # Week 0 is the partial week before the first week-starting day of
        # the year.
        return 1 + day_of_week - jan1_weekday
    partial_week_days = (7 - jan1_weekday) % 7
    full_week_days = 7 * (week_of_year - 1)
    return 1 + partial_week_days + full_week_days + day_of_week
291
292
def _calc_julian_from_V(iso_year, iso_week, iso_weekday):
    """Return (year, day of the year) for an ISO 8601 year, week and weekday.

    ISO weeks start on Monday and week 01 is the week containing 4 January.
    iso_weekday runs from 1 (Monday) to 7 (Sunday).  When the date belongs
    to the previous calendar year, that year is returned together with the
    day number counted within it.
    """
    jan4_correction = datetime_date(iso_year, 1, 4).isoweekday() + 3
    day_of_year = iso_week * 7 + iso_weekday - jan4_correction
    if day_of_year >= 1:
        return iso_year, day_of_year
    # Zero or negative: the day lies in the previous calendar year, so
    # re-express it relative to that year's January 1st.
    this_jan1 = datetime_date(iso_year, 1, 1).toordinal()
    previous_year = iso_year - 1
    previous_jan1 = datetime_date(previous_year, 1, 1).toordinal()
    return previous_year, day_of_year + this_jan1 - previous_jan1
307
308
def _strptime(data_string, format="%a %b %d %H:%M:%S %Y"):
    """Parse data_string according to format.

    Returns a 3-tuple (fields, fraction, gmtoff_fraction):

        fields -- 11-tuple (year, month, day, hour, minute, second,
                  weekday, julian, tz, tzname, gmtoff).  weekday is 0 for
                  Monday, julian is the 1-based day of the year, tz is -1
                  when unknown and otherwise the index of the matching set
                  in locale_time.timezone, tzname is the text matched by
                  %Z (or None) and gmtoff is the %z offset in seconds (or
                  None).
        fraction -- microseconds parsed by %f (0 when absent)
        gmtoff_fraction -- microsecond part of the %z offset (0 when
                  absent)

    Raises TypeError when either argument is not a str, and ValueError when
    the format is malformed, the data does not match it, data is left over,
    or the directives contradict each other (ISO year/week directives mixed
    with the calendar year or used without their companions).
    """

    for index, arg in enumerate([data_string, format]):
        if not isinstance(arg, str):
            msg = "strptime() argument {} must be str, not {}"
            raise TypeError(msg.format(index, type(arg)))

    global _TimeRE_cache, _regex_cache
    with _cache_lock:
        locale_time = _TimeRE_cache.locale_time
        # Rebuild everything when the locale or timezone changed since the
        # cached tables were created.
        if (_getlang() != locale_time.lang or
            time.tzname != locale_time.tzname or
            time.daylight != locale_time.daylight):
            _TimeRE_cache = TimeRE()
            _regex_cache.clear()
            locale_time = _TimeRE_cache.locale_time
        if len(_regex_cache) > _CACHE_MAX_SIZE:
            _regex_cache.clear()
        format_regex = _regex_cache.get(format)
        if not format_regex:
            try:
                format_regex = _TimeRE_cache.compile(format)
            # KeyError raised when a bad format is found; can be specified as
            # \\, in which case it was a stray % but with a space after it
            except KeyError as err:
                bad_directive = err.args[0]
                if bad_directive == "\\":
                    bad_directive = "%"
                raise ValueError("'%s' is a bad directive in format '%s'" %
                                    (bad_directive, format)) from None
            # IndexError only occurs when the format string is "%"
            except IndexError:
                raise ValueError("stray %% in format '%s'" % format) from None
            _regex_cache[format] = format_regex
    found = format_regex.match(data_string)
    if not found:
        raise ValueError("time data %r does not match format %r" %
                         (data_string, format))
    if len(data_string) != found.end():
        raise ValueError("unconverted data remains: %s" %
                          data_string[found.end():])

    iso_year = year = None
    month = day = 1
    hour = minute = second = fraction = 0
    tz = -1
    gmtoff = None
    gmtoff_fraction = 0
    # None signals that the value was not supplied by the input.
    iso_week = week_of_year = None
    week_of_year_start = None
    # weekday and julian defaulted to None so as to signal need to calculate
    # values
    weekday = julian = None
    found_dict = found.groupdict()
    for group_key in found_dict:
        # Directives not explicitly handled below:
        #   c, x, X
        #      handled by making out of other directives
        #   U, W
        #      worthless without day of the week
        if group_key == 'y':
            year = int(found_dict['y'])
            # Open Group specification for strptime() states that a %y
            # value in the range of [00, 68] is in the century 2000, while
            # [69,99] is in the century 1900
            if year <= 68:
                year += 2000
            else:
                year += 1900
        elif group_key == 'Y':
            year = int(found_dict['Y'])
        elif group_key == 'G':
            iso_year = int(found_dict['G'])
        elif group_key == 'm':
            month = int(found_dict['m'])
        elif group_key == 'B':
            month = locale_time.f_month.index(found_dict['B'].lower())
        elif group_key == 'b':
            month = locale_time.a_month.index(found_dict['b'].lower())
        elif group_key == 'd':
            day = int(found_dict['d'])
        elif group_key == 'H':
            hour = int(found_dict['H'])
        elif group_key == 'I':
            hour = int(found_dict['I'])
            ampm = found_dict.get('p', '').lower()
            # If there was no AM/PM indicator, we'll treat this like AM
            if ampm in ('', locale_time.am_pm[0]):
                # We're in AM so the hour is correct unless we're
                # looking at 12 midnight.
                # 12 midnight == 12 AM == hour 0
                if hour == 12:
                    hour = 0
            elif ampm == locale_time.am_pm[1]:
                # We're in PM so we need to add 12 to the hour unless
                # we're looking at 12 noon.
                # 12 noon == 12 PM == hour 12
                if hour != 12:
                    hour += 12
        elif group_key == 'M':
            minute = int(found_dict['M'])
        elif group_key == 'S':
            second = int(found_dict['S'])
        elif group_key == 'f':
            s = found_dict['f']
            # Pad to always return microseconds.
            s += "0" * (6 - len(s))
            fraction = int(s)
        elif group_key == 'A':
            weekday = locale_time.f_weekday.index(found_dict['A'].lower())
        elif group_key == 'a':
            weekday = locale_time.a_weekday.index(found_dict['a'].lower())
        elif group_key == 'w':
            # %w counts from Sunday == 0; convert to Monday == 0.
            weekday = int(found_dict['w'])
            if weekday == 0:
                weekday = 6
            else:
                weekday -= 1
        elif group_key == 'u':
            # %u counts from Monday == 1.
            weekday = int(found_dict['u'])
            weekday -= 1
        elif group_key == 'j':
            julian = int(found_dict['j'])
        elif group_key in ('U', 'W'):
            week_of_year = int(found_dict[group_key])
            if group_key == 'U':
                # U starts week on Sunday.
                week_of_year_start = 6
            else:
                # W starts week on Monday.
                week_of_year_start = 0
        elif group_key == 'V':
            iso_week = int(found_dict['V'])
        elif group_key == 'z':
            z = found_dict['z']
            # The regex is compiled case-insensitively, so the UTC
            # designator may arrive as 'Z' or 'z'.
            if z in ('Z', 'z'):
                gmtoff = 0
            else:
                if z[3] == ':':
                    z = z[:3] + z[4:]
                    if len(z) > 5:
                        if z[5] != ':':
                            msg = f"Inconsistent use of : in {found_dict['z']}"
                            raise ValueError(msg)
                        z = z[:5] + z[6:]
                elif ':' in z:
                    # e.g. "+0100:30": a colon before the seconds only.
                    msg = f"Inconsistent use of : in {found_dict['z']}"
                    raise ValueError(msg)
                # z is now "+HHMM", "+HHMMSS" or "+HHMMSS.ffffff".
                hours = int(z[1:3])
                minutes = int(z[3:5])
                seconds = int(z[5:7] or 0)
                gmtoff = (hours * 60 * 60) + (minutes * 60) + seconds
                gmtoff_remainder = z[8:]
                # Pad to always return microseconds.
                gmtoff_remainder_padding = "0" * (6 - len(gmtoff_remainder))
                gmtoff_fraction = int(gmtoff_remainder + gmtoff_remainder_padding)
                if z.startswith("-"):
                    gmtoff = -gmtoff
                    gmtoff_fraction = -gmtoff_fraction
        elif group_key == 'Z':
            # Since -1 is default value only need to worry about setting tz if
            # it can be something other than -1.
            found_zone = found_dict['Z'].lower()
            for value, tz_values in enumerate(locale_time.timezone):
                if found_zone in tz_values:
                    # Deal with bad locale setup where timezone names are the
                    # same and yet time.daylight is true; too ambiguous to
                    # be able to tell what timezone has daylight savings
                    if (time.tzname[0] == time.tzname[1] and
                       time.daylight and found_zone not in ("utc", "gmt")):
                        break
                    else:
                        tz = value
                        break
    # Deal with the cases where ambiguities arise; never assume default
    # values for the ISO year/week.  Every ISO directive that cannot be
    # honoured is rejected rather than silently ignored.
    if iso_year is not None:
        if iso_week is None or weekday is None:
            raise ValueError("ISO year directive '%G' must be used with "
                             "the ISO week directive '%V' and a weekday "
                             "directive ('%A', '%a', '%w', or '%u').")
        if julian is not None:
            raise ValueError("Day of the year directive '%j' is not "
                             "compatible with ISO year directive '%G'. "
                             "Use '%Y' instead.")
        if year is not None:
            raise ValueError("ISO week directive '%V' is incompatible with "
                             "the year directive '%Y'. Use the ISO year '%G' "
                             "instead.")
    elif iso_week is not None:
        if year is None or weekday is None:
            raise ValueError("ISO week directive '%V' must be used with "
                             "the ISO year directive '%G' and a weekday "
                             "directive ('%A', '%a', '%w', or '%u').")
        else:
            raise ValueError("ISO week directive '%V' is incompatible with "
                             "the year directive '%Y'. Use the ISO year '%G' "
                             "instead.")

    leap_year_fix = False
    if year is None and month == 2 and day == 29:
        year = 1904  # 1904 is first leap year of 20th century
        leap_year_fix = True
    elif year is None:
        year = 1900

    # If we know the week of the year and what day of that week, we can figure
    # out the Julian day of the year.
    if julian is None and weekday is not None:
        if week_of_year is not None:
            week_starts_Mon = week_of_year_start == 0
            julian = _calc_julian_from_U_or_W(year, week_of_year, weekday,
                                                week_starts_Mon)
        elif iso_year is not None and iso_week is not None:
            year, julian = _calc_julian_from_V(iso_year, iso_week, weekday + 1)
        if julian is not None and julian <= 0:
            # The day belongs to the previous year; renumber it there.
            year -= 1
            yday = 366 if calendar.isleap(year) else 365
            julian += yday

    if julian is None:
        # Cannot pre-calculate datetime_date() since can change in Julian
        # calculation and thus could have different value for the day of
        # the week calculation.
        # Need to add 1 to result since first day of the year is 1, not 0.
        julian = datetime_date(year, month, day).toordinal() - \
                  datetime_date(year, 1, 1).toordinal() + 1
    else:  # Assume that if they bothered to include Julian day (or if it was
           # calculated above with year/week/weekday) it will be accurate.
        datetime_result = datetime_date.fromordinal(
                            (julian - 1) +
                            datetime_date(year, 1, 1).toordinal())
        year = datetime_result.year
        month = datetime_result.month
        day = datetime_result.day
    if weekday is None:
        weekday = datetime_date(year, month, day).weekday()
    # Add timezone info
    tzname = found_dict.get("Z")

    if leap_year_fix:
        # the caller didn't supply a year but asked for Feb 29th. We couldn't
        # use the default of 1900 for computations. We set it back to ensure
        # that February 29th is smaller than March 1st.
        year = 1900

    return (year, month, day,
            hour, minute, second,
            weekday, julian, tz, tzname, gmtoff), fraction, gmtoff_fraction
558
def _strptime_time(data_string, format="%a %b %d %H:%M:%S %Y"):
    """Parse data_string with format and return the result as a
    time.struct_time."""
    fields, _fraction, _gmtoff_fraction = _strptime(data_string, format)
    # _strptime() yields more items than struct_time accepts; keep only the
    # leading ones.
    item_count = time._STRUCT_TM_ITEMS
    return time.struct_time(fields[:item_count])
564
def _strptime_datetime(cls, data_string, format="%a %b %d %H:%M:%S %Y"):
    """Parse data_string with format and return an instance of cls.

    cls is called with year, month, day, hour, minute, second and
    microsecond, plus a timezone object when the input carried a UTC
    offset.
    """
    fields, fraction, gmtoff_fraction = _strptime(data_string, format)
    tzname, gmtoff = fields[-2], fields[-1]
    ctor_args = fields[:6] + (fraction,)
    if gmtoff is None:
        # No %z in the input: build a naive result.
        return cls(*ctor_args)
    offset = datetime_timedelta(seconds=gmtoff, microseconds=gmtoff_fraction)
    if tzname:
        tzinfo = datetime_timezone(offset, tzname)
    else:
        tzinfo = datetime_timezone(offset)
    return cls(*ctor_args, tzinfo)
580