1"""Strptime-related classes and functions.
2
3CLASSES:
4    LocaleTime -- Discovers and stores locale-specific time information
5    TimeRE -- Creates regexes for pattern matching a string of text containing
6                time information
7
8FUNCTIONS:
9    _getlang -- Figure out what language is being used for the locale
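    _strptime -- Calculates the time fields and microseconds represented by
                the passed-in string
    _strptime_time -- Wraps the result of _strptime in a time.struct_time
    _strptime_datetime -- Builds an instance of the given class from the
                result of _strptime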
11
12"""
13# -----------------------------------------------------------------------------
14# _strptime.py
15#
16# Licensed under PYTHON SOFTWARE FOUNDATION LICENSE
17# See licenses/PYTHON.rst
18#
19# Copied from https://github.com/python/cpython/blob/3.5/Lib/_strptime.py
20# -----------------------------------------------------------------------------
21import time
22import locale
23import calendar
24from re import compile as re_compile
25from re import IGNORECASE
26from re import escape as re_escape
27from datetime import (date as datetime_date,
28                      timedelta as datetime_timedelta,
29                      timezone as datetime_timezone)
30try:
31    from _thread import allocate_lock as _thread_allocate_lock
32except ImportError:
33    from _dummy_thread import allocate_lock as _thread_allocate_lock
34
# An empty __all__ means a star-import of this module binds no names.
__all__ = []
36
def _getlang():
    """Return the locale setting currently in effect for LC_TIME.

    The value is whatever locale.getlocale() reports for the LC_TIME
    category; it is used to detect locale changes between calls.
    """
    current_setting = locale.getlocale(locale.LC_TIME)
    return current_setting
40
class LocaleTime(object):
    """Snapshot of the locale-specific strings and formats related to time.

    ATTRIBUTES:
        f_weekday -- lowercased full weekday names (7-item list)
        a_weekday -- lowercased abbreviated weekday names (7-item list)
        f_month -- lowercased full month names (13-item list; entry [0] is
                    taken from calendar.month_name[0])
        a_month -- lowercased abbreviated month names (13-item list; entry
                    [0] is taken from calendar.month_abbr[0])
        am_pm -- lowercased AM/PM representation (2-item list)
        LC_date_time -- format string for date/time representation (string)
        LC_date -- format string for date representation (string)
        LC_time -- format string for time representation (string)
        timezone -- non-daylight- and daylight-savings timezone names
                    (2-item tuple of frozensets)
        tzname -- value of time.tzname captured during initialization
        daylight -- value of time.daylight captured during initialization
        lang -- value returned by _getlang() at initialization
    """

    def __init__(self):
        """Compute every attribute from the current locale and timezone.

        The helpers run in a fixed order because the format strings are
        derived from the names, the AM/PM strings and the timezone names.

        The locale is recorded at the outset and compared again before
        returning, so that the attributes are never built from a mix of two
        locales (for example when another thread switches the locale while
        this constructor is running).  The timezone information is checked
        the same way.

        Raises ValueError if the locale or the timezone information changed
        while the attributes were being computed.
        """
        self.lang = _getlang()
        for compute in (self.__calc_weekday,
                        self.__calc_month,
                        self.__calc_am_pm,
                        self.__calc_timezone,
                        self.__calc_date_time):
            compute()
        if _getlang() != self.lang:
            raise ValueError("locale changed during initialization")
        if (time.tzname, time.daylight) != (self.tzname, self.daylight):
            raise ValueError("timezone changed during initialization")

    def __pad(self, seq, front):
        # Return a list copy of seq with '' inserted at the front when
        # `front` is true, otherwise appended at the back.
        padded = list(seq)
        padded.insert(0 if front else len(padded), '')
        return padded

    def __calc_weekday(self):
        # Fill self.a_weekday and self.f_weekday with the lowercased names
        # provided by the calendar module.
        self.a_weekday, self.f_weekday = (
            [calendar.day_abbr[pos].lower() for pos in range(7)],
            [calendar.day_name[pos].lower() for pos in range(7)],
        )

    def __calc_month(self):
        # Fill self.a_month and self.f_month with the lowercased names
        # provided by the calendar module (13 entries, index 0 included).
        self.a_month, self.f_month = (
            [calendar.month_abbr[pos].lower() for pos in range(13)],
            [calendar.month_name[pos].lower() for pos in range(13)],
        )

    def __calc_am_pm(self):
        # Fill self.am_pm by formatting "%p" for a morning hour (1) and an
        # evening hour (22) of the fixed sample date 1999-03-17.
        self.am_pm = [
            time.strftime(
                "%p",
                time.struct_time((1999, 3, 17, hour, 44, 55, 2, 76, 0)),
            ).lower()
            for hour in (1, 22)
        ]

    def __calc_date_time(self):
        # Derive self.LC_date_time, self.LC_date and self.LC_time.
        #
        # The locale's "%c", "%x" and "%X" renderings of a sample date are
        # turned back into format strings by replacing each recognizable
        # piece of text with the directive that produced it.  The sample
        # date (1999,3,17,22,44,55,2,76,0) keeps the number of overlapping
        # values small, and the order of the replacements matters: longer
        # and more specific values are substituted before shorter ones.
        sample = time.struct_time((1999, 3, 17, 22, 44, 55, 2, 76, 0))
        directives = ('%c', '%x', '%X')
        rendered = [time.strftime(code, sample).lower()
                    for code in directives]
        substitutions = [
            ('%', '%%'),
            (self.f_weekday[2], '%A'),
            (self.f_month[3], '%B'),
            (self.a_weekday[2], '%a'),
            (self.a_month[3], '%b'),
            (self.am_pm[1], '%p'),
            ('1999', '%Y'),
            ('99', '%y'),
            ('22', '%H'),
            ('44', '%M'),
            ('55', '%S'),
            ('76', '%j'),
            ('17', '%d'),
            ('03', '%m'),
            # '3' covers a month printed without a leading zero.
            ('3', '%m'),
            ('2', '%w'),
            ('10', '%I'),
        ]
        for zone_names in self.timezone:
            substitutions.extend((zone, "%Z") for zone in zone_names)
        # Probe date 1999-01-03 with weekday 6 (Sunday): a week number that
        # renders as '00' means weeks are counted from Monday (%W);
        # otherwise %U is assumed.
        week_probe = time.struct_time((1999, 1, 3, 1, 1, 1, 6, 3, 0))
        formats = []
        for code, text in zip(directives, rendered):
            for found, replacement in substitutions:
                # Missing locale information shows up as an empty string
                # (e.g. no AM/PM text, or empty timezone names); replacing
                # '' would corrupt the format, so skip it.
                if found:
                    text = text.replace(found, replacement)
            if '00' in time.strftime(code, week_probe):
                week_directive = '%W'
            else:
                week_directive = '%U'
            # '11' is the week number of the sample date.
            formats.append(text.replace('11', week_directive))
        self.LC_date_time, self.LC_date, self.LC_time = formats

    def __calc_timezone(self):
        # Record time.tzname / time.daylight and build self.timezone.
        # The case time.tzname[0] == time.tzname[1] with time.daylight set
        # is not handled here; _strptime deals with it.
        try:
            time.tzset()
        except AttributeError:
            # time.tzset is not available on every platform.
            pass
        self.tzname, self.daylight = time.tzname, time.daylight
        standard_names = frozenset({"utc", "gmt", self.tzname[0].lower()})
        if self.daylight:
            daylight_names = frozenset({self.tzname[1].lower()})
        else:
            daylight_names = frozenset()
        self.timezone = (standard_names, daylight_names)
188
189
class TimeRE(dict):
    """Dictionary mapping strptime directive letters to regex fragments."""

    def __init__(self, locale_time=None):
        """Populate the directive table.

        locale_time -- object supplying the locale-dependent names and
                       format strings; a new LocaleTime is created when the
                       argument is omitted or falsy.

        The plain directives are stored first because the composite ones
        (%c, %x, %X) are expanded through pattern(), which looks the plain
        directives up in this dictionary.

        """
        self.locale_time = locale_time if locale_time else LocaleTime()
        names = self.locale_time
        parent = super()
        directive_table = {
            # The " [1-9]" alternative makes %c output from ANSI C work
            'd': r"(?P<d>3[0-1]|[1-2]\d|0[1-9]|[1-9]| [1-9])",
            'f': r"(?P<f>[0-9]{1,6})",
            'H': r"(?P<H>2[0-3]|[0-1]\d|\d)",
            'I': r"(?P<I>1[0-2]|0[1-9]|[1-9])",
            'j': r"(?P<j>36[0-6]|3[0-5]\d|[1-2]\d\d|0[1-9]\d|00[1-9]|[1-9]\d|0[1-9]|[1-9])",
            'm': r"(?P<m>1[0-2]|0[1-9]|[1-9])",
            'M': r"(?P<M>[0-5]\d|\d)",
            'S': r"(?P<S>6[0-1]|[0-5]\d|\d)",
            'U': r"(?P<U>5[0-3]|[0-4]\d|\d)",
            'w': r"(?P<w>[0-6])",
            # 'W' is derived from 'U' further down
            'y': r"(?P<y>\d\d)",
            #XXX: Does 'Y' need to worry about having less or more than
            #     4 digits?
            'Y': r"(?P<Y>\d\d\d\d)",
            'z': r"(?P<z>[+-]\d\d[0-5]\d)",
            'A': self.__seqToRE(names.f_weekday, 'A'),
            'a': self.__seqToRE(names.a_weekday, 'a'),
            # Index 0 of the month lists is not a real month name.
            'B': self.__seqToRE(names.f_month[1:], 'B'),
            'b': self.__seqToRE(names.a_month[1:], 'b'),
            'p': self.__seqToRE(names.am_pm, 'p'),
            'Z': self.__seqToRE([zone
                                 for zone_group in names.timezone
                                 for zone in zone_group],
                                'Z'),
            '%': '%',
        }
        parent.__init__(directive_table)
        parent.__setitem__('W', parent.__getitem__('U').replace('U', 'W'))
        for key, attribute in (('c', 'LC_date_time'),
                               ('x', 'LC_date'),
                               ('X', 'LC_time')):
            parent.__setitem__(key, self.pattern(getattr(names, attribute)))

    def __seqToRE(self, to_convert, directive):
        """Build a named alternation group matching any value in to_convert.

        The alternatives are ordered from longest to shortest so that a
        value which is a prefix of a longer value cannot win the match
        (e.g., 'abc' matching when 'abcdef' should have been the match).
        Returns '' when no value differs from the empty string.

        """
        candidates = list(to_convert)
        candidates.sort(key=len, reverse=True)
        if not any(candidate != '' for candidate in candidates):
            return ''
        alternatives = '|'.join(re_escape(candidate)
                                for candidate in candidates)
        opened = '(?P<%s>%s' % (directive, alternatives)
        return '%s)' % opened

    def pattern(self, format):
        """Return the regex source string equivalent to the format string.

        Characters with a meaning in regex syntax are backslash-escaped,
        runs of whitespace become \\s+, and every %-directive is replaced
        by its entry in this dictionary.  An unknown directive raises
        KeyError; a % with nothing after it raises IndexError.

        """
        # re.escape cannot be used here because the % of the directives
        # must survive the escaping step.
        escaped = re_compile(r"([\\.^$*+?\(\){}\[\]|])").sub(r"\\\1", format)
        remaining = re_compile(r'\s+').sub(r'\\s+', escaped)
        pieces = []
        while '%' in remaining:
            literal, _, tail = remaining.partition('%')
            pieces.append(literal)
            # tail[0] is the directive letter.
            pieces.append(self[tail[0]])
            remaining = tail[1:]
        pieces.append(remaining)
        return ''.join(pieces)

    def compile(self, format):
        """Return a case-insensitive compiled regex for the format string."""
        regex_source = self.pattern(format)
        return re_compile(regex_source, IGNORECASE)
281
# Lock protecting the two caches below; _strptime holds it while it reads or
# replaces them.
_cache_lock = _thread_allocate_lock()
# DO NOT modify _TimeRE_cache or _regex_cache without acquiring the cache lock
# first!
# TimeRE built at import time; _strptime replaces it with a fresh TimeRE when
# the locale or the timezone information no longer matches.
_TimeRE_cache = TimeRE()
# _strptime empties _regex_cache once it holds more than this many entries.
_CACHE_MAX_SIZE = 5 # Max number of regexes stored in _regex_cache
# Maps a format string to the regex compiled for it.
_regex_cache = {}
288
def _calc_julian_from_U_or_W(year, week_of_year, day_of_week, week_starts_Mon):
    """Return the day of the year for a week number and a day of the week.

    year -- the year the week number refers to
    week_of_year -- week number as given by %U or %W (0 is the partial week
                    before the first full week)
    day_of_week -- day of the week with Monday as 0 and Sunday as 6
    week_starts_Mon -- true when weeks start on Monday (%W), false when
                       they start on Sunday (%U)

    The result is 1-based and can be zero or negative when the requested
    day of week 0 falls before January 1st; the caller handles that case.
    """
    jan1_weekday = datetime_date(year, 1, 1).weekday()
    if not week_starts_Mon:
        # For %U, renumber the days so that Sunday becomes day 0 of the
        # week; the arithmetic below is then the same for both directives.
        jan1_weekday = (jan1_weekday + 1) % 7
        day_of_week = (day_of_week + 1) % 7
    if week_of_year == 0:
        # Week 0 exists only when the year does not begin on the first day
        # of the week; count relative to January 1st.
        return 1 + day_of_week - jan1_weekday
    days_in_week_zero = (7 - jan1_weekday) % 7
    days_before_week = days_in_week_zero + 7 * (week_of_year - 1)
    return 1 + days_before_week + day_of_week
308
309
def _strptime(data_string, format="%a %b %d %H:%M:%S %Y"):
    """Parse data_string according to format.

    Returns a 2-tuple.  The first item is the 11-item tuple
    (year, month, day, hour, minute, second, weekday, julian, tz, tzname,
    gmtoff); the second item is an int number of microseconds.

    In that tuple, tz is -1 unless %Z matched a known timezone name, in
    which case it is the index (0 or 1) of the name set it was found in;
    tzname is the text matched by %Z or None; gmtoff is the %z offset in
    seconds or None.

    Raises TypeError when either argument is not a str, and ValueError when
    format contains a bad directive or a stray %, when data_string does not
    match format, or when unconverted data remains after the match."""

    for index, arg in enumerate([data_string, format]):
        if not isinstance(arg, str):
            msg = "strptime() argument {} must be str, not {}"
            raise TypeError(msg.format(index, type(arg)))

    global _TimeRE_cache, _regex_cache
    with _cache_lock:
        locale_time = _TimeRE_cache.locale_time
        # Rebuild the directive table and drop every compiled regex when the
        # locale or the timezone information differs from the cached one.
        if (_getlang() != locale_time.lang or
            time.tzname != locale_time.tzname or
            time.daylight != locale_time.daylight):
            _TimeRE_cache = TimeRE()
            _regex_cache.clear()
            locale_time = _TimeRE_cache.locale_time
        if len(_regex_cache) > _CACHE_MAX_SIZE:
            _regex_cache.clear()
        format_regex = _regex_cache.get(format)
        if not format_regex:
            try:
                format_regex = _TimeRE_cache.compile(format)
            # KeyError raised when a bad format is found; can be specified as
            # \\, in which case it was a stray % but with a space after it
            except KeyError as err:
                bad_directive = err.args[0]
                if bad_directive == "\\":
                    bad_directive = "%"
                del err
                raise ValueError("'%s' is a bad directive in format '%s'" %
                                    (bad_directive, format)) from None
            # IndexError occurs when the format ends with a % that has no
            # directive character after it
            except IndexError:
                raise ValueError("stray %% in format '%s'" % format) from None
            _regex_cache[format] = format_regex
    found = format_regex.match(data_string)
    if not found:
        raise ValueError("time data %r does not match format %r" %
                         (data_string, format))
    # match() anchors only at the start, so trailing text must be rejected
    # explicitly.
    if len(data_string) != found.end():
        raise ValueError("unconverted data remains: %s" %
                          data_string[found.end():])

    # year stays None until a directive supplies it so that the Feb 29
    # special case below can tell "no year given" from an explicit year.
    year = None
    month = day = 1
    hour = minute = second = fraction = 0
    tz = -1
    tzoffset = None
    # Default to -1 to signify that values not known; not critical to have,
    # though
    week_of_year = -1
    week_of_year_start = -1
    # weekday and julian defaulted to None so as to signal need to calculate
    # values
    weekday = julian = None
    found_dict = found.groupdict()
    for group_key in found_dict.keys():
        # Directives not explicitly handled below:
        #   c, x, X
        #      handled by making out of other directives
        #   U, W
        #      worthless without day of the week
        if group_key == 'y':
            year = int(found_dict['y'])
            # Open Group specification for strptime() states that a %y
            # value in the range of [00, 68] is in the century 2000, while
            # [69,99] is in the century 1900
            if year <= 68:
                year += 2000
            else:
                year += 1900
        elif group_key == 'Y':
            year = int(found_dict['Y'])
        elif group_key == 'm':
            month = int(found_dict['m'])
        elif group_key == 'B':
            # The month lists carry an extra entry at index 0, so the list
            # index is the month number.
            month = locale_time.f_month.index(found_dict['B'].lower())
        elif group_key == 'b':
            month = locale_time.a_month.index(found_dict['b'].lower())
        elif group_key == 'd':
            day = int(found_dict['d'])
        elif group_key == 'H':
            hour = int(found_dict['H'])
        elif group_key == 'I':
            hour = int(found_dict['I'])
            ampm = found_dict.get('p', '').lower()
            # If there was no AM/PM indicator, we'll treat this like AM
            if ampm in ('', locale_time.am_pm[0]):
                # We're in AM so the hour is correct unless we're
                # looking at 12 midnight.
                # 12 midnight == 12 AM == hour 0
                if hour == 12:
                    hour = 0
            elif ampm == locale_time.am_pm[1]:
                # We're in PM so we need to add 12 to the hour unless
                # we're looking at 12 noon.
                # 12 noon == 12 PM == hour 12
                if hour != 12:
                    hour += 12
        elif group_key == 'M':
            minute = int(found_dict['M'])
        elif group_key == 'S':
            second = int(found_dict['S'])
        elif group_key == 'f':
            s = found_dict['f']
            # Pad to always return microseconds.
            s += "0" * (6 - len(s))
            fraction = int(s)
        elif group_key == 'A':
            weekday = locale_time.f_weekday.index(found_dict['A'].lower())
        elif group_key == 'a':
            weekday = locale_time.a_weekday.index(found_dict['a'].lower())
        elif group_key == 'w':
            # %w counts from Sunday == 0; convert to Monday == 0.
            weekday = int(found_dict['w'])
            if weekday == 0:
                weekday = 6
            else:
                weekday -= 1
        elif group_key == 'j':
            julian = int(found_dict['j'])
        elif group_key in ('U', 'W'):
            week_of_year = int(found_dict[group_key])
            if group_key == 'U':
                # U starts week on Sunday.
                week_of_year_start = 6
            else:
                # W starts week on Monday.
                week_of_year_start = 0
        elif group_key == 'z':
            # z has the form +HHMM or -HHMM; tzoffset is kept in minutes.
            z = found_dict['z']
            tzoffset = int(z[1:3]) * 60 + int(z[3:5])
            if z.startswith("-"):
                tzoffset = -tzoffset
        elif group_key == 'Z':
            # Since -1 is default value only need to worry about setting tz if
            # it can be something other than -1.
            found_zone = found_dict['Z'].lower()
            for value, tz_values in enumerate(locale_time.timezone):
                if found_zone in tz_values:
                    # Deal with bad locale setup where timezone names are the
                    # same and yet time.daylight is true; too ambiguous to
                    # be able to tell what timezone has daylight savings
                    if (time.tzname[0] == time.tzname[1] and
                       time.daylight and found_zone not in ("utc", "gmt")):
                        break
                    else:
                        tz = value
                        break
    leap_year_fix = False
    if year is None and month == 2 and day == 29:
        year = 1904  # 1904 is first leap year of 20th century
        leap_year_fix = True
    elif year is None:
        year = 1900
    # If we know the week of the year and what day of that week, we can figure
    # out the Julian day of the year.
    if julian is None and week_of_year != -1 and weekday is not None:
        week_starts_Mon = True if week_of_year_start == 0 else False
        julian = _calc_julian_from_U_or_W(year, week_of_year, weekday,
                                            week_starts_Mon)
        # A non-positive result means the day belongs to the previous year;
        # re-express it as a day of that year.
        if julian <= 0:
            year -= 1
            yday = 366 if calendar.isleap(year) else 365
            julian += yday
    # Cannot pre-calculate datetime_date() since can change in Julian
    # calculation and thus could have different value for the day of the week
    # calculation.
    if julian is None:
        # Need to add 1 to result since first day of the year is 1, not 0.
        julian = datetime_date(year, month, day).toordinal() - \
                  datetime_date(year, 1, 1).toordinal() + 1
    else:  # Assume that if they bothered to include Julian day it will
           # be accurate.
        datetime_result = datetime_date.fromordinal((julian - 1) + datetime_date(year, 1, 1).toordinal())
        year = datetime_result.year
        month = datetime_result.month
        day = datetime_result.day
    if weekday is None:
        weekday = datetime_date(year, month, day).weekday()
    # Add timezone info
    tzname = found_dict.get("Z")
    if tzoffset is not None:
        # tzoffset is in minutes; gmtoff is reported in seconds.
        gmtoff = tzoffset * 60
    else:
        gmtoff = None

    if leap_year_fix:
        # the caller didn't supply a year but asked for Feb 29th. We couldn't
        # use the default of 1900 for computations. We set it back to ensure
        # that February 29th is smaller than March 1st.
        year = 1900

    return (year, month, day,
            hour, minute, second,
            weekday, julian, tz, tzname, gmtoff), fraction
508
def _strptime_time(data_string, format="%a %b %d %H:%M:%S %Y"):
    """Parse data_string with format and return a time.struct_time.

    The microseconds reported by _strptime are discarded, and only the
    first time._STRUCT_TM_ITEMS fields of its tuple are passed on.
    """
    fields, _microseconds = _strptime(data_string, format)
    return time.struct_time(fields[:time._STRUCT_TM_ITEMS])
514
def _strptime_datetime(cls, data_string, format="%a %b %d %H:%M:%S %Y"):
    """Parse data_string with format and return an instance of cls.

    cls is called with year, month, day, hour, minute, second and
    microsecond, followed by a datetime.timezone when the parsed string
    carried a UTC offset (%z).  The timezone is given the name matched by
    %Z when there is one.
    """
    parsed, microsecond = _strptime(data_string, format)
    # The last two items of the parsed tuple are the timezone name and the
    # UTC offset in seconds.
    zone_name, utc_offset = parsed[-2:]
    ctor_args = parsed[:6] + (microsecond,)
    if utc_offset is not None:
        delta = datetime_timedelta(seconds=utc_offset)
        if zone_name:
            zone = datetime_timezone(delta, zone_name)
        else:
            zone = datetime_timezone(delta)
        ctor_args += (zone,)

    return cls(*ctor_args)
530