1# -*- coding: utf-8 -*-
2"""
3This module offers a generic date/time string parser which is able to parse
4most known formats to represent a date and/or time.
5
6This module attempts to be forgiving with regards to unlikely input formats,
7returning a datetime object even for dates which are ambiguous. If an element
8of a date/time stamp is omitted, the following rules are applied:
9
10- If AM or PM is left unspecified, a 24-hour clock is assumed, however, an hour
11  on a 12-hour clock (``0 <= hour <= 12``) *must* be specified if AM or PM is
12  specified.
13- If a time zone is omitted, a timezone-naive datetime is returned.
14
15If any other elements are missing, they are taken from the
16:class:`datetime.datetime` object passed to the parameter ``default``. If this
17results in a day number exceeding the valid number of days per month, the
18value falls back to the end of the month.
19
20Additional resources about date/time string formats can be found below:
21
22- `A summary of the international standard date and time notation
23  <https://www.cl.cam.ac.uk/~mgk25/iso-time.html>`_
24- `W3C Date and Time Formats <https://www.w3.org/TR/NOTE-datetime>`_
25- `Time Formats (Planetary Rings Node) <https://pds-rings.seti.org:443/tools/time_formats.html>`_
26- `CPAN ParseDate module
27  <https://metacpan.org/pod/release/MUIR/Time-modules-2013.0912/lib/Time/ParseDate.pm>`_
28- `Java SimpleDateFormat Class
29  <https://docs.oracle.com/javase/6/docs/api/java/text/SimpleDateFormat.html>`_
30"""
31from __future__ import unicode_literals
32
33import datetime
34import re
35import string
36import time
37import warnings
38
39from calendar import monthrange
40from io import StringIO
41
42import six
43from six import integer_types, text_type
44
45from decimal import Decimal
46
47from warnings import warn
48
49from .. import relativedelta
50from .. import tz
51
52__all__ = ["parse", "parserinfo", "ParserError"]
53
54
55# TODO: pandas.core.tools.datetimes imports this explicitly.  Might be worth
56# making public and/or figuring out if there is something we can
57# take off their plate.
class _timelex(object):
    """
    Lexer that cuts a date/time string into tokens.

    A token is a run of letters, a run of digits (optionally containing
    decimal separators), a single blank standing in for one whitespace
    character, or any other single character. Instances are iterators that
    yield tokens until the input is exhausted.
    """
    # Fractional seconds are sometimes split by a comma
    _split_decimal = re.compile("([.,])")

    def __init__(self, instream):
        """
        :param instream:
            Text, ``bytes``/``bytearray`` (decoded with the default codec) or
            an object exposing a ``read`` attribute.

        :raises TypeError:
            If ``instream`` is neither text nor something with ``read``.
        """
        if isinstance(instream, (bytes, bytearray)):
            instream = instream.decode()

        is_text = isinstance(instream, text_type)
        if not is_text and getattr(instream, 'read', None) is None:
            raise TypeError('Parser must be a string or character stream, not '
                            '{itype}'.format(itype=instream.__class__.__name__))
        if is_text:
            instream = StringIO(instream)

        self.instream = instream
        self.charstack = []     # characters read but not yet consumed
        self.tokenstack = []    # tokens produced but not yet handed out
        self.eof = False

    def get_token(self):
        """
        Return the next lexical unit of the input, or ``None`` once the input
        has been fully consumed.

        Tokens end where the character class changes: a run of letters is one
        token and a run of digits is one token. Dots are ambiguous, since they
        may separate fields ("Sep.20.2009") or mark a decimal point
        ("4:30:21.447"). A dotted run is therefore read as a whole first and
        only split afterwards if it turns out not to be a plain decimal
        number; the surplus pieces are parked on ``tokenstack`` and handed out
        by the following calls.
        """
        if self.tokenstack:
            return self.tokenstack.pop(0)

        saw_letters = False
        token = None
        state = None

        while not self.eof:
            # A character that terminated the previous token was pushed back
            # onto charstack; use it up before touching the stream again.
            if self.charstack:
                ch = self.charstack.pop(0)
            else:
                ch = self.instream.read(1)
                while ch == '\x00':
                    ch = self.instream.read(1)

            if not ch:
                self.eof = True
                break

            if not state:
                # Opening character: decides what kind of token this is.
                token = ch
                if self.isword(ch):
                    state = 'a'
                elif self.isnum(ch):
                    state = '0'
                else:
                    # Whitespace collapses to a blank; anything else is
                    # emitted verbatim as a one-character token.
                    if self.isspace(ch):
                        token = ' '
                    break
                continue

            # Work out which state accepting ``ch`` would lead to; ``None``
            # means the character does not belong to the current token.
            next_state = None
            if state == 'a':
                # Inside a word.
                saw_letters = True
                if self.isword(ch):
                    next_state = 'a'
                elif ch == '.':
                    next_state = 'a.'
            elif state == '0':
                # Inside a number.
                if self.isnum(ch):
                    next_state = '0'
                elif ch == '.' or (ch == ',' and len(token) >= 2):
                    next_state = '0.'
            elif state == 'a.':
                # Letters plus at least one dot; split up later.
                saw_letters = True
                if ch == '.' or self.isword(ch):
                    next_state = 'a.'
                elif self.isnum(ch) and token[-1] == '.':
                    next_state = '0.'
            elif state == '0.':
                # Digits plus at least one separator; split up later.
                if ch == '.' or self.isnum(ch):
                    next_state = '0.'
                elif self.isword(ch) and token[-1] == '.':
                    next_state = 'a.'

            if next_state is None:
                # Hand the character back for the next token and emit.
                self.charstack.append(ch)
                break

            token += ch
            state = next_state

        dotted = state in ('a.', '0.')
        if dotted and (saw_letters or token.count('.') > 1 or
                       token[-1] in '.,'):
            pieces = self._split_decimal.split(token)
            token = pieces[0]
            self.tokenstack.extend(piece for piece in pieces[1:] if piece)

        if state == '0.' and '.' not in token:
            # A comma was used as the decimal mark; normalise it to a dot.
            token = token.replace(',', '.')

        return token

    def __iter__(self):
        return self

    def __next__(self):
        token = self.get_token()
        if token is not None:
            return token

        raise StopIteration

    def next(self):
        return self.__next__()  # Python 2.x support

    @classmethod
    def split(cls, s):
        """ Tokenize ``s`` completely and return the tokens as a list """
        lexer = cls(s)
        return list(lexer)

    @classmethod
    def isword(cls, nextchar):
        """ Whether or not the next character is part of a word """
        return nextchar.isalpha()

    @classmethod
    def isnum(cls, nextchar):
        """ Whether the next character is part of a number """
        return nextchar.isdigit()

    @classmethod
    def isspace(cls, nextchar):
        """ Whether the next character is whitespace """
        return nextchar.isspace()
217
218
class _resultbase(object):
    """
    Base for simple record types whose fields are listed in ``__slots__``.

    Subclasses must define ``__slots__``; every listed field starts out as
    ``None`` and counts as "set" once it holds any other value.
    """

    def __init__(self):
        for field in self.__slots__:
            setattr(self, field, None)

    def _repr(self, classname):
        """Render ``classname(field=value, ...)`` using only the set fields."""
        pairs = ((field, getattr(self, field)) for field in self.__slots__)
        rendered = ["%s=%s" % (field, repr(value))
                    for field, value in pairs
                    if value is not None]
        return "%s(%s)" % (classname, ", ".join(rendered))

    def __len__(self):
        # Number of fields that have been given a value
        populated = [field for field in self.__slots__
                     if getattr(self, field) is not None]
        return len(populated)

    def __repr__(self):
        return self._repr(self.__class__.__name__)
239
240
class parserinfo(object):
    """
    Class which handles what inputs are accepted. Subclass this to customize
    the language and acceptable values for each parameter.

    :param dayfirst:
        Whether to interpret the first value in an ambiguous 3-integer date
        (e.g. 01/05/09) as the day (``True``) or month (``False``). If
        ``yearfirst`` is set to ``True``, this distinguishes between YDM
        and YMD. Default is ``False``.

    :param yearfirst:
        Whether to interpret the first value in an ambiguous 3-integer date
        (e.g. 01/05/09) as the year. If ``True``, the first number is taken
        to be the year, otherwise the last number is taken to be the year.
        Default is ``False``.
    """

    # m from a.m/p.m, t from ISO T separator
    JUMP = [" ", ".", ",", ";", "-", "/", "'",
            "at", "on", "and", "ad", "m", "t", "of",
            "st", "nd", "rd", "th"]

    WEEKDAYS = [("Mon", "Monday"),
                ("Tue", "Tuesday"),     # TODO: "Tues"
                ("Wed", "Wednesday"),
                ("Thu", "Thursday"),    # TODO: "Thurs"
                ("Fri", "Friday"),
                ("Sat", "Saturday"),
                ("Sun", "Sunday")]
    MONTHS = [("Jan", "January"),
              ("Feb", "February"),      # TODO: "Febr"
              ("Mar", "March"),
              ("Apr", "April"),
              ("May", "May"),
              ("Jun", "June"),
              ("Jul", "July"),
              ("Aug", "August"),
              ("Sep", "Sept", "September"),
              ("Oct", "October"),
              ("Nov", "November"),
              ("Dec", "December")]
    HMS = [("h", "hour", "hours"),
           ("m", "minute", "minutes"),
           ("s", "second", "seconds")]
    AMPM = [("am", "a"),
            ("pm", "p")]
    UTCZONE = ["UTC", "GMT", "Z", "z"]
    PERTAIN = ["of"]
    # Consulted by tzoffset() for names that are not UTC designators
    TZOFFSET = {}
    # TODO: ERA = ["AD", "BC", "CE", "BCE", "Stardate",
    #              "Anno Domini", "Year of Our Lord"]

    def __init__(self, dayfirst=False, yearfirst=False):
        # Case-insensitive lookup tables built from the class-level lists
        self._jump = self._convert(self.JUMP)
        self._weekdays = self._convert(self.WEEKDAYS)
        self._months = self._convert(self.MONTHS)
        self._hms = self._convert(self.HMS)
        self._ampm = self._convert(self.AMPM)
        self._utczone = self._convert(self.UTCZONE)
        self._pertain = self._convert(self.PERTAIN)

        self.dayfirst = dayfirst
        self.yearfirst = yearfirst

        # Reference year (local time) used to expand two-digit years
        self._year = time.localtime().tm_year
        self._century = self._year // 100 * 100

    def _convert(self, lst):
        """
        Build a lookup dict from ``lst``.

        Each entry of ``lst`` is either a string or a tuple of alternative
        spellings. Every spelling, lower-cased, is mapped to the index of its
        entry in ``lst``.
        """
        dct = {}
        for i, entry in enumerate(lst):
            spellings = entry if isinstance(entry, tuple) else (entry,)
            for spelling in spellings:
                dct[spelling.lower()] = i
        return dct

    def jump(self, name):
        """ Whether ``name`` is a token that may be skipped (see ``JUMP``) """
        return name.lower() in self._jump

    def weekday(self, name):
        """ Index into ``WEEKDAYS`` (0-based) for ``name``, or ``None`` """
        try:
            return self._weekdays[name.lower()]
        except KeyError:
            return None

    def month(self, name):
        """ Month number (1-based) for ``name``, or ``None`` """
        try:
            return self._months[name.lower()] + 1
        except KeyError:
            return None

    def hms(self, name):
        """ Index into ``HMS`` (0=hour, 1=minute, 2=second), or ``None`` """
        try:
            return self._hms[name.lower()]
        except KeyError:
            return None

    def ampm(self, name):
        """ Index into ``AMPM`` (0=am, 1=pm) for ``name``, or ``None`` """
        try:
            return self._ampm[name.lower()]
        except KeyError:
            return None

    def pertain(self, name):
        """ Whether ``name`` is listed in ``PERTAIN`` (case-insensitive) """
        return name.lower() in self._pertain

    def utczone(self, name):
        """ Whether ``name`` is listed in ``UTCZONE`` (case-insensitive) """
        return name.lower() in self._utczone

    def tzoffset(self, name):
        """
        Return the UTC offset registered for the time zone ``name``.

        :return:
            ``0`` if ``name`` is one of the ``UTCZONE`` designators, otherwise
            whatever ``TZOFFSET`` holds for ``name`` (``None`` if unknown).
        """
        # The keys of _utczone are lower-cased by _convert(), so the name has
        # to be lower-cased too; comparing it verbatim would miss "UTC",
        # "GMT" and "Z". Names without a lower() method (e.g. None) simply
        # fall through to the TZOFFSET lookup.
        lower = getattr(name, 'lower', None)
        if lower is not None and lower() in self._utczone:
            return 0

        return self.TZOFFSET.get(name)

    def convertyear(self, year, century_specified=False):
        """
        Converts two-digit years to year within [-50, 49]
        range of self._year (current local time)

        Years of 100 or more, and any year when ``century_specified`` is
        true, are returned unchanged.
        """

        # Function contract is that the year is always positive
        assert year >= 0

        if year < 100 and not century_specified:
            # assume current century to start
            year += self._century

            if year >= self._year + 50:  # if too far in future
                year -= 100
            elif year < self._year - 50:  # if too far in past
                year += 100

        return year

    def validate(self, res):
        """
        Normalise a parse result in place.

        Expands the year through :meth:`convertyear` (reading
        ``res.century_specified``) and canonicalises UTC designations.

        :return:
            Always ``True``.
        """
        if res.year is not None:
            res.year = self.convertyear(res.year, res.century_specified)

        if ((res.tzoffset == 0 and not res.tzname) or
                res.tzname in ('Z', 'z')):
            # A bare zero offset or a "Z" suffix both mean UTC
            res.tzname = "UTC"
            res.tzoffset = 0
        elif res.tzoffset != 0 and res.tzname and self.utczone(res.tzname):
            # A UTC designator always has a zero offset
            res.tzoffset = 0
        return True
392
393
class _ymd(list):
    """
    Accumulator for the numeric year/month/day candidates of a date string.

    The list itself holds the integer values in the order they were appended.
    ``ystridx``, ``mstridx`` and ``dstridx`` record the position of the value
    known to be the year, month or day (``None`` while unknown), and
    ``century_specified`` is set once a value with more than two digits has
    been seen.
    """

    def __init__(self, *args, **kwargs):
        # Name the class explicitly: super(self.__class__, self) recurses
        # forever as soon as this class is subclassed.
        super(_ymd, self).__init__(*args, **kwargs)
        self.century_specified = False
        self.dstridx = None
        self.mstridx = None
        self.ystridx = None

    @property
    def has_year(self):
        return self.ystridx is not None

    @property
    def has_month(self):
        return self.mstridx is not None

    @property
    def has_day(self):
        return self.dstridx is not None

    def could_be_day(self, value):
        """
        Whether ``value`` is a plausible day of the month given what is
        already known about the month and year.
        """
        if self.has_day:
            return False
        elif not self.has_month:
            return 1 <= value <= 31
        elif not self.has_year:
            # Be permissive, assume leap year
            month = self[self.mstridx]
            return 1 <= value <= monthrange(2000, month)[1]
        else:
            month = self[self.mstridx]
            year = self[self.ystridx]
            return 1 <= value <= monthrange(year, month)[1]

    def append(self, val, label=None):
        """
        Append ``val`` (converted with ``int``) and optionally record its role.

        :param val:
            A digit string or a number.

        :param label:
            ``'Y'``, ``'M'``, ``'D'`` or ``None`` if the role is unknown. A
            digit string longer than two characters, or a number above 100,
            is always labelled as the year.

        :raises ValueError:
            If the value must be a year but ``label`` says otherwise, or if
            the labelled role has already been filled.
        """
        if hasattr(val, '__len__'):
            # String input: judge by the number of digits
            must_be_year = val.isdigit() and len(val) > 2
        else:
            must_be_year = val > 100

        if must_be_year:
            self.century_specified = True
            if label not in [None, 'Y']:  # pragma: no cover
                raise ValueError(label)
            label = 'Y'

        super(_ymd, self).append(int(val))

        if label == 'M':
            if self.has_month:
                raise ValueError('Month is already set')
            self.mstridx = len(self) - 1
        elif label == 'D':
            if self.has_day:
                raise ValueError('Day is already set')
            self.dstridx = len(self) - 1
        elif label == 'Y':
            if self.has_year:
                raise ValueError('Year is already set')
            self.ystridx = len(self) - 1

    def _resolve_from_stridxs(self, strids):
        """
        Try to resolve the identities of year/month/day elements using
        ystridx, mstridx, and dstridx, if enough of these are specified.

        :param strids:
            Dict mapping ``'y'``/``'m'``/``'d'`` to list positions; it is
            completed in place when exactly one of three roles is missing.

        :return:
            A ``(year, month, day)`` tuple; unknown members are ``None``.
        """
        if len(self) == 3 and len(strids) == 2:
            # we can back out the remaining stridx value
            missing = [x for x in range(3) if x not in strids.values()]
            key = [x for x in ['y', 'm', 'd'] if x not in strids]
            assert len(missing) == len(key) == 1
            key = key[0]
            val = missing[0]
            strids[key] = val

        assert len(self) == len(strids)  # otherwise this should not be called
        out = {key: self[strids[key]] for key in strids}
        return (out.get('y'), out.get('m'), out.get('d'))

    def resolve_ymd(self, yearfirst, dayfirst):
        """
        Decide which of the collected values is the year, month and day.

        :param yearfirst:
            Prefer reading an ambiguous leading value as the year.

        :param dayfirst:
            Prefer reading an ambiguous value as the day rather than the
            month.

        :return:
            A ``(year, month, day)`` tuple; members that could not be
            determined are ``None``.

        :raises ValueError:
            If more than three values were collected.
        """
        len_ymd = len(self)
        year, month, day = (None, None, None)

        strids = (('y', self.ystridx),
                  ('m', self.mstridx),
                  ('d', self.dstridx))

        strids = {key: val for key, val in strids if val is not None}
        if (len(self) == len(strids) > 0 or
                (len(self) == 3 and len(strids) == 2)):
            # The explicit labels alone settle every position
            return self._resolve_from_stridxs(strids)

        mstridx = self.mstridx

        if len_ymd > 3:
            raise ValueError("More than three YMD values")
        elif len_ymd == 1 or (mstridx is not None and len_ymd == 2):
            # One member, or two members with a month string
            if mstridx is not None:
                month = self[mstridx]
                # since mstridx is 0 or 1, self[mstridx-1] always
                # looks up the other element
                other = self[mstridx - 1]
            else:
                other = self[0]

            if len_ymd > 1 or mstridx is None:
                if other > 31:
                    year = other
                else:
                    day = other

        elif len_ymd == 2:
            # Two members with numbers
            if self[0] > 31:
                # 99-01
                year, month = self
            elif self[1] > 31:
                # 01-99
                month, year = self
            elif dayfirst and self[1] <= 12:
                # 13-01
                day, month = self
            else:
                # 01-13
                month, day = self

        elif len_ymd == 3:
            # Three members
            if mstridx == 0:
                if self[1] > 31:
                    # Apr-2003-25
                    month, year, day = self
                else:
                    month, day, year = self
            elif mstridx == 1:
                if self[0] > 31 or (yearfirst and self[2] <= 31):
                    # 99-Jan-01
                    year, month, day = self
                else:
                    # 01-Jan-01
                    # Give precedence to day-first, since
                    # two-digit years is usually hand-written.
                    day, month, year = self

            elif mstridx == 2:
                # Unusual layout: the month name comes last
                if self[1] > 31:
                    # 01-99-Jan
                    day, year, month = self
                else:
                    # 99-01-Jan
                    year, day, month = self

            else:
                if (self[0] > 31 or
                    self.ystridx == 0 or
                        (yearfirst and self[1] <= 12 and self[2] <= 31)):
                    # 99-01-01
                    if dayfirst and self[2] <= 12:
                        year, day, month = self
                    else:
                        year, month, day = self
                elif self[0] > 12 or (dayfirst and self[1] <= 12):
                    # 13-01-01
                    day, month, year = self
                else:
                    # 01-13-01
                    month, day, year = self

        return year, month, day
566
567
568class parser(object):
569    def __init__(self, info=None):
570        self.info = info or parserinfo()
571
572    def parse(self, timestr, default=None,
573              ignoretz=False, tzinfos=None, **kwargs):
574        """
575        Parse the date/time string into a :class:`datetime.datetime` object.
576
577        :param timestr:
578            Any date/time string using the supported formats.
579
580        :param default:
581            The default datetime object, if this is a datetime object and not
582            ``None``, elements specified in ``timestr`` replace elements in the
583            default object.
584
585        :param ignoretz:
586            If set ``True``, time zones in parsed strings are ignored and a
587            naive :class:`datetime.datetime` object is returned.
588
589        :param tzinfos:
590            Additional time zone names / aliases which may be present in the
591            string. This argument maps time zone names (and optionally offsets
592            from those time zones) to time zones. This parameter can be a
593            dictionary with timezone aliases mapping time zone names to time
594            zones or a function taking two parameters (``tzname`` and
595            ``tzoffset``) and returning a time zone.
596
597            The timezones to which the names are mapped can be an integer
598            offset from UTC in seconds or a :class:`tzinfo` object.
599
600            .. doctest::
601               :options: +NORMALIZE_WHITESPACE
602
603                >>> from dateutil.parser import parse
604                >>> from dateutil.tz import gettz
605                >>> tzinfos = {"BRST": -7200, "CST": gettz("America/Chicago")}
606                >>> parse("2012-01-19 17:21:00 BRST", tzinfos=tzinfos)
607                datetime.datetime(2012, 1, 19, 17, 21, tzinfo=tzoffset(u'BRST', -7200))
608                >>> parse("2012-01-19 17:21:00 CST", tzinfos=tzinfos)
609                datetime.datetime(2012, 1, 19, 17, 21,
610                                  tzinfo=tzfile('/usr/share/zoneinfo/America/Chicago'))
611
612            This parameter is ignored if ``ignoretz`` is set.
613
614        :param \\*\\*kwargs:
615            Keyword arguments as passed to ``_parse()``.
616
617        :return:
618            Returns a :class:`datetime.datetime` object or, if the
619            ``fuzzy_with_tokens`` option is ``True``, returns a tuple, the
620            first element being a :class:`datetime.datetime` object, the second
621            a tuple containing the fuzzy tokens.
622
623        :raises ParserError:
624            Raised for invalid or unknown string format, if the provided
625            :class:`tzinfo` is not in a valid format, or if an invalid date
626            would be created.
627
628        :raises TypeError:
629            Raised for non-string or character stream input.
630
631        :raises OverflowError:
632            Raised if the parsed date exceeds the largest valid C integer on
633            your system.
634        """
635
636        if default is None:
637            default = datetime.datetime.now().replace(hour=0, minute=0,
638                                                      second=0, microsecond=0)
639
640        res, skipped_tokens = self._parse(timestr, **kwargs)
641
642        if res is None:
643            raise ParserError("Unknown string format: %s", timestr)
644
645        if len(res) == 0:
646            raise ParserError("String does not contain a date: %s", timestr)
647
648        try:
649            ret = self._build_naive(res, default)
650        except ValueError as e:
651            six.raise_from(ParserError(str(e) + ": %s", timestr), e)
652
653        if not ignoretz:
654            ret = self._build_tzaware(ret, res, tzinfos)
655
656        if kwargs.get('fuzzy_with_tokens', False):
657            return ret, skipped_tokens
658        else:
659            return ret
660
661    class _result(_resultbase):
662        __slots__ = ["year", "month", "day", "weekday",
663                     "hour", "minute", "second", "microsecond",
664                     "tzname", "tzoffset", "ampm","any_unused_tokens"]
665
    def _parse(self, timestr, dayfirst=None, yearfirst=None, fuzzy=False,
               fuzzy_with_tokens=False):
        """
        Private method which performs the heavy lifting of parsing, called from
        ``parse()``, which passes on its ``kwargs`` to this function.

        :param timestr:
            The string to parse.

        :param dayfirst:
            Whether to interpret the first value in an ambiguous 3-integer date
            (e.g. 01/05/09) as the day (``True``) or month (``False``). If
            ``yearfirst`` is set to ``True``, this distinguishes between YDM
            and YMD. If set to ``None``, this value is retrieved from the
            current :class:`parserinfo` object (which itself defaults to
            ``False``).

        :param yearfirst:
            Whether to interpret the first value in an ambiguous 3-integer date
            (e.g. 01/05/09) as the year. If ``True``, the first number is taken
            to be the year, otherwise the last number is taken to be the year.
            If this is set to ``None``, the value is retrieved from the current
            :class:`parserinfo` object (which itself defaults to ``False``).

        :param fuzzy:
            Whether to allow fuzzy parsing, allowing for string like "Today is
            January 1, 2047 at 8:21:00AM".

        :param fuzzy_with_tokens:
            If ``True``, ``fuzzy`` is automatically set to True, and the parser
            will return a tuple where the first element is the parsed
            :class:`datetime.datetime` datetimestamp and the second element is
            a tuple containing the portions of the string which were ignored:

            .. doctest::

                >>> from dateutil.parser import parse
                >>> parse("Today is January 1, 2047 at 8:21:00AM", fuzzy_with_tokens=True)
                (datetime.datetime(2047, 1, 1, 8, 21), (u'Today is ', u' ', u'at '))

        :return:
            A 2-tuple. On success the first element is the populated
            ``_result`` object; the second is a tuple of the skipped string
            portions when ``fuzzy_with_tokens`` is true, otherwise ``None``.
            ``(None, None)`` is returned when an ``IndexError`` or
            ``ValueError`` occurs while walking the tokens or when
            ``info.validate()`` returns a false value.
        """
        if fuzzy_with_tokens:
            fuzzy = True

        info = self.info

        # Fall back to the parserinfo defaults for unspecified preferences
        if dayfirst is None:
            dayfirst = info.dayfirst

        if yearfirst is None:
            yearfirst = info.yearfirst

        res = self._result()
        l = _timelex.split(timestr)         # Splits the timestr into tokens

        # Indices of tokens that were skipped rather than interpreted
        skipped_idxs = []

        # year/month/day list
        ymd = _ymd()

        len_l = len(l)
        i = 0
        # Look-ahead below (l[i + 1], l[i + 2], ...) is not always bounds
        # checked; a resulting IndexError is handled by the except clause at
        # the end of this block, just like a ValueError.
        try:
            while i < len_l:

                # Check if it's a number
                value_repr = l[i]
                try:
                    value = float(value_repr)
                except ValueError:
                    value = None

                if value is not None:
                    # Numeric token; the helper's return value becomes the
                    # new position in the token list
                    i = self._parse_numeric_token(l, i, info, ymd, res, fuzzy)

                # Check weekday
                elif info.weekday(l[i]) is not None:
                    value = info.weekday(l[i])
                    res.weekday = value

                # Check month name
                elif info.month(l[i]) is not None:
                    value = info.month(l[i])
                    ymd.append(value, 'M')

                    if i + 1 < len_l:
                        if l[i + 1] in ('-', '/'):
                            # Jan-01[-99]
                            sep = l[i + 1]
                            ymd.append(l[i + 2])

                            if i + 3 < len_l and l[i + 3] == sep:
                                # Jan-01-99
                                ymd.append(l[i + 4])
                                i += 2

                            i += 2

                        elif (i + 4 < len_l and l[i + 1] == l[i + 3] == ' ' and
                              info.pertain(l[i + 2])):
                            # Jan of 01
                            # In this case, 01 is clearly year
                            if l[i + 4].isdigit():
                                # Convert it here to become unambiguous
                                value = int(l[i + 4])
                                year = str(info.convertyear(value))
                                ymd.append(year, 'Y')
                            else:
                                # Wrong guess
                                pass
                                # TODO: not hit in tests
                            i += 4

                # Check am/pm
                elif info.ampm(l[i]) is not None:
                    value = info.ampm(l[i])
                    val_is_ampm = self._ampm_valid(res.hour, res.ampm, fuzzy)

                    if val_is_ampm:
                        res.hour = self._adjust_ampm(res.hour, value)
                        res.ampm = value

                    elif fuzzy:
                        skipped_idxs.append(i)

                # Check for a timezone name
                elif self._could_be_tzname(res.hour, res.tzname, res.tzoffset, l[i]):
                    res.tzname = l[i]
                    res.tzoffset = info.tzoffset(res.tzname)

                    # Check for something like GMT+3, or BRST+3. Notice
                    # that it doesn't mean "I am 3 hours after GMT", but
                    # "my time +3 is GMT". If found, we reverse the
                    # logic so that timezone parsing code will get it
                    # right.
                    if i + 1 < len_l and l[i + 1] in ('+', '-'):
                        # Indexing the tuple with the boolean swaps the sign
                        # token in place: '+' becomes '-' and vice versa
                        l[i + 1] = ('+', '-')[l[i + 1] == '+']
                        res.tzoffset = None
                        if info.utczone(res.tzname):
                            # With something like GMT+3, the timezone
                            # is *not* GMT.
                            res.tzname = None

                # Check for a numbered timezone
                elif res.hour is not None and l[i] in ('+', '-'):
                    # +1 for a '+' token, -1 for a '-' token
                    signal = (-1, 1)[l[i] == '+']
                    len_li = len(l[i + 1])

                    # TODO: check that l[i + 1] is integer?
                    if len_li == 4:
                        # -0300
                        hour_offset = int(l[i + 1][:2])
                        min_offset = int(l[i + 1][2:])
                    elif i + 2 < len_l and l[i + 2] == ':':
                        # -03:00
                        hour_offset = int(l[i + 1])
                        min_offset = int(l[i + 3])  # TODO: Check that l[i+3] is minute-like?
                        i += 2
                    elif len_li <= 2:
                        # -[0]3
                        hour_offset = int(l[i + 1][:2])
                        min_offset = 0
                    else:
                        raise ValueError(timestr)

                    # Offset is stored in seconds
                    res.tzoffset = signal * (hour_offset * 3600 + min_offset * 60)

                    # Look for a timezone name between parenthesis
                    if (i + 5 < len_l and
                            info.jump(l[i + 2]) and l[i + 3] == '(' and
                            l[i + 5] == ')' and
                            3 <= len(l[i + 4]) and
                            self._could_be_tzname(res.hour, res.tzname,
                                                  None, l[i + 4])):
                        # -0300 (BRST)
                        res.tzname = l[i + 4]
                        i += 4

                    # Step past the offset digits consumed above
                    i += 1

                # Check jumps
                elif not (info.jump(l[i]) or fuzzy):
                    # Neither a skippable token nor fuzzy mode: give up
                    raise ValueError(timestr)

                else:
                    skipped_idxs.append(i)
                # Advance to the next token (every branch falls through here)
                i += 1

            # Process year/month/day
            year, month, day = ymd.resolve_ymd(yearfirst, dayfirst)

            res.century_specified = ymd.century_specified
            res.year = year
            res.month = month
            res.day = day

        except (IndexError, ValueError):
            # Unparseable input is reported to the caller as (None, None)
            return None, None

        if not info.validate(res):
            return None, None

        if fuzzy_with_tokens:
            skipped_tokens = self._recombine_skipped(l, skipped_idxs)
            return res, tuple(skipped_tokens)
        else:
            return res, None
874
    def _parse_numeric_token(self, tokens, idx, info, ymd, res, fuzzy):
        """
        Interpret the numeric token ``tokens[idx]`` and record what it means.

        Depending on the token's length and on the tokens around it, the
        number is stored as a time component on ``res`` (hour, minute,
        second, microsecond) or appended to ``ymd`` as a year/month/day
        candidate.  The branches below are tried in order; the first one
        that matches wins.

        :param tokens:
            The list of string tokens being parsed.

        :param idx:
            Index into ``tokens`` of the numeric token to interpret.

        :param info:
            Object providing the ``hms``, ``jump``, ``ampm`` and ``month``
            lookups used to classify neighbouring tokens.

        :param ymd:
            Accumulator of year/month/day candidates; values are added via
            ``ymd.append``.

        :param res:
            Result object whose time attributes are assigned in place.

        :param fuzzy:
            If false, a number that matches none of the known layouts raises
            ``ValueError``; if true it is silently ignored.

        :return:
            The index of the last token consumed by this call (``idx`` itself
            when no following tokens were used).

        :raises ValueError:
            If the token cannot be converted to a decimal, if a date
            separator is followed by a token that is neither digits nor a
            month name, or if nothing matches and ``fuzzy`` is false.
        """
        # Token is a number
        value_repr = tokens[idx]
        try:
            value = self._to_decimal(value_repr)
        except Exception as e:
            six.raise_from(ValueError('Unknown numeric token'), e)

        # Length of the token text (a decimal point counts as a character).
        len_li = len(value_repr)

        len_l = len(tokens)

        # All three date members are already known, the hour is not, and the
        # next token is neither ':' nor an h/m/s label: treat as HH or HHMM.
        if (len(ymd) == 3 and len_li in (2, 4) and
            res.hour is None and
            (idx + 1 >= len_l or
             (tokens[idx + 1] != ':' and
              info.hms(tokens[idx + 1]) is None))):
            # 19990101T23[59]
            s = tokens[idx]
            res.hour = int(s[:2])

            if len_li == 4:
                res.minute = int(s[2:])

        elif len_li == 6 or (len_li > 6 and tokens[idx].find('.') == 6):
            # YYMMDD or HHMMSS[.ss]
            s = tokens[idx]

            # Only read it as a date when no date members have been seen yet
            # and there is no fractional part.
            if not ymd and '.' not in tokens[idx]:
                ymd.append(s[:2])
                ymd.append(s[2:4])
                ymd.append(s[4:])
            else:
                # 19990101T235959[.59]

                # TODO: Check if res attributes already set.
                res.hour = int(s[:2])
                res.minute = int(s[2:4])
                res.second, res.microsecond = self._parsems(s[4:])

        elif len_li in (8, 12, 14):
            # YYYYMMDD, optionally followed by HHMM (12) or HHMMSS (14)
            s = tokens[idx]
            ymd.append(s[:4], 'Y')
            ymd.append(s[4:6])
            ymd.append(s[6:8])

            if len_li > 8:
                res.hour = int(s[8:10])
                res.minute = int(s[10:12])

                if len_li > 12:
                    res.second = int(s[12:])

        elif self._find_hms_idx(idx, tokens, info, allow_jump=True) is not None:
            # HH[ ]h or MM[ ]m or SS[.ss][ ]s
            hms_idx = self._find_hms_idx(idx, tokens, info, allow_jump=True)
            # _parse_hms may move idx forward onto the label token.
            (idx, hms) = self._parse_hms(idx, tokens, info, hms_idx)
            if hms is not None:
                # TODO: checking that hour/minute/second are not
                # already set?
                self._assign_hms(res, value_repr, hms)

        elif idx + 2 < len_l and tokens[idx + 1] == ':':
            # HH:MM[:SS[.ss]]
            res.hour = int(value)
            value = self._to_decimal(tokens[idx + 2])  # TODO: try/except for this?
            (res.minute, res.second) = self._parse_min_sec(value)

            if idx + 4 < len_l and tokens[idx + 3] == ':':
                res.second, res.microsecond = self._parsems(tokens[idx + 4])

                # Skip the second ':' and the seconds token.
                idx += 2

            # Skip the first ':' and the minutes token.
            idx += 2

        elif idx + 1 < len_l and tokens[idx + 1] in ('-', '/', '.'):
            # Date written with a separator; remember which one so the
            # third member is only accepted after the same separator.
            sep = tokens[idx + 1]
            ymd.append(value_repr)

            if idx + 2 < len_l and not info.jump(tokens[idx + 2]):
                if tokens[idx + 2].isdigit():
                    # 01-01[-01]
                    ymd.append(tokens[idx + 2])
                else:
                    # 01-Jan[-01]
                    value = info.month(tokens[idx + 2])

                    if value is not None:
                        ymd.append(value, 'M')
                    else:
                        raise ValueError()

                if idx + 3 < len_l and tokens[idx + 3] == sep:
                    # We have three members
                    # NOTE: tokens[idx + 4] is not bounds-checked here; a
                    # trailing separator raises IndexError out of this method.
                    value = info.month(tokens[idx + 4])

                    if value is not None:
                        ymd.append(value, 'M')
                    else:
                        ymd.append(tokens[idx + 4])
                    # Skip the second separator and the third member.
                    idx += 2

                # Skip the second member.
                idx += 1
            # Skip the first separator.
            idx += 1

        elif idx + 1 >= len_l or info.jump(tokens[idx + 1]):
            # Last token, or followed by a jump token.
            if idx + 2 < len_l and info.ampm(tokens[idx + 2]) is not None:
                # 12 am
                hour = int(value)
                res.hour = self._adjust_ampm(hour, info.ampm(tokens[idx + 2]))
                idx += 1
            else:
                # Year, month or day
                ymd.append(value)
            idx += 1

        elif info.ampm(tokens[idx + 1]) is not None and (0 <= value < 24):
            # 12am
            hour = int(value)
            res.hour = self._adjust_ampm(hour, info.ampm(tokens[idx + 1]))
            idx += 1

        elif ymd.could_be_day(value):
            ymd.append(value)

        elif not fuzzy:
            # Nothing matched and we are not allowed to skip the token.
            raise ValueError()

        return idx
1005
1006    def _find_hms_idx(self, idx, tokens, info, allow_jump):
1007        len_l = len(tokens)
1008
1009        if idx+1 < len_l and info.hms(tokens[idx+1]) is not None:
1010            # There is an "h", "m", or "s" label following this token.  We take
1011            # assign the upcoming label to the current token.
1012            # e.g. the "12" in 12h"
1013            hms_idx = idx + 1
1014
1015        elif (allow_jump and idx+2 < len_l and tokens[idx+1] == ' ' and
1016              info.hms(tokens[idx+2]) is not None):
1017            # There is a space and then an "h", "m", or "s" label.
1018            # e.g. the "12" in "12 h"
1019            hms_idx = idx + 2
1020
1021        elif idx > 0 and info.hms(tokens[idx-1]) is not None:
1022            # There is a "h", "m", or "s" preceding this token.  Since neither
1023            # of the previous cases was hit, there is no label following this
1024            # token, so we use the previous label.
1025            # e.g. the "04" in "12h04"
1026            hms_idx = idx-1
1027
1028        elif (1 < idx == len_l-1 and tokens[idx-1] == ' ' and
1029              info.hms(tokens[idx-2]) is not None):
1030            # If we are looking at the final token, we allow for a
1031            # backward-looking check to skip over a space.
1032            # TODO: Are we sure this is the right condition here?
1033            hms_idx = idx - 2
1034
1035        else:
1036            hms_idx = None
1037
1038        return hms_idx
1039
1040    def _assign_hms(self, res, value_repr, hms):
1041        # See GH issue #427, fixing float rounding
1042        value = self._to_decimal(value_repr)
1043
1044        if hms == 0:
1045            # Hour
1046            res.hour = int(value)
1047            if value % 1:
1048                res.minute = int(60*(value % 1))
1049
1050        elif hms == 1:
1051            (res.minute, res.second) = self._parse_min_sec(value)
1052
1053        elif hms == 2:
1054            (res.second, res.microsecond) = self._parsems(value_repr)
1055
1056    def _could_be_tzname(self, hour, tzname, tzoffset, token):
1057        return (hour is not None and
1058                tzname is None and
1059                tzoffset is None and
1060                len(token) <= 5 and
1061                (all(x in string.ascii_uppercase for x in token)
1062                 or token in self.info.UTCZONE))
1063
1064    def _ampm_valid(self, hour, ampm, fuzzy):
1065        """
1066        For fuzzy parsing, 'a' or 'am' (both valid English words)
1067        may erroneously trigger the AM/PM flag. Deal with that
1068        here.
1069        """
1070        val_is_ampm = True
1071
1072        # If there's already an AM/PM flag, this one isn't one.
1073        if fuzzy and ampm is not None:
1074            val_is_ampm = False
1075
1076        # If AM/PM is found and hour is not, raise a ValueError
1077        if hour is None:
1078            if fuzzy:
1079                val_is_ampm = False
1080            else:
1081                raise ValueError('No hour specified with AM or PM flag.')
1082        elif not 0 <= hour <= 12:
1083            # If AM/PM is found, it's a 12 hour clock, so raise
1084            # an error for invalid range
1085            if fuzzy:
1086                val_is_ampm = False
1087            else:
1088                raise ValueError('Invalid hour specified for 12-hour clock.')
1089
1090        return val_is_ampm
1091
1092    def _adjust_ampm(self, hour, ampm):
1093        if hour < 12 and ampm == 1:
1094            hour += 12
1095        elif hour == 12 and ampm == 0:
1096            hour = 0
1097        return hour
1098
1099    def _parse_min_sec(self, value):
1100        # TODO: Every usage of this function sets res.second to the return
1101        # value. Are there any cases where second will be returned as None and
1102        # we *don't* want to set res.second = None?
1103        minute = int(value)
1104        second = None
1105
1106        sec_remainder = value % 1
1107        if sec_remainder:
1108            second = int(60 * sec_remainder)
1109        return (minute, second)
1110
1111    def _parse_hms(self, idx, tokens, info, hms_idx):
1112        # TODO: Is this going to admit a lot of false-positives for when we
1113        # just happen to have digits and "h", "m" or "s" characters in non-date
1114        # text?  I guess hex hashes won't have that problem, but there's plenty
1115        # of random junk out there.
1116        if hms_idx is None:
1117            hms = None
1118            new_idx = idx
1119        elif hms_idx > idx:
1120            hms = info.hms(tokens[hms_idx])
1121            new_idx = hms_idx
1122        else:
1123            # Looking backwards, increment one.
1124            hms = info.hms(tokens[hms_idx]) + 1
1125            new_idx = idx
1126
1127        return (new_idx, hms)
1128
1129    # ------------------------------------------------------------------
1130    # Handling for individual tokens.  These are kept as methods instead
1131    #  of functions for the sake of customizability via subclassing.
1132
1133    def _parsems(self, value):
1134        """Parse a I[.F] seconds value into (seconds, microseconds)."""
1135        if "." not in value:
1136            return int(value), 0
1137        else:
1138            i, f = value.split(".")
1139            return int(i), int(f.ljust(6, "0")[:6])
1140
1141    def _to_decimal(self, val):
1142        try:
1143            decimal_value = Decimal(val)
1144            # See GH 662, edge case, infinite value should not be converted
1145            #  via `_to_decimal`
1146            if not decimal_value.is_finite():
1147                raise ValueError("Converted decimal value is infinite or NaN")
1148        except Exception as e:
1149            msg = "Could not convert %s to decimal" % val
1150            six.raise_from(ValueError(msg), e)
1151        else:
1152            return decimal_value
1153
1154    # ------------------------------------------------------------------
1155    # Post-Parsing construction of datetime output.  These are kept as
1156    #  methods instead of functions for the sake of customizability via
1157    #  subclassing.
1158
1159    def _build_tzinfo(self, tzinfos, tzname, tzoffset):
1160        if callable(tzinfos):
1161            tzdata = tzinfos(tzname, tzoffset)
1162        else:
1163            tzdata = tzinfos.get(tzname)
1164        # handle case where tzinfo is paased an options that returns None
1165        # eg tzinfos = {'BRST' : None}
1166        if isinstance(tzdata, datetime.tzinfo) or tzdata is None:
1167            tzinfo = tzdata
1168        elif isinstance(tzdata, text_type):
1169            tzinfo = tz.tzstr(tzdata)
1170        elif isinstance(tzdata, integer_types):
1171            tzinfo = tz.tzoffset(tzname, tzdata)
1172        else:
1173            raise TypeError("Offset must be tzinfo subclass, tz string, "
1174                            "or int offset.")
1175        return tzinfo
1176
1177    def _build_tzaware(self, naive, res, tzinfos):
1178        if (callable(tzinfos) or (tzinfos and res.tzname in tzinfos)):
1179            tzinfo = self._build_tzinfo(tzinfos, res.tzname, res.tzoffset)
1180            aware = naive.replace(tzinfo=tzinfo)
1181            aware = self._assign_tzname(aware, res.tzname)
1182
1183        elif res.tzname and res.tzname in time.tzname:
1184            aware = naive.replace(tzinfo=tz.tzlocal())
1185
1186            # Handle ambiguous local datetime
1187            aware = self._assign_tzname(aware, res.tzname)
1188
1189            # This is mostly relevant for winter GMT zones parsed in the UK
1190            if (aware.tzname() != res.tzname and
1191                    res.tzname in self.info.UTCZONE):
1192                aware = aware.replace(tzinfo=tz.UTC)
1193
1194        elif res.tzoffset == 0:
1195            aware = naive.replace(tzinfo=tz.UTC)
1196
1197        elif res.tzoffset:
1198            aware = naive.replace(tzinfo=tz.tzoffset(res.tzname, res.tzoffset))
1199
1200        elif not res.tzname and not res.tzoffset:
1201            # i.e. no timezone information was found.
1202            aware = naive
1203
1204        elif res.tzname:
1205            # tz-like string was parsed but we don't know what to do
1206            # with it
1207            warnings.warn("tzname {tzname} identified but not understood.  "
1208                          "Pass `tzinfos` argument in order to correctly "
1209                          "return a timezone-aware datetime.  In a future "
1210                          "version, this will raise an "
1211                          "exception.".format(tzname=res.tzname),
1212                          category=UnknownTimezoneWarning)
1213            aware = naive
1214
1215        return aware
1216
1217    def _build_naive(self, res, default):
1218        repl = {}
1219        for attr in ("year", "month", "day", "hour",
1220                     "minute", "second", "microsecond"):
1221            value = getattr(res, attr)
1222            if value is not None:
1223                repl[attr] = value
1224
1225        if 'day' not in repl:
1226            # If the default day exceeds the last day of the month, fall back
1227            # to the end of the month.
1228            cyear = default.year if res.year is None else res.year
1229            cmonth = default.month if res.month is None else res.month
1230            cday = default.day if res.day is None else res.day
1231
1232            if cday > monthrange(cyear, cmonth)[1]:
1233                repl['day'] = monthrange(cyear, cmonth)[1]
1234
1235        naive = default.replace(**repl)
1236
1237        if res.weekday is not None and not res.day:
1238            naive = naive + relativedelta.relativedelta(weekday=res.weekday)
1239
1240        return naive
1241
1242    def _assign_tzname(self, dt, tzname):
1243        if dt.tzname() != tzname:
1244            new_dt = tz.enfold(dt, fold=1)
1245            if new_dt.tzname() == tzname:
1246                return new_dt
1247
1248        return dt
1249
1250    def _recombine_skipped(self, tokens, skipped_idxs):
1251        """
1252        >>> tokens = ["foo", " ", "bar", " ", "19June2000", "baz"]
1253        >>> skipped_idxs = [0, 1, 2, 5]
1254        >>> _recombine_skipped(tokens, skipped_idxs)
1255        ["foo bar", "baz"]
1256        """
1257        skipped_tokens = []
1258        for i, idx in enumerate(sorted(skipped_idxs)):
1259            if i > 0 and idx - 1 == skipped_idxs[i - 1]:
1260                skipped_tokens[-1] = skipped_tokens[-1] + tokens[idx]
1261            else:
1262                skipped_tokens.append(tokens[idx])
1263
1264        return skipped_tokens
1265
1266
# Shared parser instance, constructed with no arguments.  The module-level
# parse() function uses it whenever no ``parserinfo`` is supplied.
DEFAULTPARSER = parser()
1268
1269
def parse(timestr, parserinfo=None, **kwargs):
    """
    Parse a date/time string in one of the supported formats.

    This is a convenience wrapper: it picks a parser (a new one built from
    ``parserinfo`` when that is given, otherwise the shared module-level
    default parser) and forwards ``timestr`` and all keyword arguments to
    its ``parse`` method.

    :param timestr:
        A string containing a date/time stamp.

    :param parserinfo:
        A :class:`parserinfo` object containing parameters for the parser.
        If ``None``, the default arguments to the :class:`parserinfo`
        constructor are used.

    The ``**kwargs`` parameter takes the following keyword arguments:

    :param default:
        The default datetime object, if this is a datetime object and not
        ``None``, elements specified in ``timestr`` replace elements in the
        default object.

    :param ignoretz:
        If set ``True``, time zones in parsed strings are ignored and a naive
        :class:`datetime` object is returned.

    :param tzinfos:
        Additional time zone names / aliases which may be present in the
        string. This argument maps time zone names (and optionally offsets
        from those time zones) to time zones. This parameter can be a
        dictionary with timezone aliases mapping time zone names to time
        zones or a function taking two parameters (``tzname`` and
        ``tzoffset``) and returning a time zone.

        The timezones to which the names are mapped can be an integer
        offset from UTC in seconds or a :class:`tzinfo` object.

        .. doctest::
           :options: +NORMALIZE_WHITESPACE

            >>> from dateutil.parser import parse
            >>> from dateutil.tz import gettz
            >>> tzinfos = {"BRST": -7200, "CST": gettz("America/Chicago")}
            >>> parse("2012-01-19 17:21:00 BRST", tzinfos=tzinfos)
            datetime.datetime(2012, 1, 19, 17, 21, tzinfo=tzoffset(u'BRST', -7200))
            >>> parse("2012-01-19 17:21:00 CST", tzinfos=tzinfos)
            datetime.datetime(2012, 1, 19, 17, 21,
                              tzinfo=tzfile('/usr/share/zoneinfo/America/Chicago'))

        This parameter is ignored if ``ignoretz`` is set.

    :param dayfirst:
        Whether to interpret the first value in an ambiguous 3-integer date
        (e.g. 01/05/09) as the day (``True``) or month (``False``). If
        ``yearfirst`` is set to ``True``, this distinguishes between YDM and
        YMD. If set to ``None``, this value is retrieved from the current
        :class:`parserinfo` object (which itself defaults to ``False``).

    :param yearfirst:
        Whether to interpret the first value in an ambiguous 3-integer date
        (e.g. 01/05/09) as the year. If ``True``, the first number is taken to
        be the year, otherwise the last number is taken to be the year. If
        this is set to ``None``, the value is retrieved from the current
        :class:`parserinfo` object (which itself defaults to ``False``).

    :param fuzzy:
        Whether to allow fuzzy parsing, allowing for string like "Today is
        January 1, 2047 at 8:21:00AM".

    :param fuzzy_with_tokens:
        If ``True``, ``fuzzy`` is automatically set to True, and the parser
        will return a tuple where the first element is the parsed
        :class:`datetime.datetime` timestamp and the second element is
        a tuple containing the portions of the string which were ignored:

        .. doctest::

            >>> from dateutil.parser import parse
            >>> parse("Today is January 1, 2047 at 8:21:00AM", fuzzy_with_tokens=True)
            (datetime.datetime(2047, 1, 1, 8, 21), (u'Today is ', u' ', u'at '))

    :return:
        Returns a :class:`datetime.datetime` object or, if the
        ``fuzzy_with_tokens`` option is ``True``, returns a tuple, the
        first element being a :class:`datetime.datetime` object, the second
        a tuple containing the fuzzy tokens.

    :raises ParserError:
        Raised for invalid or unknown string formats, if the provided
        :class:`tzinfo` is not in a valid format, or if an invalid date would
        be created.

    :raises OverflowError:
        Raised if the parsed date exceeds the largest valid C integer on
        your system.
    """
    # Only build a dedicated parser when custom parser info was supplied.
    active_parser = parser(parserinfo) if parserinfo else DEFAULTPARSER
    return active_parser.parse(timestr, **kwargs)
1369
1370
class _tzparser(object):
    """
    Parser for TZ-variable-style time zone strings.

    ``parse`` understands a standard abbreviation with an optional offset,
    optionally followed by a DST abbreviation and offset (e.g.
    ``BRST+3BRDT+2``), and optionally followed by transition rules in one of
    two layouts: the deprecated dateutil-specific comma-separated layout, or
    the ``J<n>`` / ``M<m>.<w>.<d>`` / ``<n>`` layout with an optional
    ``/time`` suffix.
    """

    class _result(_resultbase):

        # Abbreviations/offsets for standard and DST time, plus the start
        # and end transition rules (each an ``_attr`` instance).
        __slots__ = ["stdabbr", "stdoffset", "dstabbr", "dstoffset",
                     "start", "end"]

        class _attr(_resultbase):
            # Fields describing a single transition rule.
            __slots__ = ["month", "week", "weekday",
                         "yday", "jyday", "day", "time"]

        def __repr__(self):
            return self._repr("")

        def __init__(self):
            _resultbase.__init__(self)
            self.start = self._attr()
            self.end = self._attr()

    def parse(self, tzstr):
        """
        Parse ``tzstr`` into a ``_result``.

        :param tzstr:
            The time zone string to parse.

        :return:
            A ``_result`` whose ``any_unused_tokens`` attribute tells whether
            any token other than ``","`` or ``":"`` was left unconsumed, or
            ``None`` if the string is malformed.
        """
        res = self._result()
        # Split into separators, alphabetic runs and digit runs; text
        # between matches (such as '+', '-', '/') is kept as its own token.
        l = [x for x in re.split(r'([,:.]|[a-zA-Z]+|[0-9]+)', tzstr) if x]
        used_idxs = list()
        try:

            len_l = len(l)

            i = 0
            while i < len_l:
                # BRST+3[BRDT[+2]]
                # Advance j over the tokens forming the abbreviation, i.e.
                # up to the first token containing a digit or ':,-+'.
                j = i
                while j < len_l and not [x for x in l[j]
                                         if x in "0123456789:,-+"]:
                    j += 1
                if j != i:
                    # First abbreviation is standard time, second is DST.
                    if not res.stdabbr:
                        offattr = "stdoffset"
                        res.stdabbr = "".join(l[i:j])
                    else:
                        offattr = "dstoffset"
                        res.dstabbr = "".join(l[i:j])

                    used_idxs.extend(range(j))
                    i = j
                    if (i < len_l and (l[i] in ('+', '-') or l[i][0] in
                                       "0123456789")):
                        if l[i] in ('+', '-'):
                            # Yes, that's right.  See the TZ variable
                            # documentation.
                            signal = (1, -1)[l[i] == '+']
                            used_idxs.append(i)
                            i += 1
                        else:
                            signal = -1
                        len_li = len(l[i])
                        if len_li == 4:
                            # -0300
                            setattr(res, offattr, (int(l[i][:2]) * 3600 +
                                                   int(l[i][2:]) * 60) * signal)
                        elif i + 1 < len_l and l[i + 1] == ':':
                            # -03:00
                            setattr(res, offattr,
                                    (int(l[i]) * 3600 +
                                     int(l[i + 2]) * 60) * signal)
                            used_idxs.append(i)
                            i += 2
                        elif len_li <= 2:
                            # -[0]3
                            setattr(res, offattr,
                                    int(l[i][:2]) * 3600 * signal)
                        else:
                            return None
                        used_idxs.append(i)
                        i += 1
                    if res.dstabbr:
                        break
                else:
                    break

            if i < len_l:
                # Accept ';' as an alternative to ',' in the rule section.
                for j in range(i, len_l):
                    if l[j] == ';':
                        l[j] = ','

                # Explicit check rather than ``assert`` so that validation
                # is not stripped when Python runs with -O.
                if l[i] != ',':
                    raise ValueError("expected ',' before transition rules")

                i += 1

            if i >= len_l:
                pass
            elif (8 <= l.count(',') <= 9 and
                  not [y for x in l[i:] if x != ','
                       for y in x if y not in "0123456789+-"]):
                # GMT0BST,3,0,30,3600,10,0,26,7200[,3600]
                for x in (res.start, res.end):
                    x.month = int(l[i])
                    used_idxs.append(i)
                    i += 2
                    if l[i] == '-':
                        value = int(l[i + 1]) * -1
                        used_idxs.append(i)
                        i += 1
                    else:
                        value = int(l[i])
                    used_idxs.append(i)
                    i += 2
                    # A non-zero value is a week number and the next field a
                    # weekday; zero means the next field is a day of month.
                    if value:
                        x.week = value
                        x.weekday = (int(l[i]) - 1) % 7
                    else:
                        x.day = int(l[i])
                    used_idxs.append(i)
                    i += 2
                    x.time = int(l[i])
                    used_idxs.append(i)
                    i += 2
                if i < len_l:
                    # Optional trailing field: DST offset relative to the
                    # standard offset.
                    if l[i] in ('-', '+'):
                        signal = (-1, 1)[l[i] == "+"]
                        used_idxs.append(i)
                        i += 1
                    else:
                        signal = 1
                    used_idxs.append(i)
                    res.dstoffset = (res.stdoffset + int(l[i]) * signal)

                # This was a made-up format that is not in normal use
                warn(('Parsed time zone "%s" ' % tzstr) +
                     'is in a non-standard dateutil-specific format, which ' +
                     'is now deprecated; support for parsing this format ' +
                     'will be removed in future versions. It is recommended ' +
                     'that you switch to a standard format like the GNU ' +
                     'TZ variable format.', tz.DeprecatedTzFormatWarning)
            elif (l.count(',') == 2 and l[i:].count('/') <= 2 and
                  not [y for x in l[i:] if x not in (',', '/', 'J', 'M',
                                                     '.', '-', ':')
                       for y in x if y not in "0123456789"]):
                for x in (res.start, res.end):
                    if l[i] == 'J':
                        # non-leap year day (1 based)
                        used_idxs.append(i)
                        i += 1
                        x.jyday = int(l[i])
                    elif l[i] == 'M':
                        # month[-.]week[-.]weekday
                        used_idxs.append(i)
                        i += 1
                        x.month = int(l[i])
                        used_idxs.append(i)
                        i += 1
                        if l[i] not in ('-', '.'):
                            raise ValueError("expected '-' or '.' after month")
                        used_idxs.append(i)
                        i += 1
                        x.week = int(l[i])
                        # Week 5 is stored as -1.
                        if x.week == 5:
                            x.week = -1
                        used_idxs.append(i)
                        i += 1
                        if l[i] not in ('-', '.'):
                            raise ValueError("expected '-' or '.' after week")
                        used_idxs.append(i)
                        i += 1
                        x.weekday = (int(l[i]) - 1) % 7
                    else:
                        # year day (zero based)
                        x.yday = int(l[i]) + 1

                    used_idxs.append(i)
                    i += 1

                    if i < len_l and l[i] == '/':
                        used_idxs.append(i)
                        i += 1
                        # start time
                        len_li = len(l[i])
                        if len_li == 4:
                            # -0300
                            x.time = (int(l[i][:2]) * 3600 +
                                      int(l[i][2:]) * 60)
                        elif i + 1 < len_l and l[i + 1] == ':':
                            # -03:00
                            x.time = int(l[i]) * 3600 + int(l[i + 2]) * 60
                            used_idxs.append(i)
                            i += 2
                            if i + 1 < len_l and l[i + 1] == ':':
                                used_idxs.append(i)
                                i += 2
                                x.time += int(l[i])
                        elif len_li <= 2:
                            # -[0]3
                            x.time = (int(l[i][:2]) * 3600)
                        else:
                            return None
                        used_idxs.append(i)
                        i += 1

                    # Each rule must end the string or be followed by ','.
                    if not (i == len_l or l[i] == ','):
                        raise ValueError("expected ',' or end after rule")

                    i += 1

                # Nothing may remain after the two rules.
                if i < len_l:
                    raise ValueError("unexpected trailing tokens")

        except (IndexError, ValueError, AssertionError):
            return None

        unused_idxs = set(range(len_l)).difference(used_idxs)
        res.any_unused_tokens = not {l[n] for n in unused_idxs}.issubset({",", ":"})
        return res
1580
1581
# Shared _tzparser instance; the module-level _parsetz() function delegates
# to it.
DEFAULTTZPARSER = _tzparser()
1583
1584
def _parsetz(tzstr):
    """Parse ``tzstr`` with the shared :data:`DEFAULTTZPARSER` instance.

    Returns whatever ``DEFAULTTZPARSER.parse`` returns for the string.
    """
    shared_parser = DEFAULTTZPARSER
    return shared_parser.parse(tzstr)
1587
1588
class ParserError(ValueError):
    """Exception subclass used for any failure to parse a datetime string.

    This is a subclass of :py:exc:`ValueError`, and should be raised any time
    earlier versions of ``dateutil`` would have raised ``ValueError``.

    The first argument is treated as a ``%``-style format string and the
    remaining arguments as its values; see :meth:`__str__`.

    .. versionadded:: 2.8.1
    """
    def __str__(self):
        """Return ``args[0] % args[1:]``, or the default text if that fails.

        Formatting can fail with ``IndexError`` (no arguments),
        ``TypeError`` (argument count or type mismatch) or ``ValueError``
        (malformed format string, e.g. a trailing ``%``).  Converting an
        exception to text must never raise, so all three fall back to the
        standard exception string.
        """
        try:
            return self.args[0] % self.args[1:]
        except (TypeError, IndexError, ValueError):
            return super(ParserError, self).__str__()

    def __repr__(self):
        """Return ``ClassName('arg0', 'arg1', ...)``."""
        # Wrap each argument in a 1-tuple so that an argument which is
        # itself a tuple is formatted as a single value.
        args = ", ".join("'%s'" % (arg,) for arg in self.args)
        return "%s(%s)" % (self.__class__.__name__, args)
1606
1607
class UnknownTimezoneWarning(RuntimeWarning):
    """Warning issued when a time zone name was recognised in the input but
    could not be turned into a ``tzinfo`` object.

    .. versionadded:: 2.7.0
    """
1613# vim:ts=4:sw=4:et
1614