1# -*- coding: utf-8 -*-
2"""
3This module offers a generic date/time string parser which is able to parse
4most known formats to represent a date and/or time.
5
6This module attempts to be forgiving with regards to unlikely input formats,
7returning a datetime object even for dates which are ambiguous. If an element
8of a date/time stamp is omitted, the following rules are applied:
9
10- If AM or PM is left unspecified, a 24-hour clock is assumed, however, an hour
11  on a 12-hour clock (``0 <= hour <= 12``) *must* be specified if AM or PM is
12  specified.
13- If a time zone is omitted, a timezone-naive datetime is returned.
14
15If any other elements are missing, they are taken from the
16:class:`datetime.datetime` object passed to the parameter ``default``. If this
17results in a day number exceeding the valid number of days per month, the
18value falls back to the end of the month.
19
20Additional resources about date/time string formats can be found below:
21
22- `A summary of the international standard date and time notation
23  <http://www.cl.cam.ac.uk/~mgk25/iso-time.html>`_
24- `W3C Date and Time Formats <http://www.w3.org/TR/NOTE-datetime>`_
25- `Time Formats (Planetary Rings Node) <https://pds-rings.seti.org:443/tools/time_formats.html>`_
26- `CPAN ParseDate module
27  <http://search.cpan.org/~muir/Time-modules-2013.0912/lib/Time/ParseDate.pm>`_
28- `Java SimpleDateFormat Class
29  <https://docs.oracle.com/javase/6/docs/api/java/text/SimpleDateFormat.html>`_
30"""
31from __future__ import unicode_literals
32
33import datetime
34import re
35import string
36import time
37import warnings
38
39from calendar import monthrange
40from io import StringIO
41
42import six
43from six import integer_types, text_type
44
45from decimal import Decimal
46
47from warnings import warn
48
49from .. import relativedelta
50from .. import tz
51
52__all__ = ["parse", "parserinfo", "ParserError"]
53
54
55# TODO: pandas.core.tools.datetimes imports this explicitly.  Might be worth
56# making public and/or figuring out if there is something we can
57# take off their plate.
class _timelex(object):
    """
    Lexer which breaks a date/time string (or character stream) into tokens.

    Instances are iterators: every step yields the next token produced by
    :meth:`get_token`, and iteration stops once that method returns ``None``.
    """
    # Fractional seconds are sometimes split by a comma
    _split_decimal = re.compile("([.,])")

    def __init__(self, instream):
        """
        :param instream:
            The input to tokenize. Byte input is decoded first, a text string
            is wrapped in a ``StringIO``, and anything else must provide a
            ``read`` attribute.

        :raises TypeError:
            If ``instream`` is neither text nor an object with ``read``.
        """
        if six.PY2:
            # In Python 2, we can't duck type properly because unicode has
            # a 'decode' function, and we'd be double-decoding
            if isinstance(instream, (bytes, bytearray)):
                instream = instream.decode()
        else:
            if getattr(instream, 'decode', None) is not None:
                instream = instream.decode()

        if isinstance(instream, text_type):
            instream = StringIO(instream)
        elif getattr(instream, 'read', None) is None:
            raise TypeError('Parser must be a string or character stream, not '
                            '{itype}'.format(itype=instream.__class__.__name__))

        self.instream = instream
        # Characters read from the stream but not yet consumed by a token.
        self.charstack = []
        # Tokens already split off and waiting to be returned.
        self.tokenstack = []
        # Set once ``instream.read(1)`` returns an empty result.
        self.eof = False

    def get_token(self):
        """
        This function breaks the time string into lexical units (tokens), which
        can be parsed by the parser. Lexical units are demarcated by changes in
        the character set, so any continuous string of letters is considered
        one unit, any continuous string of numbers is considered one unit.

        The main complication arises from the fact that dots ('.') can be used
        both as separators (e.g. "Sep.20.2009") or decimal points (e.g.
        "4:30:21.447"). As such, it is necessary to read the full context of
        any dot-separated strings before breaking it into tokens; as such, this
        function maintains a "token stack", for when the ambiguous context
        demands that multiple tokens be parsed at once.

        :return:
            The next token as a string, or ``None`` when no characters are
            left to read.
        """
        # Tokens queued by a previous call are handed out first.
        if self.tokenstack:
            return self.tokenstack.pop(0)

        seenletters = False
        token = None
        # Lexer state: None (no token started), 'a' (word), '0' (number),
        # 'a.' (word followed by at least one dot), '0.' (number followed by
        # at least one dot or comma).
        state = None

        while not self.eof:
            # We only realize that we've reached the end of a token when we
            # find a character that's not part of the current token - since
            # that character may be part of the next token, it's stored in the
            # charstack.
            if self.charstack:
                nextchar = self.charstack.pop(0)
            else:
                nextchar = self.instream.read(1)
                # NUL characters in the stream are skipped.
                while nextchar == '\x00':
                    nextchar = self.instream.read(1)

            if not nextchar:
                self.eof = True
                break
            elif not state:
                # First character of the token - determines if we're starting
                # to parse a word, a number or something else.
                token = nextchar
                if self.isword(nextchar):
                    state = 'a'
                elif self.isnum(nextchar):
                    state = '0'
                elif self.isspace(nextchar):
                    # Any whitespace character is emitted as a single space.
                    token = ' '
                    break  # emit token
                else:
                    break  # emit token
            elif state == 'a':
                # If we've already started reading a word, we keep reading
                # letters until we find something that's not part of a word.
                seenletters = True
                if self.isword(nextchar):
                    token += nextchar
                elif nextchar == '.':
                    token += nextchar
                    state = 'a.'
                else:
                    self.charstack.append(nextchar)
                    break  # emit token
            elif state == '0':
                # If we've already started reading a number, we keep reading
                # numbers until we find something that doesn't fit.
                if self.isnum(nextchar):
                    token += nextchar
                # A comma is only accepted here after at least two characters
                # of the number have been read.
                elif nextchar == '.' or (nextchar == ',' and len(token) >= 2):
                    token += nextchar
                    state = '0.'
                else:
                    self.charstack.append(nextchar)
                    break  # emit token
            elif state == 'a.':
                # If we've seen some letters and a dot separator, continue
                # parsing, and the tokens will be broken up later.
                seenletters = True
                if nextchar == '.' or self.isword(nextchar):
                    token += nextchar
                elif self.isnum(nextchar) and token[-1] == '.':
                    token += nextchar
                    state = '0.'
                else:
                    self.charstack.append(nextchar)
                    break  # emit token
            elif state == '0.':
                # If we've seen at least one dot separator, keep going, we'll
                # break up the tokens later.
                if nextchar == '.' or self.isnum(nextchar):
                    token += nextchar
                elif self.isword(nextchar) and token[-1] == '.':
                    token += nextchar
                    state = 'a.'
                else:
                    self.charstack.append(nextchar)
                    break  # emit token

        # A dotted token is treated as separator-delimited (rather than as a
        # single decimal number) if it contains letters, more than one dot,
        # or ends in a separator. It is split on '.'/',' with the separators
        # kept; the first piece is returned now and the rest are queued.
        if (state in ('a.', '0.') and (seenletters or token.count('.') > 1 or
                                       token[-1] in '.,')):
            l = self._split_decimal.split(token)
            token = l[0]
            for tok in l[1:]:
                if tok:
                    self.tokenstack.append(tok)

        # A number whose only separator was a comma gets it turned into a dot.
        if state == '0.' and token.count('.') == 0:
            token = token.replace(',', '.')

        return token

    def __iter__(self):
        """ Return the lexer itself, which is its own iterator. """
        return self

    def __next__(self):
        """ Return the next token; raise ``StopIteration`` when none remain. """
        token = self.get_token()
        if token is None:
            raise StopIteration

        return token

    def next(self):
        return self.__next__()  # Python 2.x support

    @classmethod
    def split(cls, s):
        """ Tokenize ``s`` completely and return the tokens as a list. """
        return list(cls(s))

    @classmethod
    def isword(cls, nextchar):
        """ Whether or not the next character is part of a word """
        return nextchar.isalpha()

    @classmethod
    def isnum(cls, nextchar):
        """ Whether the next character is part of a number """
        return nextchar.isdigit()

    @classmethod
    def isspace(cls, nextchar):
        """ Whether the next character is whitespace """
        return nextchar.isspace()
223
224
class _resultbase(object):
    """
    Base class for result containers whose fields are named by ``__slots__``.

    Subclasses declare ``__slots__``; every declared field starts as ``None``.
    """

    def __init__(self):
        for name in self.__slots__:
            setattr(self, name, None)

    def _repr(self, classname):
        """ Render ``classname(field=value, ...)`` using only the set fields. """
        pairs = [(name, getattr(self, name)) for name in self.__slots__]
        shown = ["%s=%r" % (name, value)
                 for name, value in pairs
                 if value is not None]
        return "%s(%s)" % (classname, ", ".join(shown))

    def __len__(self):
        """ Number of fields that currently hold a non-``None`` value. """
        populated = [name for name in self.__slots__
                     if getattr(self, name) is not None]
        return len(populated)

    def __repr__(self):
        return self._repr(type(self).__name__)
245
246
class parserinfo(object):
    """
    Class which handles what inputs are accepted. Subclass this to customize
    the language and acceptable values for each parameter.

    :param dayfirst:
        Whether to interpret the first value in an ambiguous 3-integer date
        (e.g. 01/05/09) as the day (``True``) or month (``False``). If
        ``yearfirst`` is set to ``True``, this distinguishes between YDM
        and YMD. Default is ``False``.

    :param yearfirst:
        Whether to interpret the first value in an ambiguous 3-integer date
        (e.g. 01/05/09) as the year. If ``True``, the first number is taken
        to be the year, otherwise the last number is taken to be the year.
        Default is ``False``.
    """

    # m from a.m/p.m, t from ISO T separator
    JUMP = [" ", ".", ",", ";", "-", "/", "'",
            "at", "on", "and", "ad", "m", "t", "of",
            "st", "nd", "rd", "th"]

    WEEKDAYS = [("Mon", "Monday"),
                ("Tue", "Tuesday"),     # TODO: "Tues"
                ("Wed", "Wednesday"),
                ("Thu", "Thursday"),    # TODO: "Thurs"
                ("Fri", "Friday"),
                ("Sat", "Saturday"),
                ("Sun", "Sunday")]
    MONTHS = [("Jan", "January"),
              ("Feb", "February"),      # TODO: "Febr"
              ("Mar", "March"),
              ("Apr", "April"),
              ("May", "May"),
              ("Jun", "June"),
              ("Jul", "July"),
              ("Aug", "August"),
              ("Sep", "Sept", "September"),
              ("Oct", "October"),
              ("Nov", "November"),
              ("Dec", "December")]
    HMS = [("h", "hour", "hours"),
           ("m", "minute", "minutes"),
           ("s", "second", "seconds")]
    AMPM = [("am", "a"),
            ("pm", "p")]
    UTCZONE = ["UTC", "GMT", "Z", "z"]
    PERTAIN = ["of"]
    TZOFFSET = {}
    # TODO: ERA = ["AD", "BC", "CE", "BCE", "Stardate",
    #              "Anno Domini", "Year of Our Lord"]

    def __init__(self, dayfirst=False, yearfirst=False):
        # Lower-cased lookup tables built from the class-level word lists.
        self._jump = self._convert(self.JUMP)
        self._weekdays = self._convert(self.WEEKDAYS)
        self._months = self._convert(self.MONTHS)
        self._hms = self._convert(self.HMS)
        self._ampm = self._convert(self.AMPM)
        self._utczone = self._convert(self.UTCZONE)
        self._pertain = self._convert(self.PERTAIN)

        self.dayfirst = dayfirst
        self.yearfirst = yearfirst

        # Reference year (local time) and its century, used by convertyear().
        self._year = time.localtime().tm_year
        self._century = self._year // 100 * 100

    def _convert(self, lst):
        """
        Build a ``{lower-cased name: index}`` mapping from a list whose
        entries are either single names or tuples of alias names; all aliases
        in one tuple map to that entry's position in ``lst``.
        """
        dct = {}
        for i, entry in enumerate(lst):
            aliases = entry if isinstance(entry, tuple) else (entry,)
            for alias in aliases:
                dct[alias.lower()] = i
        return dct

    def jump(self, name):
        """ Whether ``name`` (case-insensitive) is listed in ``JUMP``. """
        return name.lower() in self._jump

    def weekday(self, name):
        """
        Return the 0-based index of ``name`` in ``WEEKDAYS``
        (case-insensitive), or ``None`` if it is not a weekday name.
        """
        try:
            return self._weekdays[name.lower()]
        except KeyError:
            return None

    def month(self, name):
        """
        Return the 1-based month number for ``name`` (case-insensitive), or
        ``None`` if it is not a month name.
        """
        try:
            return self._months[name.lower()] + 1
        except KeyError:
            return None

    def hms(self, name):
        """
        Return the index of ``name`` in ``HMS`` (0 for the hour names, 1 for
        minute, 2 for second), or ``None`` if it is not listed.
        """
        try:
            return self._hms[name.lower()]
        except KeyError:
            return None

    def ampm(self, name):
        """
        Return 0 for an AM marker, 1 for a PM marker, or ``None`` if ``name``
        is not listed in ``AMPM``.
        """
        try:
            return self._ampm[name.lower()]
        except KeyError:
            return None

    def pertain(self, name):
        """ Whether ``name`` (case-insensitive) is listed in ``PERTAIN``. """
        return name.lower() in self._pertain

    def utczone(self, name):
        """ Whether ``name`` (case-insensitive) is listed in ``UTCZONE``. """
        return name.lower() in self._utczone

    def tzoffset(self, name):
        """
        Return the UTC offset known for the time zone ``name``.

        :return:
            ``0`` for any name listed in ``UTCZONE`` (case-insensitive),
            otherwise whatever ``TZOFFSET`` holds for ``name`` (``None`` if
            the name is unknown).
        """
        # self._utczone is keyed by lower-cased names, so the lookup has to
        # lower-case as well; testing the raw name made "UTC"/"GMT"/"Z" miss
        # while "utc" matched. Non-string names are looked up unchanged.
        key = name.lower() if hasattr(name, "lower") else name
        if key in self._utczone:
            return 0

        return self.TZOFFSET.get(name)

    def convertyear(self, year, century_specified=False):
        """
        Converts two-digit years to year within [-50, 49]
        range of self._year (current local time)

        :param year:
            Non-negative year number.

        :param century_specified:
            If true, ``year`` is returned unchanged even when it is below 100.
        """

        # Function contract is that the year is always positive
        assert year >= 0

        if year < 100 and not century_specified:
            # assume current century to start
            year += self._century

            if year >= self._year + 50:  # if too far in future
                year -= 100
            elif year < self._year - 50:  # if too far in past
                year += 100

        return year

    def validate(self, res):
        """
        Normalize the year and UTC time zone fields of ``res`` in place.

        :param res:
            Parse result exposing ``year``, ``century_specified``, ``tzname``
            and ``tzoffset`` attributes.

        :return:
            Always ``True``.
        """
        # move to info
        if res.year is not None:
            res.year = self.convertyear(res.year, res.century_specified)

        # A zero offset without a name, or the literal name "Z"/"z", is UTC.
        if ((res.tzoffset == 0 and not res.tzname) or
             (res.tzname == 'Z' or res.tzname == 'z')):
            res.tzname = "UTC"
            res.tzoffset = 0
        # A UTC-equivalent name always carries a zero offset.
        elif res.tzoffset != 0 and res.tzname and self.utczone(res.tzname):
            res.tzoffset = 0
        return True
398
399
class _ymd(list):
    """
    List of the (at most three) integer date components found so far, in the
    order they appeared, together with the positions of those components
    already known to be the year, month or day.
    """

    def __init__(self, *args, **kwargs):
        # Name the class explicitly: ``super(self.__class__, self)`` recurses
        # without end as soon as this class is subclassed.
        super(_ymd, self).__init__(*args, **kwargs)
        self.century_specified = False
        # Index into this list of the day / month / year, or None if unknown.
        self.dstridx = None
        self.mstridx = None
        self.ystridx = None

    @property
    def has_year(self):
        return self.ystridx is not None

    @property
    def has_month(self):
        return self.mstridx is not None

    @property
    def has_day(self):
        return self.dstridx is not None

    def could_be_day(self, value):
        """
        Whether ``value`` is a plausible day number given what is already
        known: never if a day is already set, otherwise bounded by 31 or by
        the length of the known month (of the known year, if any).
        """
        if self.has_day:
            return False
        elif not self.has_month:
            return 1 <= value <= 31
        elif not self.has_year:
            # Be permissive, assume leap year
            month = self[self.mstridx]
            return 1 <= value <= monthrange(2000, month)[1]
        else:
            month = self[self.mstridx]
            year = self[self.ystridx]
            return 1 <= value <= monthrange(year, month)[1]

    def append(self, val, label=None):
        """
        Append a date component, converted with ``int()``.

        :param val:
            A digit string or a number. A digit string longer than two
            characters, or a number greater than 100, is recorded as a year
            with an explicit century.

        :param label:
            ``'Y'``, ``'M'`` or ``'D'`` if the kind of component is known,
            otherwise ``None``.

        :raises ValueError:
            If the labelled component is already set, if ``label``
            contradicts a value recognized as a year, or if ``val`` cannot
            be converted to an integer.
        """
        if hasattr(val, '__len__'):
            if val.isdigit() and len(val) > 2:
                self.century_specified = True
                if label not in [None, 'Y']:  # pragma: no cover
                    raise ValueError(label)
                label = 'Y'
        elif val > 100:
            self.century_specified = True
            if label not in [None, 'Y']:  # pragma: no cover
                raise ValueError(label)
            label = 'Y'

        # See __init__ for why the class is named explicitly here.
        super(_ymd, self).append(int(val))

        if label == 'M':
            if self.has_month:
                raise ValueError('Month is already set')
            self.mstridx = len(self) - 1
        elif label == 'D':
            if self.has_day:
                raise ValueError('Day is already set')
            self.dstridx = len(self) - 1
        elif label == 'Y':
            if self.has_year:
                raise ValueError('Year is already set')
            self.ystridx = len(self) - 1

    def _resolve_from_stridxs(self, strids):
        """
        Try to resolve the identities of year/month/day elements using
        ystridx, mstridx, and dstridx, if enough of these are specified.

        :param strids:
            Mapping from ``'y'``/``'m'``/``'d'`` to the known list index;
            the deduced third entry is added to it in place.

        :return:
            ``(year, month, day)``, with ``None`` for absent components.
        """
        if len(self) == 3 and len(strids) == 2:
            # we can back out the remaining stridx value
            missing = [x for x in range(3) if x not in strids.values()]
            key = [x for x in ['y', 'm', 'd'] if x not in strids]
            assert len(missing) == len(key) == 1
            key = key[0]
            val = missing[0]
            strids[key] = val

        assert len(self) == len(strids)  # otherwise this should not be called
        out = {key: self[strids[key]] for key in strids}
        return (out.get('y'), out.get('m'), out.get('d'))

    def resolve_ymd(self, yearfirst, dayfirst):
        """
        Decide which stored value is the year, the month and the day.

        :param yearfirst:
            Prefer reading an ambiguous first value as the year.

        :param dayfirst:
            Prefer reading an ambiguous value as the day rather than the
            month.

        :return:
            ``(year, month, day)``; components that are not present are
            ``None``.

        :raises ValueError:
            If more than three values were collected.
        """
        len_ymd = len(self)
        year, month, day = (None, None, None)

        strids = (('y', self.ystridx),
                  ('m', self.mstridx),
                  ('d', self.dstridx))

        strids = {key: val for key, val in strids if val is not None}
        # Everything is labelled, or two of three are and the last follows.
        if (len(self) == len(strids) > 0 or
                (len(self) == 3 and len(strids) == 2)):
            return self._resolve_from_stridxs(strids)

        mstridx = self.mstridx

        if len_ymd > 3:
            raise ValueError("More than three YMD values")
        elif len_ymd == 1 or (mstridx is not None and len_ymd == 2):
            # One member, or two members with a month string
            if mstridx is not None:
                month = self[mstridx]
                # since mstridx is 0 or 1, self[mstridx-1] always
                # looks up the other element
                other = self[mstridx - 1]
            else:
                other = self[0]

            if len_ymd > 1 or mstridx is None:
                if other > 31:
                    year = other
                else:
                    day = other

        elif len_ymd == 2:
            # Two members with numbers
            if self[0] > 31:
                # 99-01
                year, month = self
            elif self[1] > 31:
                # 01-99
                month, year = self
            elif dayfirst and self[1] <= 12:
                # 13-01
                day, month = self
            else:
                # 01-13
                month, day = self

        elif len_ymd == 3:
            # Three members
            if mstridx == 0:
                if self[1] > 31:
                    # Apr-2003-25
                    month, year, day = self
                else:
                    month, day, year = self
            elif mstridx == 1:
                if self[0] > 31 or (yearfirst and self[2] <= 31):
                    # 99-Jan-01
                    year, month, day = self
                else:
                    # 01-Jan-01
                    # Give precedence to day-first, since
                    # two-digit years is usually hand-written.
                    day, month, year = self

            elif mstridx == 2:
                # Unusual layout: the month name comes last
                if self[1] > 31:
                    # 01-99-Jan
                    day, year, month = self
                else:
                    # 99-01-Jan
                    year, day, month = self

            else:
                if (self[0] > 31 or
                    self.ystridx == 0 or
                        (yearfirst and self[1] <= 12 and self[2] <= 31)):
                    # 99-01-01
                    if dayfirst and self[2] <= 12:
                        year, day, month = self
                    else:
                        year, month, day = self
                elif self[0] > 12 or (dayfirst and self[1] <= 12):
                    # 13-01-01
                    day, month, year = self
                else:
                    # 01-13-01
                    month, day, year = self

        return year, month, day
572
573
574class parser(object):
575    def __init__(self, info=None):
576        self.info = info or parserinfo()
577
578    def parse(self, timestr, default=None,
579              ignoretz=False, tzinfos=None, **kwargs):
580        """
581        Parse the date/time string into a :class:`datetime.datetime` object.
582
583        :param timestr:
584            Any date/time string using the supported formats.
585
586        :param default:
587            The default datetime object, if this is a datetime object and not
588            ``None``, elements specified in ``timestr`` replace elements in the
589            default object.
590
591        :param ignoretz:
592            If set ``True``, time zones in parsed strings are ignored and a
593            naive :class:`datetime.datetime` object is returned.
594
595        :param tzinfos:
596            Additional time zone names / aliases which may be present in the
597            string. This argument maps time zone names (and optionally offsets
598            from those time zones) to time zones. This parameter can be a
599            dictionary with timezone aliases mapping time zone names to time
600            zones or a function taking two parameters (``tzname`` and
601            ``tzoffset``) and returning a time zone.
602
603            The timezones to which the names are mapped can be an integer
604            offset from UTC in seconds or a :class:`tzinfo` object.
605
606            .. doctest::
607               :options: +NORMALIZE_WHITESPACE
608
609                >>> from dateutil.parser import parse
610                >>> from dateutil.tz import gettz
611                >>> tzinfos = {"BRST": -7200, "CST": gettz("America/Chicago")}
612                >>> parse("2012-01-19 17:21:00 BRST", tzinfos=tzinfos)
613                datetime.datetime(2012, 1, 19, 17, 21, tzinfo=tzoffset(u'BRST', -7200))
614                >>> parse("2012-01-19 17:21:00 CST", tzinfos=tzinfos)
615                datetime.datetime(2012, 1, 19, 17, 21,
616                                  tzinfo=tzfile('/usr/share/zoneinfo/America/Chicago'))
617
618            This parameter is ignored if ``ignoretz`` is set.
619
620        :param \\*\\*kwargs:
621            Keyword arguments as passed to ``_parse()``.
622
623        :return:
624            Returns a :class:`datetime.datetime` object or, if the
625            ``fuzzy_with_tokens`` option is ``True``, returns a tuple, the
626            first element being a :class:`datetime.datetime` object, the second
627            a tuple containing the fuzzy tokens.
628
629        :raises ParserError:
630            Raised for invalid or unknown string format, if the provided
631            :class:`tzinfo` is not in a valid format, or if an invalid date
632            would be created.
633
634        :raises TypeError:
635            Raised for non-string or character stream input.
636
637        :raises OverflowError:
638            Raised if the parsed date exceeds the largest valid C integer on
639            your system.
640        """
641
642        if default is None:
643            default = datetime.datetime.now().replace(hour=0, minute=0,
644                                                      second=0, microsecond=0)
645
646        res, skipped_tokens = self._parse(timestr, **kwargs)
647
648        if res is None:
649            raise ParserError("Unknown string format: %s", timestr)
650
651        if len(res) == 0:
652            raise ParserError("String does not contain a date: %s", timestr)
653
654        try:
655            ret = self._build_naive(res, default)
656        except ValueError as e:
657            six.raise_from(ParserError(e.args[0] + ": %s", timestr), e)
658
659        if not ignoretz:
660            ret = self._build_tzaware(ret, res, tzinfos)
661
662        if kwargs.get('fuzzy_with_tokens', False):
663            return ret, skipped_tokens
664        else:
665            return ret
666
667    class _result(_resultbase):
668        __slots__ = ["year", "month", "day", "weekday",
669                     "hour", "minute", "second", "microsecond",
670                     "tzname", "tzoffset", "ampm","any_unused_tokens"]
671
672    def _parse(self, timestr, dayfirst=None, yearfirst=None, fuzzy=False,
673               fuzzy_with_tokens=False):
674        """
675        Private method which performs the heavy lifting of parsing, called from
676        ``parse()``, which passes on its ``kwargs`` to this function.
677
678        :param timestr:
679            The string to parse.
680
681        :param dayfirst:
682            Whether to interpret the first value in an ambiguous 3-integer date
683            (e.g. 01/05/09) as the day (``True``) or month (``False``). If
684            ``yearfirst`` is set to ``True``, this distinguishes between YDM
685            and YMD. If set to ``None``, this value is retrieved from the
686            current :class:`parserinfo` object (which itself defaults to
687            ``False``).
688
689        :param yearfirst:
690            Whether to interpret the first value in an ambiguous 3-integer date
691            (e.g. 01/05/09) as the year. If ``True``, the first number is taken
692            to be the year, otherwise the last number is taken to be the year.
693            If this is set to ``None``, the value is retrieved from the current
694            :class:`parserinfo` object (which itself defaults to ``False``).
695
696        :param fuzzy:
697            Whether to allow fuzzy parsing, allowing for string like "Today is
698            January 1, 2047 at 8:21:00AM".
699
700        :param fuzzy_with_tokens:
701            If ``True``, ``fuzzy`` is automatically set to True, and the parser
702            will return a tuple where the first element is the parsed
703            :class:`datetime.datetime` datetimestamp and the second element is
704            a tuple containing the portions of the string which were ignored:
705
706            .. doctest::
707
708                >>> from dateutil.parser import parse
709                >>> parse("Today is January 1, 2047 at 8:21:00AM", fuzzy_with_tokens=True)
710                (datetime.datetime(2047, 1, 1, 8, 21), (u'Today is ', u' ', u'at '))
711
712        """
713        if fuzzy_with_tokens:
714            fuzzy = True
715
716        info = self.info
717
718        if dayfirst is None:
719            dayfirst = info.dayfirst
720
721        if yearfirst is None:
722            yearfirst = info.yearfirst
723
724        res = self._result()
725        l = _timelex.split(timestr)         # Splits the timestr into tokens
726
727        skipped_idxs = []
728
729        # year/month/day list
730        ymd = _ymd()
731
732        len_l = len(l)
733        i = 0
734        try:
735            while i < len_l:
736
737                # Check if it's a number
738                value_repr = l[i]
739                try:
740                    value = float(value_repr)
741                except ValueError:
742                    value = None
743
744                if value is not None:
745                    # Numeric token
746                    i = self._parse_numeric_token(l, i, info, ymd, res, fuzzy)
747
748                # Check weekday
749                elif info.weekday(l[i]) is not None:
750                    value = info.weekday(l[i])
751                    res.weekday = value
752
753                # Check month name
754                elif info.month(l[i]) is not None:
755                    value = info.month(l[i])
756                    ymd.append(value, 'M')
757
758                    if i + 1 < len_l:
759                        if l[i + 1] in ('-', '/'):
760                            # Jan-01[-99]
761                            sep = l[i + 1]
762                            ymd.append(l[i + 2])
763
764                            if i + 3 < len_l and l[i + 3] == sep:
765                                # Jan-01-99
766                                ymd.append(l[i + 4])
767                                i += 2
768
769                            i += 2
770
771                        elif (i + 4 < len_l and l[i + 1] == l[i + 3] == ' ' and
772                              info.pertain(l[i + 2])):
773                            # Jan of 01
774                            # In this case, 01 is clearly year
775                            if l[i + 4].isdigit():
776                                # Convert it here to become unambiguous
777                                value = int(l[i + 4])
778                                year = str(info.convertyear(value))
779                                ymd.append(year, 'Y')
780                            else:
781                                # Wrong guess
782                                pass
783                                # TODO: not hit in tests
784                            i += 4
785
786                # Check am/pm
787                elif info.ampm(l[i]) is not None:
788                    value = info.ampm(l[i])
789                    val_is_ampm = self._ampm_valid(res.hour, res.ampm, fuzzy)
790
791                    if val_is_ampm:
792                        res.hour = self._adjust_ampm(res.hour, value)
793                        res.ampm = value
794
795                    elif fuzzy:
796                        skipped_idxs.append(i)
797
798                # Check for a timezone name
799                elif self._could_be_tzname(res.hour, res.tzname, res.tzoffset, l[i]):
800                    res.tzname = l[i]
801                    res.tzoffset = info.tzoffset(res.tzname)
802
803                    # Check for something like GMT+3, or BRST+3. Notice
804                    # that it doesn't mean "I am 3 hours after GMT", but
805                    # "my time +3 is GMT". If found, we reverse the
806                    # logic so that timezone parsing code will get it
807                    # right.
808                    if i + 1 < len_l and l[i + 1] in ('+', '-'):
809                        l[i + 1] = ('+', '-')[l[i + 1] == '+']
810                        res.tzoffset = None
811                        if info.utczone(res.tzname):
812                            # With something like GMT+3, the timezone
813                            # is *not* GMT.
814                            res.tzname = None
815
816                # Check for a numbered timezone
817                elif res.hour is not None and l[i] in ('+', '-'):
818                    signal = (-1, 1)[l[i] == '+']
819                    len_li = len(l[i + 1])
820
821                    # TODO: check that l[i + 1] is integer?
822                    if len_li == 4:
823                        # -0300
824                        hour_offset = int(l[i + 1][:2])
825                        min_offset = int(l[i + 1][2:])
826                    elif i + 2 < len_l and l[i + 2] == ':':
827                        # -03:00
828                        hour_offset = int(l[i + 1])
829                        min_offset = int(l[i + 3])  # TODO: Check that l[i+3] is minute-like?
830                        i += 2
831                    elif len_li <= 2:
832                        # -[0]3
833                        hour_offset = int(l[i + 1][:2])
834                        min_offset = 0
835                    else:
836                        raise ValueError(timestr)
837
838                    res.tzoffset = signal * (hour_offset * 3600 + min_offset * 60)
839
840                    # Look for a timezone name between parenthesis
841                    if (i + 5 < len_l and
842                            info.jump(l[i + 2]) and l[i + 3] == '(' and
843                            l[i + 5] == ')' and
844                            3 <= len(l[i + 4]) and
845                            self._could_be_tzname(res.hour, res.tzname,
846                                                  None, l[i + 4])):
847                        # -0300 (BRST)
848                        res.tzname = l[i + 4]
849                        i += 4
850
851                    i += 1
852
853                # Check jumps
854                elif not (info.jump(l[i]) or fuzzy):
855                    raise ValueError(timestr)
856
857                else:
858                    skipped_idxs.append(i)
859                i += 1
860
861            # Process year/month/day
862            year, month, day = ymd.resolve_ymd(yearfirst, dayfirst)
863
864            res.century_specified = ymd.century_specified
865            res.year = year
866            res.month = month
867            res.day = day
868
869        except (IndexError, ValueError):
870            return None, None
871
872        if not info.validate(res):
873            return None, None
874
875        if fuzzy_with_tokens:
876            skipped_tokens = self._recombine_skipped(l, skipped_idxs)
877            return res, tuple(skipped_tokens)
878        else:
879            return res, None
880
    def _parse_numeric_token(self, tokens, idx, info, ymd, res, fuzzy):
        """
        Interpret the numeric token ``tokens[idx]`` and record what it means.

        Depending on the token's length and on the neighbouring tokens, the
        number is stored either as a date component (appended to ``ymd``) or
        as a time component (assigned to attributes of ``res``).

        :param tokens:
            The sequence of string tokens being parsed.

        :param idx:
            Index into ``tokens`` of the numeric token to interpret.

        :param info:
            Object providing the ``hms``, ``jump``, ``ampm`` and ``month``
            lookups used to classify the neighbouring tokens.

        :param ymd:
            Accumulator of year/month/day candidates; values are added with
            ``ymd.append``.

        :param res:
            Result object whose ``hour``, ``minute``, ``second`` and
            ``microsecond`` attributes may be assigned here.

        :param fuzzy:
            If true, a number matching none of the recognized layouts is
            silently ignored instead of raising.

        :return:
            ``idx``, advanced past any following tokens that were consumed
            together with this one.

        :raises ValueError:
            If the token cannot be converted to a decimal, if a date separator
            is followed by a token that is neither a number nor a month name,
            or if nothing matches and ``fuzzy`` is false.  Integer conversion
            of malformed sub-strings may also raise ``ValueError``.
        """
        # Token is a number
        value_repr = tokens[idx]
        try:
            value = self._to_decimal(value_repr)
        except Exception as e:
            six.raise_from(ValueError('Unknown numeric token'), e)

        # Length of the raw token text (including any decimal point).
        len_li = len(value_repr)

        len_l = len(tokens)

        # The date already has three members and no hour is known yet: a 2-
        # or 4-character number not followed by ':' or an h/m/s label is
        # read as a compact time.
        if (len(ymd) == 3 and len_li in (2, 4) and
            res.hour is None and
            (idx + 1 >= len_l or
             (tokens[idx + 1] != ':' and
              info.hms(tokens[idx + 1]) is None))):
            # 19990101T23[59]
            s = tokens[idx]
            res.hour = int(s[:2])

            if len_li == 4:
                res.minute = int(s[2:])

        elif len_li == 6 or (len_li > 6 and tokens[idx].find('.') == 6):
            # YYMMDD or HHMMSS[.ss]
            s = tokens[idx]

            # With no date members collected yet and no fractional part, the
            # six digits are taken as a date; otherwise as a time.
            if not ymd and '.' not in tokens[idx]:
                ymd.append(s[:2])
                ymd.append(s[2:4])
                ymd.append(s[4:])
            else:
                # 19990101T235959[.59]

                # TODO: Check if res attributes already set.
                res.hour = int(s[:2])
                res.minute = int(s[2:4])
                res.second, res.microsecond = self._parsems(s[4:])

        elif len_li in (8, 12, 14):
            # YYYYMMDD
            s = tokens[idx]
            ymd.append(s[:4], 'Y')
            ymd.append(s[4:6])
            ymd.append(s[6:8])

            # 12 characters add HHMM, 14 characters add HHMMSS.
            if len_li > 8:
                res.hour = int(s[8:10])
                res.minute = int(s[10:12])

                if len_li > 12:
                    res.second = int(s[12:])

        elif self._find_hms_idx(idx, tokens, info, allow_jump=True) is not None:
            # HH[ ]h or MM[ ]m or SS[.ss][ ]s
            hms_idx = self._find_hms_idx(idx, tokens, info, allow_jump=True)
            (idx, hms) = self._parse_hms(idx, tokens, info, hms_idx)
            if hms is not None:
                # TODO: checking that hour/minute/second are not
                # already set?
                self._assign_hms(res, value_repr, hms)

        elif idx + 2 < len_l and tokens[idx + 1] == ':':
            # HH:MM[:SS[.ss]]
            res.hour = int(value)
            value = self._to_decimal(tokens[idx + 2])  # TODO: try/except for this?
            (res.minute, res.second) = self._parse_min_sec(value)

            if idx + 4 < len_l and tokens[idx + 3] == ':':
                res.second, res.microsecond = self._parsems(tokens[idx + 4])

                # Skip the second ':' and the seconds token.
                idx += 2

            # Skip the first ':' and the minutes token.
            idx += 2

        elif idx + 1 < len_l and tokens[idx + 1] in ('-', '/', '.'):
            # Separator-delimited date; the same separator must be used
            # before an optional third member.
            sep = tokens[idx + 1]
            ymd.append(value_repr)

            if idx + 2 < len_l and not info.jump(tokens[idx + 2]):
                if tokens[idx + 2].isdigit():
                    # 01-01[-01]
                    ymd.append(tokens[idx + 2])
                else:
                    # 01-Jan[-01]
                    value = info.month(tokens[idx + 2])

                    if value is not None:
                        # The 'M' label presumably marks the value as a
                        # month -- confirm against ymd.append.
                        ymd.append(value, 'M')
                    else:
                        raise ValueError()

                if idx + 3 < len_l and tokens[idx + 3] == sep:
                    # We have three members
                    # NOTE: tokens[idx + 4] is not bounds-checked here; an
                    # IndexError propagates to the caller.
                    value = info.month(tokens[idx + 4])

                    if value is not None:
                        ymd.append(value, 'M')
                    else:
                        ymd.append(tokens[idx + 4])
                    idx += 2

                idx += 1
            idx += 1

        elif idx + 1 >= len_l or info.jump(tokens[idx + 1]):
            # The number is the last token or is followed by a jump token.
            if idx + 2 < len_l and info.ampm(tokens[idx + 2]) is not None:
                # 12 am
                hour = int(value)
                res.hour = self._adjust_ampm(hour, info.ampm(tokens[idx + 2]))
                idx += 1
            else:
                # Year, month or day
                ymd.append(value)
            idx += 1

        elif info.ampm(tokens[idx + 1]) is not None and (0 <= value < 24):
            # 12am
            hour = int(value)
            res.hour = self._adjust_ampm(hour, info.ampm(tokens[idx + 1]))
            idx += 1

        elif ymd.could_be_day(value):
            ymd.append(value)

        elif not fuzzy:
            raise ValueError()

        return idx
1011
1012    def _find_hms_idx(self, idx, tokens, info, allow_jump):
1013        len_l = len(tokens)
1014
1015        if idx+1 < len_l and info.hms(tokens[idx+1]) is not None:
1016            # There is an "h", "m", or "s" label following this token.  We take
1017            # assign the upcoming label to the current token.
1018            # e.g. the "12" in 12h"
1019            hms_idx = idx + 1
1020
1021        elif (allow_jump and idx+2 < len_l and tokens[idx+1] == ' ' and
1022              info.hms(tokens[idx+2]) is not None):
1023            # There is a space and then an "h", "m", or "s" label.
1024            # e.g. the "12" in "12 h"
1025            hms_idx = idx + 2
1026
1027        elif idx > 0 and info.hms(tokens[idx-1]) is not None:
1028            # There is a "h", "m", or "s" preceding this token.  Since neither
1029            # of the previous cases was hit, there is no label following this
1030            # token, so we use the previous label.
1031            # e.g. the "04" in "12h04"
1032            hms_idx = idx-1
1033
1034        elif (1 < idx == len_l-1 and tokens[idx-1] == ' ' and
1035              info.hms(tokens[idx-2]) is not None):
1036            # If we are looking at the final token, we allow for a
1037            # backward-looking check to skip over a space.
1038            # TODO: Are we sure this is the right condition here?
1039            hms_idx = idx - 2
1040
1041        else:
1042            hms_idx = None
1043
1044        return hms_idx
1045
1046    def _assign_hms(self, res, value_repr, hms):
1047        # See GH issue #427, fixing float rounding
1048        value = self._to_decimal(value_repr)
1049
1050        if hms == 0:
1051            # Hour
1052            res.hour = int(value)
1053            if value % 1:
1054                res.minute = int(60*(value % 1))
1055
1056        elif hms == 1:
1057            (res.minute, res.second) = self._parse_min_sec(value)
1058
1059        elif hms == 2:
1060            (res.second, res.microsecond) = self._parsems(value_repr)
1061
1062    def _could_be_tzname(self, hour, tzname, tzoffset, token):
1063        return (hour is not None and
1064                tzname is None and
1065                tzoffset is None and
1066                len(token) <= 5 and
1067                (all(x in string.ascii_uppercase for x in token)
1068                 or token in self.info.UTCZONE))
1069
1070    def _ampm_valid(self, hour, ampm, fuzzy):
1071        """
1072        For fuzzy parsing, 'a' or 'am' (both valid English words)
1073        may erroneously trigger the AM/PM flag. Deal with that
1074        here.
1075        """
1076        val_is_ampm = True
1077
1078        # If there's already an AM/PM flag, this one isn't one.
1079        if fuzzy and ampm is not None:
1080            val_is_ampm = False
1081
1082        # If AM/PM is found and hour is not, raise a ValueError
1083        if hour is None:
1084            if fuzzy:
1085                val_is_ampm = False
1086            else:
1087                raise ValueError('No hour specified with AM or PM flag.')
1088        elif not 0 <= hour <= 12:
1089            # If AM/PM is found, it's a 12 hour clock, so raise
1090            # an error for invalid range
1091            if fuzzy:
1092                val_is_ampm = False
1093            else:
1094                raise ValueError('Invalid hour specified for 12-hour clock.')
1095
1096        return val_is_ampm
1097
1098    def _adjust_ampm(self, hour, ampm):
1099        if hour < 12 and ampm == 1:
1100            hour += 12
1101        elif hour == 12 and ampm == 0:
1102            hour = 0
1103        return hour
1104
1105    def _parse_min_sec(self, value):
1106        # TODO: Every usage of this function sets res.second to the return
1107        # value. Are there any cases where second will be returned as None and
1108        # we *don't* want to set res.second = None?
1109        minute = int(value)
1110        second = None
1111
1112        sec_remainder = value % 1
1113        if sec_remainder:
1114            second = int(60 * sec_remainder)
1115        return (minute, second)
1116
1117    def _parse_hms(self, idx, tokens, info, hms_idx):
1118        # TODO: Is this going to admit a lot of false-positives for when we
1119        # just happen to have digits and "h", "m" or "s" characters in non-date
1120        # text?  I guess hex hashes won't have that problem, but there's plenty
1121        # of random junk out there.
1122        if hms_idx is None:
1123            hms = None
1124            new_idx = idx
1125        elif hms_idx > idx:
1126            hms = info.hms(tokens[hms_idx])
1127            new_idx = hms_idx
1128        else:
1129            # Looking backwards, increment one.
1130            hms = info.hms(tokens[hms_idx]) + 1
1131            new_idx = idx
1132
1133        return (new_idx, hms)
1134
1135    # ------------------------------------------------------------------
1136    # Handling for individual tokens.  These are kept as methods instead
1137    #  of functions for the sake of customizability via subclassing.
1138
1139    def _parsems(self, value):
1140        """Parse a I[.F] seconds value into (seconds, microseconds)."""
1141        if "." not in value:
1142            return int(value), 0
1143        else:
1144            i, f = value.split(".")
1145            return int(i), int(f.ljust(6, "0")[:6])
1146
1147    def _to_decimal(self, val):
1148        try:
1149            decimal_value = Decimal(val)
1150            # See GH 662, edge case, infinite value should not be converted
1151            #  via `_to_decimal`
1152            if not decimal_value.is_finite():
1153                raise ValueError("Converted decimal value is infinite or NaN")
1154        except Exception as e:
1155            msg = "Could not convert %s to decimal" % val
1156            six.raise_from(ValueError(msg), e)
1157        else:
1158            return decimal_value
1159
1160    # ------------------------------------------------------------------
1161    # Post-Parsing construction of datetime output.  These are kept as
1162    #  methods instead of functions for the sake of customizability via
1163    #  subclassing.
1164
1165    def _build_tzinfo(self, tzinfos, tzname, tzoffset):
1166        if callable(tzinfos):
1167            tzdata = tzinfos(tzname, tzoffset)
1168        else:
1169            tzdata = tzinfos.get(tzname)
1170        # handle case where tzinfo is paased an options that returns None
1171        # eg tzinfos = {'BRST' : None}
1172        if isinstance(tzdata, datetime.tzinfo) or tzdata is None:
1173            tzinfo = tzdata
1174        elif isinstance(tzdata, text_type):
1175            tzinfo = tz.tzstr(tzdata)
1176        elif isinstance(tzdata, integer_types):
1177            tzinfo = tz.tzoffset(tzname, tzdata)
1178        else:
1179            raise TypeError("Offset must be tzinfo subclass, tz string, "
1180                            "or int offset.")
1181        return tzinfo
1182
1183    def _build_tzaware(self, naive, res, tzinfos):
1184        if (callable(tzinfos) or (tzinfos and res.tzname in tzinfos)):
1185            tzinfo = self._build_tzinfo(tzinfos, res.tzname, res.tzoffset)
1186            aware = naive.replace(tzinfo=tzinfo)
1187            aware = self._assign_tzname(aware, res.tzname)
1188
1189        elif res.tzname and res.tzname in time.tzname:
1190            aware = naive.replace(tzinfo=tz.tzlocal())
1191
1192            # Handle ambiguous local datetime
1193            aware = self._assign_tzname(aware, res.tzname)
1194
1195            # This is mostly relevant for winter GMT zones parsed in the UK
1196            if (aware.tzname() != res.tzname and
1197                    res.tzname in self.info.UTCZONE):
1198                aware = aware.replace(tzinfo=tz.UTC)
1199
1200        elif res.tzoffset == 0:
1201            aware = naive.replace(tzinfo=tz.UTC)
1202
1203        elif res.tzoffset:
1204            aware = naive.replace(tzinfo=tz.tzoffset(res.tzname, res.tzoffset))
1205
1206        elif not res.tzname and not res.tzoffset:
1207            # i.e. no timezone information was found.
1208            aware = naive
1209
1210        elif res.tzname:
1211            # tz-like string was parsed but we don't know what to do
1212            # with it
1213            warnings.warn("tzname {tzname} identified but not understood.  "
1214                          "Pass `tzinfos` argument in order to correctly "
1215                          "return a timezone-aware datetime.  In a future "
1216                          "version, this will raise an "
1217                          "exception.".format(tzname=res.tzname),
1218                          category=UnknownTimezoneWarning)
1219            aware = naive
1220
1221        return aware
1222
1223    def _build_naive(self, res, default):
1224        repl = {}
1225        for attr in ("year", "month", "day", "hour",
1226                     "minute", "second", "microsecond"):
1227            value = getattr(res, attr)
1228            if value is not None:
1229                repl[attr] = value
1230
1231        if 'day' not in repl:
1232            # If the default day exceeds the last day of the month, fall back
1233            # to the end of the month.
1234            cyear = default.year if res.year is None else res.year
1235            cmonth = default.month if res.month is None else res.month
1236            cday = default.day if res.day is None else res.day
1237
1238            if cday > monthrange(cyear, cmonth)[1]:
1239                repl['day'] = monthrange(cyear, cmonth)[1]
1240
1241        naive = default.replace(**repl)
1242
1243        if res.weekday is not None and not res.day:
1244            naive = naive + relativedelta.relativedelta(weekday=res.weekday)
1245
1246        return naive
1247
1248    def _assign_tzname(self, dt, tzname):
1249        if dt.tzname() != tzname:
1250            new_dt = tz.enfold(dt, fold=1)
1251            if new_dt.tzname() == tzname:
1252                return new_dt
1253
1254        return dt
1255
1256    def _recombine_skipped(self, tokens, skipped_idxs):
1257        """
1258        >>> tokens = ["foo", " ", "bar", " ", "19June2000", "baz"]
1259        >>> skipped_idxs = [0, 1, 2, 5]
1260        >>> _recombine_skipped(tokens, skipped_idxs)
1261        ["foo bar", "baz"]
1262        """
1263        skipped_tokens = []
1264        for i, idx in enumerate(sorted(skipped_idxs)):
1265            if i > 0 and idx - 1 == skipped_idxs[i - 1]:
1266                skipped_tokens[-1] = skipped_tokens[-1] + tokens[idx]
1267            else:
1268                skipped_tokens.append(tokens[idx])
1269
1270        return skipped_tokens
1271
1272
# Shared module-level parser instance; parse() uses it whenever no
# ``parserinfo`` argument is supplied.
DEFAULTPARSER = parser()
1274
1275
def parse(timestr, parserinfo=None, **kwargs):
    """
    Parse a string in one of the supported formats, using the
    ``parserinfo`` parameters.

    :param timestr:
        A string containing a date/time stamp.

    :param parserinfo:
        A :class:`parserinfo` object containing parameters for the parser.
        If ``None``, the shared default parser is used, i.e. the default
        arguments to the :class:`parserinfo` constructor apply.

    The ``**kwargs`` parameter takes the following keyword arguments:

    :param default:
        The default datetime object, if this is a datetime object and not
        ``None``, elements specified in ``timestr`` replace elements in the
        default object.

    :param ignoretz:
        If set ``True``, time zones in parsed strings are ignored and a naive
        :class:`datetime` object is returned.

    :param tzinfos:
        Additional time zone names / aliases which may be present in the
        string. This argument maps time zone names (and optionally offsets
        from those time zones) to time zones. This parameter can be a
        dictionary with timezone aliases mapping time zone names to time
        zones or a function taking two parameters (``tzname`` and
        ``tzoffset``) and returning a time zone.

        The timezones to which the names are mapped can be an integer
        offset from UTC in seconds or a :class:`tzinfo` object.

        .. doctest::
           :options: +NORMALIZE_WHITESPACE

            >>> from dateutil.parser import parse
            >>> from dateutil.tz import gettz
            >>> tzinfos = {"BRST": -7200, "CST": gettz("America/Chicago")}
            >>> parse("2012-01-19 17:21:00 BRST", tzinfos=tzinfos)
            datetime.datetime(2012, 1, 19, 17, 21, tzinfo=tzoffset(u'BRST', -7200))
            >>> parse("2012-01-19 17:21:00 CST", tzinfos=tzinfos)
            datetime.datetime(2012, 1, 19, 17, 21,
                              tzinfo=tzfile('/usr/share/zoneinfo/America/Chicago'))

        This parameter is ignored if ``ignoretz`` is set.

    :param dayfirst:
        Whether to interpret the first value in an ambiguous 3-integer date
        (e.g. 01/05/09) as the day (``True``) or month (``False``). If
        ``yearfirst`` is set to ``True``, this distinguishes between YDM and
        YMD. If set to ``None``, this value is retrieved from the current
        :class:`parserinfo` object (which itself defaults to ``False``).

    :param yearfirst:
        Whether to interpret the first value in an ambiguous 3-integer date
        (e.g. 01/05/09) as the year. If ``True``, the first number is taken to
        be the year, otherwise the last number is taken to be the year. If
        this is set to ``None``, the value is retrieved from the current
        :class:`parserinfo` object (which itself defaults to ``False``).

    :param fuzzy:
        Whether to allow fuzzy parsing, allowing for string like "Today is
        January 1, 2047 at 8:21:00AM".

    :param fuzzy_with_tokens:
        If ``True``, ``fuzzy`` is automatically set to True, and the parser
        will return a tuple where the first element is the parsed
        :class:`datetime.datetime` object and the second element is
        a tuple containing the portions of the string which were ignored:

        .. doctest::

            >>> from dateutil.parser import parse
            >>> parse("Today is January 1, 2047 at 8:21:00AM", fuzzy_with_tokens=True)
            (datetime.datetime(2047, 1, 1, 8, 21), (u'Today is ', u' ', u'at '))

    :return:
        Returns a :class:`datetime.datetime` object or, if the
        ``fuzzy_with_tokens`` option is ``True``, returns a tuple, the
        first element being a :class:`datetime.datetime` object, the second
        a tuple containing the fuzzy tokens.

    :raises ValueError:
        Raised for invalid or unknown string format, if the provided
        :class:`tzinfo` is not in a valid format, or if an invalid date
        would be created.

    :raises OverflowError:
        Raised if the parsed date exceeds the largest valid C integer on
        your system.
    """
    # A custom parserinfo needs its own parser; otherwise reuse the shared
    # module-level instance.
    active_parser = parser(parserinfo) if parserinfo else DEFAULTPARSER
    return active_parser.parse(timestr, **kwargs)
1375
1376
class _tzparser(object):
    """
    Parser for time zone specification strings.

    Handles strings of the form ``STD[offset[DST[offset]]]`` optionally
    followed by comma-separated daylight saving transition rules, either
    in the ``J``/``M``/day-of-year notation of the TZ environment variable
    or in a deprecated dateutil-specific all-numeric format.
    """

    class _result(_resultbase):
        # Parsed zone description: abbreviations and offsets of standard
        # and daylight time, plus the start/end transition rules.

        __slots__ = ["stdabbr", "stdoffset", "dstabbr", "dstoffset",
                     "start", "end"]

        class _attr(_resultbase):
            # One transition rule (start or end of daylight time).
            __slots__ = ["month", "week", "weekday",
                         "yday", "jyday", "day", "time"]

        def __repr__(self):
            return self._repr("")

        def __init__(self):
            _resultbase.__init__(self)
            self.start = self._attr()
            self.end = self._attr()

    def parse(self, tzstr):
        """
        Parse ``tzstr`` into a :class:`_tzparser._result`.

        :param tzstr:
            The time zone specification string.

        :return:
            A ``_result`` whose ``any_unused_tokens`` attribute tells whether
            tokens other than ``","`` and ``":"`` were left unconsumed, or
            ``None`` if the string could not be parsed.
        """
        res = self._result()
        # Split into runs of letters, runs of digits, single ',', ':' or '.'
        # and whatever lies in between (e.g. '+', '-', '/').
        l = [x for x in re.split(r'([,:.]|[a-zA-Z]+|[0-9]+)', tzstr) if x]
        used_idxs = list()
        try:

            len_l = len(l)

            i = 0
            while i < len_l:
                # BRST+3[BRDT[+2]]
                # Collect tokens up to the first one containing a digit,
                # sign, ':' or ','; together they form an abbreviation.
                j = i
                while j < len_l and not [x for x in l[j]
                                         if x in "0123456789:,-+"]:
                    j += 1
                if j != i:
                    # The first abbreviation is standard time, the second
                    # daylight time.
                    if not res.stdabbr:
                        offattr = "stdoffset"
                        res.stdabbr = "".join(l[i:j])
                    else:
                        offattr = "dstoffset"
                        res.dstabbr = "".join(l[i:j])

                    # NOTE(review): this marks every index below j as used,
                    # not only i..j-1.
                    for ii in range(j):
                        used_idxs.append(ii)
                    i = j
                    if (i < len_l and (l[i] in ('+', '-') or l[i][0] in
                                       "0123456789")):
                        if l[i] in ('+', '-'):
                            # Yes, that's right.  See the TZ variable
                            # documentation.
                            signal = (1, -1)[l[i] == '+']
                            used_idxs.append(i)
                            i += 1
                        else:
                            signal = -1
                        len_li = len(l[i])
                        if len_li == 4:
                            # -0300
                            setattr(res, offattr, (int(l[i][:2]) * 3600 +
                                                   int(l[i][2:]) * 60) * signal)
                        elif i + 1 < len_l and l[i + 1] == ':':
                            # -03:00
                            setattr(res, offattr,
                                    (int(l[i]) * 3600 +
                                     int(l[i + 2]) * 60) * signal)
                            used_idxs.append(i)
                            i += 2
                        elif len_li <= 2:
                            # -[0]3
                            setattr(res, offattr,
                                    int(l[i][:2]) * 3600 * signal)
                        else:
                            return None
                        used_idxs.append(i)
                        i += 1
                    if res.dstabbr:
                        break
                else:
                    break

            if i < len_l:
                # ';' is accepted as an alternative to ','.
                for j in range(i, len_l):
                    if l[j] == ';':
                        l[j] = ','

                # Explicit check instead of ``assert`` so that validation
                # still happens when Python runs with -O.
                if l[i] != ',':
                    return None

                i += 1

            if i >= len_l:
                pass
            elif (8 <= l.count(',') <= 9 and
                  not [y for x in l[i:] if x != ','
                       for y in x if y not in "0123456789+-"]):
                # GMT0BST,3,0,30,3600,10,0,26,7200[,3600]
                for x in (res.start, res.end):
                    x.month = int(l[i])
                    used_idxs.append(i)
                    i += 2
                    if l[i] == '-':
                        value = int(l[i + 1]) * -1
                        used_idxs.append(i)
                        i += 1
                    else:
                        value = int(l[i])
                    used_idxs.append(i)
                    i += 2
                    # A non-zero second field is a week number and the next
                    # field a weekday; zero means the next field is a day.
                    if value:
                        x.week = value
                        x.weekday = (int(l[i]) - 1) % 7
                    else:
                        x.day = int(l[i])
                    used_idxs.append(i)
                    i += 2
                    x.time = int(l[i])
                    used_idxs.append(i)
                    i += 2
                if i < len_l:
                    # Optional trailing field: DST offset relative to the
                    # standard offset.
                    if l[i] in ('-', '+'):
                        signal = (-1, 1)[l[i] == "+"]
                        used_idxs.append(i)
                        i += 1
                    else:
                        signal = 1
                    used_idxs.append(i)
                    res.dstoffset = (res.stdoffset + int(l[i]) * signal)

                # This was a made-up format that is not in normal use
                warn(('Parsed time zone "%s" ' % tzstr) +
                     'is in a non-standard dateutil-specific format, which ' +
                     'is now deprecated; support for parsing this format ' +
                     'will be removed in future versions. It is recommended ' +
                     'that you switch to a standard format like the GNU ' +
                     'TZ variable format.', tz.DeprecatedTzFormatWarning)
            elif (l.count(',') == 2 and l[i:].count('/') <= 2 and
                  not [y for x in l[i:] if x not in (',', '/', 'J', 'M',
                                                     '.', '-', ':')
                       for y in x if y not in "0123456789"]):
                for x in (res.start, res.end):
                    if l[i] == 'J':
                        # non-leap year day (1 based)
                        used_idxs.append(i)
                        i += 1
                        x.jyday = int(l[i])
                    elif l[i] == 'M':
                        # month[-.]week[-.]weekday
                        used_idxs.append(i)
                        i += 1
                        x.month = int(l[i])
                        used_idxs.append(i)
                        i += 1
                        if l[i] not in ('-', '.'):
                            return None
                        used_idxs.append(i)
                        i += 1
                        x.week = int(l[i])
                        # Week 5 is stored as -1.
                        if x.week == 5:
                            x.week = -1
                        used_idxs.append(i)
                        i += 1
                        if l[i] not in ('-', '.'):
                            return None
                        used_idxs.append(i)
                        i += 1
                        x.weekday = (int(l[i]) - 1) % 7
                    else:
                        # year day (zero based)
                        x.yday = int(l[i]) + 1

                    used_idxs.append(i)
                    i += 1

                    if i < len_l and l[i] == '/':
                        used_idxs.append(i)
                        i += 1
                        # start time
                        len_li = len(l[i])
                        if len_li == 4:
                            # -0300
                            x.time = (int(l[i][:2]) * 3600 +
                                      int(l[i][2:]) * 60)
                        elif i + 1 < len_l and l[i + 1] == ':':
                            # -03:00
                            x.time = int(l[i]) * 3600 + int(l[i + 2]) * 60
                            used_idxs.append(i)
                            i += 2
                            if i + 1 < len_l and l[i + 1] == ':':
                                used_idxs.append(i)
                                i += 2
                                x.time += int(l[i])
                        elif len_li <= 2:
                            # -[0]3
                            x.time = (int(l[i][:2]) * 3600)
                        else:
                            return None
                        used_idxs.append(i)
                        i += 1

                    # Each rule must be followed by ',' or end the string.
                    if not (i == len_l or l[i] == ','):
                        return None

                    i += 1

                # Nothing may follow the second rule.
                if i < len_l:
                    return None

        except (IndexError, ValueError, AssertionError):
            return None

        unused_idxs = set(range(len_l)).difference(used_idxs)
        res.any_unused_tokens = not {l[n] for n in unused_idxs}.issubset({",",":"})
        return res
1586
1587
# Shared module-level _tzparser instance used by _parsetz().
DEFAULTTZPARSER = _tzparser()
1589
1590
def _parsetz(tzstr):
    """Parse ``tzstr`` with the module-level ``DEFAULTTZPARSER`` instance."""
    parsed = DEFAULTTZPARSER.parse(tzstr)
    return parsed
1593
1594
class ParserError(ValueError):
    """
    Error class for representing failure to parse a datetime string.

    The first argument may be a ``%``-style format string, in which case
    the remaining arguments are interpolated into it when the error is
    converted to a string.
    """
    def __str__(self):
        try:
            return self.args[0] % self.args[1:]
        except (TypeError, IndexError, ValueError):
            # No arguments, a non-string first argument, a mismatch between
            # format and arguments, or a malformed format such as a stray
            # '%': fall back to the standard exception text.
            return super(ParserError, self).__str__()

    def __repr__(self):
        return "%s(%s)" % (self.__class__.__name__, str(self))
1605
1606
class UnknownTimezoneWarning(RuntimeWarning):
    """
    Warning issued when the parser finds a timezone name that it cannot
    turn into a tzinfo.
    """
1609# vim:ts=4:sw=4:et
1610