1# -*- coding: utf-8 -*-
2"""
3This module offers a generic date/time string parser which is able to parse
4most known formats to represent a date and/or time.
5
6This module attempts to be forgiving with regards to unlikely input formats,
7returning a datetime object even for dates which are ambiguous. If an element
8of a date/time stamp is omitted, the following rules are applied:
9
10- If AM or PM is left unspecified, a 24-hour clock is assumed, however, an hour
11  on a 12-hour clock (``0 <= hour <= 12``) *must* be specified if AM or PM is
12  specified.
13- If a time zone is omitted, a timezone-naive datetime is returned.
14
15If any other elements are missing, they are taken from the
16:class:`datetime.datetime` object passed to the parameter ``default``. If this
17results in a day number exceeding the valid number of days per month, the
18value falls back to the end of the month.
19
20Additional resources about date/time string formats can be found below:
21
22- `A summary of the international standard date and time notation
23  <http://www.cl.cam.ac.uk/~mgk25/iso-time.html>`_
24- `W3C Date and Time Formats <http://www.w3.org/TR/NOTE-datetime>`_
25- `Time Formats (Planetary Rings Node) <https://pds-rings.seti.org:443/tools/time_formats.html>`_
26- `CPAN ParseDate module
27  <http://search.cpan.org/~muir/Time-modules-2013.0912/lib/Time/ParseDate.pm>`_
28- `Java SimpleDateFormat Class
29  <https://docs.oracle.com/javase/6/docs/api/java/text/SimpleDateFormat.html>`_
30"""
31from __future__ import unicode_literals
32
33import datetime
34import re
35import string
36import time
37import warnings
38
39from calendar import monthrange
40from io import StringIO
41
42import six
43from six import binary_type, integer_types, text_type
44
45from decimal import Decimal
46
47from warnings import warn
48
49from .. import relativedelta
50from .. import tz
51
52__all__ = ["parse", "parserinfo"]
53
54
55# TODO: pandas.core.tools.datetimes imports this explicitly.  Might be worth
56# making public and/or figuring out if there is something we can
57# take off their plate.
58class _timelex(object):
59    # Fractional seconds are sometimes split by a comma
60    _split_decimal = re.compile("([.,])")
61
62    def __init__(self, instream):
63        if six.PY2:
64            # In Python 2, we can't duck type properly because unicode has
65            # a 'decode' function, and we'd be double-decoding
66            if isinstance(instream, (binary_type, bytearray)):
67                instream = instream.decode()
68        else:
69            if getattr(instream, 'decode', None) is not None:
70                instream = instream.decode()
71
72        if isinstance(instream, text_type):
73            instream = StringIO(instream)
74        elif getattr(instream, 'read', None) is None:
75            raise TypeError('Parser must be a string or character stream, not '
76                            '{itype}'.format(itype=instream.__class__.__name__))
77
78        self.instream = instream
79        self.charstack = []
80        self.tokenstack = []
81        self.eof = False
82
83    def get_token(self):
84        """
85        This function breaks the time string into lexical units (tokens), which
86        can be parsed by the parser. Lexical units are demarcated by changes in
87        the character set, so any continuous string of letters is considered
88        one unit, any continuous string of numbers is considered one unit.
89
90        The main complication arises from the fact that dots ('.') can be used
91        both as separators (e.g. "Sep.20.2009") or decimal points (e.g.
92        "4:30:21.447"). As such, it is necessary to read the full context of
93        any dot-separated strings before breaking it into tokens; as such, this
94        function maintains a "token stack", for when the ambiguous context
95        demands that multiple tokens be parsed at once.
96        """
97        if self.tokenstack:
98            return self.tokenstack.pop(0)
99
100        seenletters = False
101        token = None
102        state = None
103
104        while not self.eof:
105            # We only realize that we've reached the end of a token when we
106            # find a character that's not part of the current token - since
107            # that character may be part of the next token, it's stored in the
108            # charstack.
109            if self.charstack:
110                nextchar = self.charstack.pop(0)
111            else:
112                nextchar = self.instream.read(1)
113                while nextchar == '\x00':
114                    nextchar = self.instream.read(1)
115
116            if not nextchar:
117                self.eof = True
118                break
119            elif not state:
120                # First character of the token - determines if we're starting
121                # to parse a word, a number or something else.
122                token = nextchar
123                if self.isword(nextchar):
124                    state = 'a'
125                elif self.isnum(nextchar):
126                    state = '0'
127                elif self.isspace(nextchar):
128                    token = ' '
129                    break  # emit token
130                else:
131                    break  # emit token
132            elif state == 'a':
133                # If we've already started reading a word, we keep reading
134                # letters until we find something that's not part of a word.
135                seenletters = True
136                if self.isword(nextchar):
137                    token += nextchar
138                elif nextchar == '.':
139                    token += nextchar
140                    state = 'a.'
141                else:
142                    self.charstack.append(nextchar)
143                    break  # emit token
144            elif state == '0':
145                # If we've already started reading a number, we keep reading
146                # numbers until we find something that doesn't fit.
147                if self.isnum(nextchar):
148                    token += nextchar
149                elif nextchar == '.' or (nextchar == ',' and len(token) >= 2):
150                    token += nextchar
151                    state = '0.'
152                else:
153                    self.charstack.append(nextchar)
154                    break  # emit token
155            elif state == 'a.':
156                # If we've seen some letters and a dot separator, continue
157                # parsing, and the tokens will be broken up later.
158                seenletters = True
159                if nextchar == '.' or self.isword(nextchar):
160                    token += nextchar
161                elif self.isnum(nextchar) and token[-1] == '.':
162                    token += nextchar
163                    state = '0.'
164                else:
165                    self.charstack.append(nextchar)
166                    break  # emit token
167            elif state == '0.':
168                # If we've seen at least one dot separator, keep going, we'll
169                # break up the tokens later.
170                if nextchar == '.' or self.isnum(nextchar):
171                    token += nextchar
172                elif self.isword(nextchar) and token[-1] == '.':
173                    token += nextchar
174                    state = 'a.'
175                else:
176                    self.charstack.append(nextchar)
177                    break  # emit token
178
179        if (state in ('a.', '0.') and (seenletters or token.count('.') > 1 or
180                                       token[-1] in '.,')):
181            l = self._split_decimal.split(token)
182            token = l[0]
183            for tok in l[1:]:
184                if tok:
185                    self.tokenstack.append(tok)
186
187        if state == '0.' and token.count('.') == 0:
188            token = token.replace(',', '.')
189
190        return token
191
192    def __iter__(self):
193        return self
194
195    def __next__(self):
196        token = self.get_token()
197        if token is None:
198            raise StopIteration
199
200        return token
201
202    def next(self):
203        return self.__next__()  # Python 2.x support
204
205    @classmethod
206    def split(cls, s):
207        return list(cls(s))
208
209    @classmethod
210    def isword(cls, nextchar):
211        """ Whether or not the next character is part of a word """
212        return nextchar.isalpha()
213
214    @classmethod
215    def isnum(cls, nextchar):
216        """ Whether the next character is part of a number """
217        return nextchar.isdigit()
218
219    @classmethod
220    def isspace(cls, nextchar):
221        """ Whether the next character is whitespace """
222        return nextchar.isspace()
223
224
225class _resultbase(object):
226
227    def __init__(self):
228        for attr in self.__slots__:
229            setattr(self, attr, None)
230
231    def _repr(self, classname):
232        l = []
233        for attr in self.__slots__:
234            value = getattr(self, attr)
235            if value is not None:
236                l.append("%s=%s" % (attr, repr(value)))
237        return "%s(%s)" % (classname, ", ".join(l))
238
239    def __len__(self):
240        return (sum(getattr(self, attr) is not None
241                    for attr in self.__slots__))
242
243    def __repr__(self):
244        return self._repr(self.__class__.__name__)
245
246
class parserinfo(object):
    """
    Class which handles what inputs are accepted. Subclass this to customize
    the language and acceptable values for each parameter.

    :param dayfirst:
        Whether to interpret the first value in an ambiguous 3-integer date
        (e.g. 01/05/09) as the day (``True``) or month (``False``). If
        ``yearfirst`` is set to ``True``, this distinguishes between YDM
        and YMD. Default is ``False``.

    :param yearfirst:
        Whether to interpret the first value in an ambiguous 3-integer date
        (e.g. 01/05/09) as the year. If ``True``, the first number is taken
        to be the year, otherwise the last number is taken to be the year.
        Default is ``False``.
    """

    # m from a.m/p.m, t from ISO T separator
    JUMP = [" ", ".", ",", ";", "-", "/", "'",
            "at", "on", "and", "ad", "m", "t", "of",
            "st", "nd", "rd", "th"]

    WEEKDAYS = [("Mon", "Monday"),
                ("Tue", "Tuesday"),     # TODO: "Tues"
                ("Wed", "Wednesday"),
                ("Thu", "Thursday"),    # TODO: "Thurs"
                ("Fri", "Friday"),
                ("Sat", "Saturday"),
                ("Sun", "Sunday")]
    MONTHS = [("Jan", "January"),
              ("Feb", "February"),      # TODO: "Febr"
              ("Mar", "March"),
              ("Apr", "April"),
              ("May", "May"),
              ("Jun", "June"),
              ("Jul", "July"),
              ("Aug", "August"),
              ("Sep", "Sept", "September"),
              ("Oct", "October"),
              ("Nov", "November"),
              ("Dec", "December")]
    HMS = [("h", "hour", "hours"),
           ("m", "minute", "minutes"),
           ("s", "second", "seconds")]
    AMPM = [("am", "a"),
            ("pm", "p")]
    UTCZONE = ["UTC", "GMT", "Z"]
    PERTAIN = ["of"]
    TZOFFSET = {}
    # TODO: ERA = ["AD", "BC", "CE", "BCE", "Stardate",
    #              "Anno Domini", "Year of Our Lord"]

    def __init__(self, dayfirst=False, yearfirst=False):
        # Build the case-insensitive lookup tables from the class-level
        # vocabularies (which subclasses may override).
        self._jump = self._convert(self.JUMP)
        self._weekdays = self._convert(self.WEEKDAYS)
        self._months = self._convert(self.MONTHS)
        self._hms = self._convert(self.HMS)
        self._ampm = self._convert(self.AMPM)
        self._utczone = self._convert(self.UTCZONE)
        self._pertain = self._convert(self.PERTAIN)

        self.dayfirst = dayfirst
        self.yearfirst = yearfirst

        # Reference point used by convertyear() for two-digit years
        self._year = time.localtime().tm_year
        self._century = self._year // 100 * 100

    def _convert(self, lst):
        """
        Build a lookup table from a vocabulary list.

        :param lst:
            A sequence whose entries are either a string or a tuple of
            alternative spellings.

        :return:
            A dict mapping each lower-cased spelling to the index of its
            entry in ``lst``.
        """
        dct = {}
        for i, entry in enumerate(lst):
            spellings = entry if isinstance(entry, tuple) else (entry,)
            for spelling in spellings:
                dct[spelling.lower()] = i
        return dct

    def jump(self, name):
        """ Whether ``name`` is a filler token which may be skipped """
        return name.lower() in self._jump

    def weekday(self, name):
        """ Index of the weekday called ``name`` (Monday is 0), or ``None`` """
        return self._weekdays.get(name.lower())

    def month(self, name):
        """ Number of the month called ``name`` (January is 1), or ``None`` """
        idx = self._months.get(name.lower())
        if idx is None:
            return None
        return idx + 1

    def hms(self, name):
        """ Index into ``HMS`` (0 hour, 1 minute, 2 second), or ``None`` """
        return self._hms.get(name.lower())

    def ampm(self, name):
        """ Index into ``AMPM`` (0 for AM, 1 for PM), or ``None`` """
        return self._ampm.get(name.lower())

    def pertain(self, name):
        """ Whether ``name`` is a "pertain" word such as "of" """
        return name.lower() in self._pertain

    def utczone(self, name):
        """ Whether ``name`` is one of the names for UTC """
        return name.lower() in self._utczone

    def tzoffset(self, name):
        """
        Return the UTC offset in seconds registered for the time zone
        ``name``: 0 for any UTC name, otherwise the ``TZOFFSET`` entry, or
        ``None`` when the name is unknown.
        """
        # The keys of self._utczone are lower-cased by _convert(), so the
        # lookup has to be case-insensitive as well; a plain
        # ``name in self._utczone`` never matches "UTC", "GMT" or "Z".
        if self.utczone(name):
            return 0

        return self.TZOFFSET.get(name)

    def convertyear(self, year, century_specified=False):
        """
        Converts two-digit years to year within [-50, 49]
        range of self._year (current local time)

        :param year:
            A non-negative year number.

        :param century_specified:
            If ``True``, ``year`` is taken literally even when it is
            smaller than 100.
        """

        # Function contract is that the year is always positive
        assert year >= 0

        if year < 100 and not century_specified:
            # assume current century to start
            year += self._century

            if year >= self._year + 50:  # if too far in future
                year -= 100
            elif year < self._year - 50:  # if too far in past
                year += 100

        return year

    def validate(self, res):
        """
        Normalize a parse result in place: expand a two-digit year and make
        the time zone name and offset agree for UTC. Always returns
        ``True``.
        """
        # move to info
        if res.year is not None:
            res.year = self.convertyear(res.year, res.century_specified)

        if (res.tzoffset == 0 and not res.tzname) or res.tzname == 'Z':
            # A bare zero offset, or the "Z" suffix, means UTC
            res.tzname = "UTC"
            res.tzoffset = 0
        elif res.tzoffset != 0 and res.tzname and self.utczone(res.tzname):
            # A UTC name always carries a zero offset
            res.tzoffset = 0
        return True
397
398
399class _ymd(list):
400    def __init__(self, *args, **kwargs):
401        super(self.__class__, self).__init__(*args, **kwargs)
402        self.century_specified = False
403        self.dstridx = None
404        self.mstridx = None
405        self.ystridx = None
406
407    @property
408    def has_year(self):
409        return self.ystridx is not None
410
411    @property
412    def has_month(self):
413        return self.mstridx is not None
414
415    @property
416    def has_day(self):
417        return self.dstridx is not None
418
419    def could_be_day(self, value):
420        if self.has_day:
421            return False
422        elif not self.has_month:
423            return 1 <= value <= 31
424        elif not self.has_year:
425            # Be permissive, assume leapyear
426            month = self[self.mstridx]
427            return 1 <= value <= monthrange(2000, month)[1]
428        else:
429            month = self[self.mstridx]
430            year = self[self.ystridx]
431            return 1 <= value <= monthrange(year, month)[1]
432
433    def append(self, val, label=None):
434        if hasattr(val, '__len__'):
435            if val.isdigit() and len(val) > 2:
436                self.century_specified = True
437                if label not in [None, 'Y']:  # pragma: no cover
438                    raise ValueError(label)
439                label = 'Y'
440        elif val > 100:
441            self.century_specified = True
442            if label not in [None, 'Y']:  # pragma: no cover
443                raise ValueError(label)
444            label = 'Y'
445
446        super(self.__class__, self).append(int(val))
447
448        if label == 'M':
449            if self.has_month:
450                raise ValueError('Month is already set')
451            self.mstridx = len(self) - 1
452        elif label == 'D':
453            if self.has_day:
454                raise ValueError('Day is already set')
455            self.dstridx = len(self) - 1
456        elif label == 'Y':
457            if self.has_year:
458                raise ValueError('Year is already set')
459            self.ystridx = len(self) - 1
460
461    def _resolve_from_stridxs(self, strids):
462        """
463        Try to resolve the identities of year/month/day elements using
464        ystridx, mstridx, and dstridx, if enough of these are specified.
465        """
466        if len(self) == 3 and len(strids) == 2:
467            # we can back out the remaining stridx value
468            missing = [x for x in range(3) if x not in strids.values()]
469            key = [x for x in ['y', 'm', 'd'] if x not in strids]
470            assert len(missing) == len(key) == 1
471            key = key[0]
472            val = missing[0]
473            strids[key] = val
474
475        assert len(self) == len(strids)  # otherwise this should not be called
476        out = {key: self[strids[key]] for key in strids}
477        return (out.get('y'), out.get('m'), out.get('d'))
478
479    def resolve_ymd(self, yearfirst, dayfirst):
480        len_ymd = len(self)
481        year, month, day = (None, None, None)
482
483        strids = (('y', self.ystridx),
484                  ('m', self.mstridx),
485                  ('d', self.dstridx))
486
487        strids = {key: val for key, val in strids if val is not None}
488        if (len(self) == len(strids) > 0 or
489                (len(self) == 3 and len(strids) == 2)):
490            return self._resolve_from_stridxs(strids)
491
492        mstridx = self.mstridx
493
494        if len_ymd > 3:
495            raise ValueError("More than three YMD values")
496        elif len_ymd == 1 or (mstridx is not None and len_ymd == 2):
497            # One member, or two members with a month string
498            if mstridx is not None:
499                month = self[mstridx]
500                # since mstridx is 0 or 1, self[mstridx-1] always
501                # looks up the other element
502                other = self[mstridx - 1]
503            else:
504                other = self[0]
505
506            if len_ymd > 1 or mstridx is None:
507                if other > 31:
508                    year = other
509                else:
510                    day = other
511
512        elif len_ymd == 2:
513            # Two members with numbers
514            if self[0] > 31:
515                # 99-01
516                year, month = self
517            elif self[1] > 31:
518                # 01-99
519                month, year = self
520            elif dayfirst and self[1] <= 12:
521                # 13-01
522                day, month = self
523            else:
524                # 01-13
525                month, day = self
526
527        elif len_ymd == 3:
528            # Three members
529            if mstridx == 0:
530                if self[1] > 31:
531                    # Apr-2003-25
532                    month, year, day = self
533                else:
534                    month, day, year = self
535            elif mstridx == 1:
536                if self[0] > 31 or (yearfirst and self[2] <= 31):
537                    # 99-Jan-01
538                    year, month, day = self
539                else:
540                    # 01-Jan-01
541                    # Give precendence to day-first, since
542                    # two-digit years is usually hand-written.
543                    day, month, year = self
544
545            elif mstridx == 2:
546                # WTF!?
547                if self[1] > 31:
548                    # 01-99-Jan
549                    day, year, month = self
550                else:
551                    # 99-01-Jan
552                    year, day, month = self
553
554            else:
555                if (self[0] > 31 or
556                    self.ystridx == 0 or
557                        (yearfirst and self[1] <= 12 and self[2] <= 31)):
558                    # 99-01-01
559                    if dayfirst and self[2] <= 12:
560                        year, day, month = self
561                    else:
562                        year, month, day = self
563                elif self[0] > 12 or (dayfirst and self[1] <= 12):
564                    # 13-01-01
565                    day, month, year = self
566                else:
567                    # 01-13-01
568                    month, day, year = self
569
570        return year, month, day
571
572
573class parser(object):
574    def __init__(self, info=None):
575        self.info = info or parserinfo()
576
577    def parse(self, timestr, default=None,
578              ignoretz=False, tzinfos=None, **kwargs):
579        """
580        Parse the date/time string into a :class:`datetime.datetime` object.
581
582        :param timestr:
583            Any date/time string using the supported formats.
584
585        :param default:
586            The default datetime object, if this is a datetime object and not
587            ``None``, elements specified in ``timestr`` replace elements in the
588            default object.
589
590        :param ignoretz:
591            If set ``True``, time zones in parsed strings are ignored and a
592            naive :class:`datetime.datetime` object is returned.
593
594        :param tzinfos:
595            Additional time zone names / aliases which may be present in the
596            string. This argument maps time zone names (and optionally offsets
597            from those time zones) to time zones. This parameter can be a
598            dictionary with timezone aliases mapping time zone names to time
599            zones or a function taking two parameters (``tzname`` and
600            ``tzoffset``) and returning a time zone.
601
602            The timezones to which the names are mapped can be an integer
603            offset from UTC in seconds or a :class:`tzinfo` object.
604
605            .. doctest::
606               :options: +NORMALIZE_WHITESPACE
607
608                >>> from dateutil.parser import parse
609                >>> from dateutil.tz import gettz
610                >>> tzinfos = {"BRST": -7200, "CST": gettz("America/Chicago")}
611                >>> parse("2012-01-19 17:21:00 BRST", tzinfos=tzinfos)
612                datetime.datetime(2012, 1, 19, 17, 21, tzinfo=tzoffset(u'BRST', -7200))
613                >>> parse("2012-01-19 17:21:00 CST", tzinfos=tzinfos)
614                datetime.datetime(2012, 1, 19, 17, 21,
615                                  tzinfo=tzfile('/usr/share/zoneinfo/America/Chicago'))
616
617            This parameter is ignored if ``ignoretz`` is set.
618
619        :param \\*\\*kwargs:
620            Keyword arguments as passed to ``_parse()``.
621
622        :return:
623            Returns a :class:`datetime.datetime` object or, if the
624            ``fuzzy_with_tokens`` option is ``True``, returns a tuple, the
625            first element being a :class:`datetime.datetime` object, the second
626            a tuple containing the fuzzy tokens.
627
628        :raises ValueError:
629            Raised for invalid or unknown string format, if the provided
630            :class:`tzinfo` is not in a valid format, or if an invalid date
631            would be created.
632
633        :raises TypeError:
634            Raised for non-string or character stream input.
635
636        :raises OverflowError:
637            Raised if the parsed date exceeds the largest valid C integer on
638            your system.
639        """
640
641        if default is None:
642            default = datetime.datetime.now().replace(hour=0, minute=0,
643                                                      second=0, microsecond=0)
644
645        res, skipped_tokens = self._parse(timestr, **kwargs)
646
647        if res is None:
648            raise ValueError("Unknown string format:", timestr)
649
650        if len(res) == 0:
651            raise ValueError("String does not contain a date:", timestr)
652
653        ret = self._build_naive(res, default)
654
655        if not ignoretz:
656            ret = self._build_tzaware(ret, res, tzinfos)
657
658        if kwargs.get('fuzzy_with_tokens', False):
659            return ret, skipped_tokens
660        else:
661            return ret
662
    class _result(_resultbase):
        # Record of everything extracted from one time string. The names
        # below are the fields which ``_resultbase`` initialises to ``None``
        # and which it takes into account for ``len()`` and ``repr()``.
        # NOTE: ``_parse()`` also assigns ``century_specified`` on the
        # instance; it is not listed here, so it is stored as an ordinary
        # instance attribute and is not counted by ``len()``/``repr()``.
        __slots__ = ["year", "month", "day", "weekday",
                     "hour", "minute", "second", "microsecond",
                     "tzname", "tzoffset", "ampm","any_unused_tokens"]
667
668    def _parse(self, timestr, dayfirst=None, yearfirst=None, fuzzy=False,
669               fuzzy_with_tokens=False):
670        """
671        Private method which performs the heavy lifting of parsing, called from
672        ``parse()``, which passes on its ``kwargs`` to this function.
673
674        :param timestr:
675            The string to parse.
676
677        :param dayfirst:
678            Whether to interpret the first value in an ambiguous 3-integer date
679            (e.g. 01/05/09) as the day (``True``) or month (``False``). If
680            ``yearfirst`` is set to ``True``, this distinguishes between YDM
681            and YMD. If set to ``None``, this value is retrieved from the
682            current :class:`parserinfo` object (which itself defaults to
683            ``False``).
684
685        :param yearfirst:
686            Whether to interpret the first value in an ambiguous 3-integer date
687            (e.g. 01/05/09) as the year. If ``True``, the first number is taken
688            to be the year, otherwise the last number is taken to be the year.
689            If this is set to ``None``, the value is retrieved from the current
690            :class:`parserinfo` object (which itself defaults to ``False``).
691
692        :param fuzzy:
693            Whether to allow fuzzy parsing, allowing for string like "Today is
694            January 1, 2047 at 8:21:00AM".
695
696        :param fuzzy_with_tokens:
697            If ``True``, ``fuzzy`` is automatically set to True, and the parser
698            will return a tuple where the first element is the parsed
699            :class:`datetime.datetime` datetimestamp and the second element is
700            a tuple containing the portions of the string which were ignored:
701
702            .. doctest::
703
704                >>> from dateutil.parser import parse
705                >>> parse("Today is January 1, 2047 at 8:21:00AM", fuzzy_with_tokens=True)
706                (datetime.datetime(2047, 1, 1, 8, 21), (u'Today is ', u' ', u'at '))
707
708        """
709        if fuzzy_with_tokens:
710            fuzzy = True
711
712        info = self.info
713
714        if dayfirst is None:
715            dayfirst = info.dayfirst
716
717        if yearfirst is None:
718            yearfirst = info.yearfirst
719
720        res = self._result()
721        l = _timelex.split(timestr)         # Splits the timestr into tokens
722
723        skipped_idxs = []
724
725        # year/month/day list
726        ymd = _ymd()
727
728        len_l = len(l)
729        i = 0
730        try:
731            while i < len_l:
732
733                # Check if it's a number
734                value_repr = l[i]
735                try:
736                    value = float(value_repr)
737                except ValueError:
738                    value = None
739
740                if value is not None:
741                    # Numeric token
742                    i = self._parse_numeric_token(l, i, info, ymd, res, fuzzy)
743
744                # Check weekday
745                elif info.weekday(l[i]) is not None:
746                    value = info.weekday(l[i])
747                    res.weekday = value
748
749                # Check month name
750                elif info.month(l[i]) is not None:
751                    value = info.month(l[i])
752                    ymd.append(value, 'M')
753
754                    if i + 1 < len_l:
755                        if l[i + 1] in ('-', '/'):
756                            # Jan-01[-99]
757                            sep = l[i + 1]
758                            ymd.append(l[i + 2])
759
760                            if i + 3 < len_l and l[i + 3] == sep:
761                                # Jan-01-99
762                                ymd.append(l[i + 4])
763                                i += 2
764
765                            i += 2
766
767                        elif (i + 4 < len_l and l[i + 1] == l[i + 3] == ' ' and
768                              info.pertain(l[i + 2])):
769                            # Jan of 01
770                            # In this case, 01 is clearly year
771                            if l[i + 4].isdigit():
772                                # Convert it here to become unambiguous
773                                value = int(l[i + 4])
774                                year = str(info.convertyear(value))
775                                ymd.append(year, 'Y')
776                            else:
777                                # Wrong guess
778                                pass
779                                # TODO: not hit in tests
780                            i += 4
781
782                # Check am/pm
783                elif info.ampm(l[i]) is not None:
784                    value = info.ampm(l[i])
785                    val_is_ampm = self._ampm_valid(res.hour, res.ampm, fuzzy)
786
787                    if val_is_ampm:
788                        res.hour = self._adjust_ampm(res.hour, value)
789                        res.ampm = value
790
791                    elif fuzzy:
792                        skipped_idxs.append(i)
793
794                # Check for a timezone name
795                elif self._could_be_tzname(res.hour, res.tzname, res.tzoffset, l[i]):
796                    res.tzname = l[i]
797                    res.tzoffset = info.tzoffset(res.tzname)
798
799                    # Check for something like GMT+3, or BRST+3. Notice
800                    # that it doesn't mean "I am 3 hours after GMT", but
801                    # "my time +3 is GMT". If found, we reverse the
802                    # logic so that timezone parsing code will get it
803                    # right.
804                    if i + 1 < len_l and l[i + 1] in ('+', '-'):
805                        l[i + 1] = ('+', '-')[l[i + 1] == '+']
806                        res.tzoffset = None
807                        if info.utczone(res.tzname):
808                            # With something like GMT+3, the timezone
809                            # is *not* GMT.
810                            res.tzname = None
811
812                # Check for a numbered timezone
813                elif res.hour is not None and l[i] in ('+', '-'):
814                    signal = (-1, 1)[l[i] == '+']
815                    len_li = len(l[i + 1])
816
817                    # TODO: check that l[i + 1] is integer?
818                    if len_li == 4:
819                        # -0300
820                        hour_offset = int(l[i + 1][:2])
821                        min_offset = int(l[i + 1][2:])
822                    elif i + 2 < len_l and l[i + 2] == ':':
823                        # -03:00
824                        hour_offset = int(l[i + 1])
825                        min_offset = int(l[i + 3])  # TODO: Check that l[i+3] is minute-like?
826                        i += 2
827                    elif len_li <= 2:
828                        # -[0]3
829                        hour_offset = int(l[i + 1][:2])
830                        min_offset = 0
831                    else:
832                        raise ValueError(timestr)
833
834                    res.tzoffset = signal * (hour_offset * 3600 + min_offset * 60)
835
836                    # Look for a timezone name between parenthesis
837                    if (i + 5 < len_l and
838                            info.jump(l[i + 2]) and l[i + 3] == '(' and
839                            l[i + 5] == ')' and
840                            3 <= len(l[i + 4]) and
841                            self._could_be_tzname(res.hour, res.tzname,
842                                                  None, l[i + 4])):
843                        # -0300 (BRST)
844                        res.tzname = l[i + 4]
845                        i += 4
846
847                    i += 1
848
849                # Check jumps
850                elif not (info.jump(l[i]) or fuzzy):
851                    raise ValueError(timestr)
852
853                else:
854                    skipped_idxs.append(i)
855                i += 1
856
857            # Process year/month/day
858            year, month, day = ymd.resolve_ymd(yearfirst, dayfirst)
859
860            res.century_specified = ymd.century_specified
861            res.year = year
862            res.month = month
863            res.day = day
864
865        except (IndexError, ValueError):
866            return None, None
867
868        if not info.validate(res):
869            return None, None
870
871        if fuzzy_with_tokens:
872            skipped_tokens = self._recombine_skipped(l, skipped_idxs)
873            return res, tuple(skipped_tokens)
874        else:
875            return res, None
876
    def _parse_numeric_token(self, tokens, idx, info, ymd, res, fuzzy):
        """
        Interpret the numeric token at ``tokens[idx]``.

        Depending on the length of the token and on the tokens around it,
        the value is stored as a time component on ``res`` or appended to
        ``ymd`` as a year/month/day candidate. Both objects are modified in
        place.

        :param tokens:
            The list of tokens being parsed.

        :param idx:
            Index of the numeric token to interpret.

        :param info:
            Object providing the ``hms``, ``jump``, ``month`` and ``ampm``
            lookups used to classify neighbouring tokens.

        :param ymd:
            Accumulator of year/month/day candidates; it is used here via
            ``len()``, truthiness, ``append()`` and ``could_be_day()``.

        :param res:
            Result object whose ``hour``, ``minute``, ``second`` and
            ``microsecond`` attributes may be set.

        :param fuzzy:
            If true, a number that matches none of the recognised patterns
            is ignored instead of raising.

        :return:
            The index of the last token consumed by this call; this is
            ``idx`` itself when no following tokens were used.

        :raises ValueError:
            If the token cannot be converted to a decimal, if a non-numeric
            token following a date separator is not a month name, or if the
            number matches no pattern and ``fuzzy`` is false.
        """
        # Token is a number
        value_repr = tokens[idx]
        try:
            value = self._to_decimal(value_repr)
        except Exception as e:
            six.raise_from(ValueError('Unknown numeric token'), e)

        # Length of the token text itself (not of the token list).
        len_li = len(value_repr)

        len_l = len(tokens)

        # The date is already complete (three ymd entries) and no hour has
        # been seen: a 2- or 4-digit number that is not followed by ':' or
        # an h/m/s label is read as HH or HHMM.
        if (len(ymd) == 3 and len_li in (2, 4) and
            res.hour is None and
            (idx + 1 >= len_l or
             (tokens[idx + 1] != ':' and
              info.hms(tokens[idx + 1]) is None))):
            # 19990101T23[59]
            s = tokens[idx]
            res.hour = int(s[:2])

            if len_li == 4:
                res.minute = int(s[2:])

        elif len_li == 6 or (len_li > 6 and tokens[idx].find('.') == 6):
            # YYMMDD or HHMMSS[.ss]
            s = tokens[idx]

            # Only treated as a date when no date parts have been collected
            # yet and there is no fractional part.
            if not ymd and '.' not in tokens[idx]:
                ymd.append(s[:2])
                ymd.append(s[2:4])
                ymd.append(s[4:])
            else:
                # 19990101T235959[.59]

                # TODO: Check if res attributes already set.
                res.hour = int(s[:2])
                res.minute = int(s[2:4])
                res.second, res.microsecond = self._parsems(s[4:])

        elif len_li in (8, 12, 14):
            # YYYYMMDD, optionally followed by HHMM (12) or HHMMSS (14)
            s = tokens[idx]
            ymd.append(s[:4], 'Y')
            ymd.append(s[4:6])
            ymd.append(s[6:8])

            if len_li > 8:
                res.hour = int(s[8:10])
                res.minute = int(s[10:12])

                if len_li > 12:
                    res.second = int(s[12:])

        elif self._find_hms_idx(idx, tokens, info, allow_jump=True) is not None:
            # HH[ ]h or MM[ ]m or SS[.ss][ ]s
            # NOTE(review): _find_hms_idx is evaluated a second time here
            # with the same arguments as in the condition above.
            hms_idx = self._find_hms_idx(idx, tokens, info, allow_jump=True)
            (idx, hms) = self._parse_hms(idx, tokens, info, hms_idx)
            if hms is not None:
                # TODO: checking that hour/minute/second are not
                # already set?
                self._assign_hms(res, value_repr, hms)

        elif idx + 2 < len_l and tokens[idx + 1] == ':':
            # HH:MM[:SS[.ss]]
            res.hour = int(value)
            value = self._to_decimal(tokens[idx + 2])  # TODO: try/except for this?
            (res.minute, res.second) = self._parse_min_sec(value)

            if idx + 4 < len_l and tokens[idx + 3] == ':':
                res.second, res.microsecond = self._parsems(tokens[idx + 4])

                # Skip the second ':' and the seconds token.
                idx += 2

            # Skip the first ':' and the minutes token.
            idx += 2

        elif idx + 1 < len_l and tokens[idx + 1] in ('-', '/', '.'):
            # Date written with separators; ``sep`` must repeat for a third
            # member to be accepted.
            sep = tokens[idx + 1]
            ymd.append(value_repr)

            if idx + 2 < len_l and not info.jump(tokens[idx + 2]):
                if tokens[idx + 2].isdigit():
                    # 01-01[-01]
                    ymd.append(tokens[idx + 2])
                else:
                    # 01-Jan[-01]
                    value = info.month(tokens[idx + 2])

                    if value is not None:
                        ymd.append(value, 'M')
                    else:
                        raise ValueError()

                if idx + 3 < len_l and tokens[idx + 3] == sep:
                    # We have three members
                    # NOTE(review): tokens[idx + 4] is read without a bounds
                    # check, so a trailing separator raises IndexError here.
                    value = info.month(tokens[idx + 4])

                    if value is not None:
                        ymd.append(value, 'M')
                    else:
                        ymd.append(tokens[idx + 4])
                    idx += 2

                idx += 1
            idx += 1

        elif idx + 1 >= len_l or info.jump(tokens[idx + 1]):
            # The number is the last token or is followed by a jump token.
            if idx + 2 < len_l and info.ampm(tokens[idx + 2]) is not None:
                # 12 am
                hour = int(value)
                res.hour = self._adjust_ampm(hour, info.ampm(tokens[idx + 2]))
                idx += 1
            else:
                # Year, month or day
                ymd.append(value)
            idx += 1

        elif info.ampm(tokens[idx + 1]) is not None and (0 <= value < 24):
            # 12am
            hour = int(value)
            res.hour = self._adjust_ampm(hour, info.ampm(tokens[idx + 1]))
            idx += 1

        elif ymd.could_be_day(value):
            ymd.append(value)

        elif not fuzzy:
            raise ValueError()

        return idx
1007
1008    def _find_hms_idx(self, idx, tokens, info, allow_jump):
1009        len_l = len(tokens)
1010
1011        if idx+1 < len_l and info.hms(tokens[idx+1]) is not None:
1012            # There is an "h", "m", or "s" label following this token.  We take
1013            # assign the upcoming label to the current token.
1014            # e.g. the "12" in 12h"
1015            hms_idx = idx + 1
1016
1017        elif (allow_jump and idx+2 < len_l and tokens[idx+1] == ' ' and
1018              info.hms(tokens[idx+2]) is not None):
1019            # There is a space and then an "h", "m", or "s" label.
1020            # e.g. the "12" in "12 h"
1021            hms_idx = idx + 2
1022
1023        elif idx > 0 and info.hms(tokens[idx-1]) is not None:
1024            # There is a "h", "m", or "s" preceeding this token.  Since neither
1025            # of the previous cases was hit, there is no label following this
1026            # token, so we use the previous label.
1027            # e.g. the "04" in "12h04"
1028            hms_idx = idx-1
1029
1030        elif (1 < idx == len_l-1 and tokens[idx-1] == ' ' and
1031              info.hms(tokens[idx-2]) is not None):
1032            # If we are looking at the final token, we allow for a
1033            # backward-looking check to skip over a space.
1034            # TODO: Are we sure this is the right condition here?
1035            hms_idx = idx - 2
1036
1037        else:
1038            hms_idx = None
1039
1040        return hms_idx
1041
1042    def _assign_hms(self, res, value_repr, hms):
1043        # See GH issue #427, fixing float rounding
1044        value = self._to_decimal(value_repr)
1045
1046        if hms == 0:
1047            # Hour
1048            res.hour = int(value)
1049            if value % 1:
1050                res.minute = int(60*(value % 1))
1051
1052        elif hms == 1:
1053            (res.minute, res.second) = self._parse_min_sec(value)
1054
1055        elif hms == 2:
1056            (res.second, res.microsecond) = self._parsems(value_repr)
1057
1058    def _could_be_tzname(self, hour, tzname, tzoffset, token):
1059        return (hour is not None and
1060                tzname is None and
1061                tzoffset is None and
1062                len(token) <= 5 and
1063                all(x in string.ascii_uppercase for x in token))
1064
1065    def _ampm_valid(self, hour, ampm, fuzzy):
1066        """
1067        For fuzzy parsing, 'a' or 'am' (both valid English words)
1068        may erroneously trigger the AM/PM flag. Deal with that
1069        here.
1070        """
1071        val_is_ampm = True
1072
1073        # If there's already an AM/PM flag, this one isn't one.
1074        if fuzzy and ampm is not None:
1075            val_is_ampm = False
1076
1077        # If AM/PM is found and hour is not, raise a ValueError
1078        if hour is None:
1079            if fuzzy:
1080                val_is_ampm = False
1081            else:
1082                raise ValueError('No hour specified with AM or PM flag.')
1083        elif not 0 <= hour <= 12:
1084            # If AM/PM is found, it's a 12 hour clock, so raise
1085            # an error for invalid range
1086            if fuzzy:
1087                val_is_ampm = False
1088            else:
1089                raise ValueError('Invalid hour specified for 12-hour clock.')
1090
1091        return val_is_ampm
1092
1093    def _adjust_ampm(self, hour, ampm):
1094        if hour < 12 and ampm == 1:
1095            hour += 12
1096        elif hour == 12 and ampm == 0:
1097            hour = 0
1098        return hour
1099
1100    def _parse_min_sec(self, value):
1101        # TODO: Every usage of this function sets res.second to the return
1102        # value. Are there any cases where second will be returned as None and
1103        # we *dont* want to set res.second = None?
1104        minute = int(value)
1105        second = None
1106
1107        sec_remainder = value % 1
1108        if sec_remainder:
1109            second = int(60 * sec_remainder)
1110        return (minute, second)
1111
1112    def _parsems(self, value):
1113        """Parse a I[.F] seconds value into (seconds, microseconds)."""
1114        if "." not in value:
1115            return int(value), 0
1116        else:
1117            i, f = value.split(".")
1118            return int(i), int(f.ljust(6, "0")[:6])
1119
1120    def _parse_hms(self, idx, tokens, info, hms_idx):
1121        # TODO: Is this going to admit a lot of false-positives for when we
1122        # just happen to have digits and "h", "m" or "s" characters in non-date
1123        # text?  I guess hex hashes won't have that problem, but there's plenty
1124        # of random junk out there.
1125        if hms_idx is None:
1126            hms = None
1127            new_idx = idx
1128        elif hms_idx > idx:
1129            hms = info.hms(tokens[hms_idx])
1130            new_idx = hms_idx
1131        else:
1132            # Looking backwards, increment one.
1133            hms = info.hms(tokens[hms_idx]) + 1
1134            new_idx = idx
1135
1136        return (new_idx, hms)
1137
1138    def _recombine_skipped(self, tokens, skipped_idxs):
1139        """
1140        >>> tokens = ["foo", " ", "bar", " ", "19June2000", "baz"]
1141        >>> skipped_idxs = [0, 1, 2, 5]
1142        >>> _recombine_skipped(tokens, skipped_idxs)
1143        ["foo bar", "baz"]
1144        """
1145        skipped_tokens = []
1146        for i, idx in enumerate(sorted(skipped_idxs)):
1147            if i > 0 and idx - 1 == skipped_idxs[i - 1]:
1148                skipped_tokens[-1] = skipped_tokens[-1] + tokens[idx]
1149            else:
1150                skipped_tokens.append(tokens[idx])
1151
1152        return skipped_tokens
1153
1154    def _build_tzinfo(self, tzinfos, tzname, tzoffset):
1155        if callable(tzinfos):
1156            tzdata = tzinfos(tzname, tzoffset)
1157        else:
1158            tzdata = tzinfos.get(tzname)
1159        # handle case where tzinfo is paased an options that returns None
1160        # eg tzinfos = {'BRST' : None}
1161        if isinstance(tzdata, datetime.tzinfo) or tzdata is None:
1162            tzinfo = tzdata
1163        elif isinstance(tzdata, text_type):
1164            tzinfo = tz.tzstr(tzdata)
1165        elif isinstance(tzdata, integer_types):
1166            tzinfo = tz.tzoffset(tzname, tzdata)
1167        return tzinfo
1168
1169    def _build_tzaware(self, naive, res, tzinfos):
1170        if (callable(tzinfos) or (tzinfos and res.tzname in tzinfos)):
1171            tzinfo = self._build_tzinfo(tzinfos, res.tzname, res.tzoffset)
1172            aware = naive.replace(tzinfo=tzinfo)
1173            aware = self._assign_tzname(aware, res.tzname)
1174
1175        elif res.tzname and res.tzname in time.tzname:
1176            aware = naive.replace(tzinfo=tz.tzlocal())
1177
1178            # Handle ambiguous local datetime
1179            aware = self._assign_tzname(aware, res.tzname)
1180
1181            # This is mostly relevant for winter GMT zones parsed in the UK
1182            if (aware.tzname() != res.tzname and
1183                    res.tzname in self.info.UTCZONE):
1184                aware = aware.replace(tzinfo=tz.tzutc())
1185
1186        elif res.tzoffset == 0:
1187            aware = naive.replace(tzinfo=tz.tzutc())
1188
1189        elif res.tzoffset:
1190            aware = naive.replace(tzinfo=tz.tzoffset(res.tzname, res.tzoffset))
1191
1192        elif not res.tzname and not res.tzoffset:
1193            # i.e. no timezone information was found.
1194            aware = naive
1195
1196        elif res.tzname:
1197            # tz-like string was parsed but we don't know what to do
1198            # with it
1199            warnings.warn("tzname {tzname} identified but not understood.  "
1200                          "Pass `tzinfos` argument in order to correctly "
1201                          "return a timezone-aware datetime.  In a future "
1202                          "version, this will raise an "
1203                          "exception.".format(tzname=res.tzname),
1204                          category=UnknownTimezoneWarning)
1205            aware = naive
1206
1207        return aware
1208
1209    def _build_naive(self, res, default):
1210        repl = {}
1211        for attr in ("year", "month", "day", "hour",
1212                     "minute", "second", "microsecond"):
1213            value = getattr(res, attr)
1214            if value is not None:
1215                repl[attr] = value
1216
1217        if 'day' not in repl:
1218            # If the default day exceeds the last day of the month, fall back
1219            # to the end of the month.
1220            cyear = default.year if res.year is None else res.year
1221            cmonth = default.month if res.month is None else res.month
1222            cday = default.day if res.day is None else res.day
1223
1224            if cday > monthrange(cyear, cmonth)[1]:
1225                repl['day'] = monthrange(cyear, cmonth)[1]
1226
1227        naive = default.replace(**repl)
1228
1229        if res.weekday is not None and not res.day:
1230            naive = naive + relativedelta.relativedelta(weekday=res.weekday)
1231
1232        return naive
1233
1234    def _assign_tzname(self, dt, tzname):
1235        if dt.tzname() != tzname:
1236            new_dt = tz.enfold(dt, fold=1)
1237            if new_dt.tzname() == tzname:
1238                return new_dt
1239
1240        return dt
1241
1242    def _to_decimal(self, val):
1243        try:
1244            decimal_value = Decimal(val)
1245            # See GH 662, edge case, infinite value should not be converted via `_to_decimal`
1246            if not decimal_value.is_finite():
1247                raise ValueError("Converted decimal value is infinite or NaN")
1248        except Exception as e:
1249            msg = "Could not convert %s to decimal" % val
1250            six.raise_from(ValueError(msg), e)
1251        else:
1252            return decimal_value
1253
1254
# Shared parser instance that parse() falls back on when it is called
# without a ``parserinfo`` argument.
DEFAULTPARSER = parser()
1256
1257
def parse(timestr, parserinfo=None, **kwargs):
    """

    Parse a string in one of the supported formats, using the
    ``parserinfo`` parameters.

    :param timestr:
        A string containing a date/time stamp.

    :param parserinfo:
        A :class:`parserinfo` object containing parameters for the parser.
        If ``None``, the default arguments to the :class:`parserinfo`
        constructor are used.

    The ``**kwargs`` parameter takes the following keyword arguments:

    :param default:
        The default datetime object. If this is a datetime object and not
        ``None``, elements specified in ``timestr`` replace elements in the
        default object.

    :param ignoretz:
        If set to ``True``, time zones in parsed strings are ignored and a
        naive :class:`datetime` object is returned.

    :param tzinfos:
        Additional time zone names / aliases which may be present in the
        string. This argument maps time zone names (and optionally offsets
        from those time zones) to time zones. This parameter can be a
        dictionary with timezone aliases mapping time zone names to time
        zones or a function taking two parameters (``tzname`` and
        ``tzoffset``) and returning a time zone.

        The timezones to which the names are mapped can be an integer
        offset from UTC in seconds or a :class:`tzinfo` object.

        .. doctest::
           :options: +NORMALIZE_WHITESPACE

            >>> from dateutil.parser import parse
            >>> from dateutil.tz import gettz
            >>> tzinfos = {"BRST": -7200, "CST": gettz("America/Chicago")}
            >>> parse("2012-01-19 17:21:00 BRST", tzinfos=tzinfos)
            datetime.datetime(2012, 1, 19, 17, 21, tzinfo=tzoffset(u'BRST', -7200))
            >>> parse("2012-01-19 17:21:00 CST", tzinfos=tzinfos)
            datetime.datetime(2012, 1, 19, 17, 21,
                              tzinfo=tzfile('/usr/share/zoneinfo/America/Chicago'))

        This parameter is ignored if ``ignoretz`` is set.

    :param dayfirst:
        Whether to interpret the first value in an ambiguous 3-integer date
        (e.g. 01/05/09) as the day (``True``) or month (``False``). If
        ``yearfirst`` is set to ``True``, this distinguishes between YDM and
        YMD. If set to ``None``, this value is retrieved from the current
        :class:`parserinfo` object (which itself defaults to ``False``).

    :param yearfirst:
        Whether to interpret the first value in an ambiguous 3-integer date
        (e.g. 01/05/09) as the year. If ``True``, the first number is taken to
        be the year, otherwise the last number is taken to be the year. If
        this is set to ``None``, the value is retrieved from the current
        :class:`parserinfo` object (which itself defaults to ``False``).

    :param fuzzy:
        Whether to allow fuzzy parsing, allowing for strings like "Today is
        January 1, 2047 at 8:21:00AM".

    :param fuzzy_with_tokens:
        If ``True``, ``fuzzy`` is automatically set to True, and the parser
        will return a tuple where the first element is the parsed
        :class:`datetime.datetime` timestamp and the second element is
        a tuple containing the portions of the string which were ignored:

        .. doctest::

            >>> from dateutil.parser import parse
            >>> parse("Today is January 1, 2047 at 8:21:00AM", fuzzy_with_tokens=True)
            (datetime.datetime(2047, 1, 1, 8, 21), (u'Today is ', u' ', u'at '))

    :return:
        Returns a :class:`datetime.datetime` object or, if the
        ``fuzzy_with_tokens`` option is ``True``, returns a tuple, the
        first element being a :class:`datetime.datetime` object, the second
        a tuple containing the fuzzy tokens.

    :raises ValueError:
        Raised for invalid or unknown string format, if the provided
        :class:`tzinfo` is not in a valid format, or if an invalid date
        would be created.

    :raises OverflowError:
        Raised if the parsed date exceeds the largest valid C integer on
        your system.
    """
    # A custom parserinfo needs its own parser; otherwise reuse the shared
    # module-level instance.
    chosen_parser = parser(parserinfo) if parserinfo else DEFAULTPARSER
    return chosen_parser.parse(timestr, **kwargs)
1357
1358
class _tzparser(object):
    """
    Parser for time zone description strings such as ``BRST+3BRDT+2``,
    optionally followed by comma-separated daylight saving rules.
    """

    class _result(_resultbase):

        __slots__ = ["stdabbr", "stdoffset", "dstabbr", "dstoffset",
                     "start", "end"]

        class _attr(_resultbase):
            __slots__ = ["month", "week", "weekday",
                         "yday", "jyday", "day", "time"]

        def __repr__(self):
            return self._repr("")

        def __init__(self):
            _resultbase.__init__(self)
            # Rules for the start and the end of daylight saving time.
            self.start = self._attr()
            self.end = self._attr()

    def parse(self, tzstr):
        """
        Parse ``tzstr`` into a :class:`_tzparser._result`.

        Three layouts are recognised after the abbreviation/offset part:
        nothing at all, the deprecated dateutil-specific list of eight or
        nine comma-separated integers, and a pair of ``J``/``M``/day-number
        rules with optional ``/time`` suffixes.

        :param tzstr:
            The time zone string to parse.

        :return:
            The populated result object, with ``any_unused_tokens`` set to
            whether tokens other than ``","`` and ``":"`` were left
            unconsumed, or ``None`` if the string could not be parsed.
        """
        res = self._result()
        l = [x for x in re.split(r'([,:.]|[a-zA-Z]+|[0-9]+)',tzstr) if x]
        used_idxs = list()
        try:

            len_l = len(l)

            i = 0
            while i < len_l:
                # BRST+3[BRDT[+2]]
                j = i
                # Advance j over tokens containing no digit, sign, colon or
                # comma: together they form the abbreviation.
                while j < len_l and not [x for x in l[j]
                                         if x in "0123456789:,-+"]:
                    j += 1
                if j != i:
                    # The first abbreviation is standard time, the second
                    # one daylight saving time.
                    if not res.stdabbr:
                        offattr = "stdoffset"
                        res.stdabbr = "".join(l[i:j])
                    else:
                        offattr = "dstoffset"
                        res.dstabbr = "".join(l[i:j])

                    for ii in range(j):
                        used_idxs.append(ii)
                    i = j
                    if (i < len_l and (l[i] in ('+', '-') or l[i][0] in
                                       "0123456789")):
                        if l[i] in ('+', '-'):
                            # Yes, that's right.  See the TZ variable
                            # documentation.
                            signal = (1, -1)[l[i] == '+']
                            used_idxs.append(i)
                            i += 1
                        else:
                            signal = -1
                        len_li = len(l[i])
                        if len_li == 4:
                            # -0300
                            setattr(res, offattr, (int(l[i][:2]) * 3600 +
                                                   int(l[i][2:]) * 60) * signal)
                        elif i + 1 < len_l and l[i + 1] == ':':
                            # -03:00
                            setattr(res, offattr,
                                    (int(l[i]) * 3600 +
                                     int(l[i + 2]) * 60) * signal)
                            used_idxs.append(i)
                            i += 2
                        elif len_li <= 2:
                            # -[0]3
                            setattr(res, offattr,
                                    int(l[i][:2]) * 3600 * signal)
                        else:
                            return None
                        used_idxs.append(i)
                        i += 1
                    if res.dstabbr:
                        break
                else:
                    break


            if i < len_l:
                # Accept ';' as an alternative to ',' in the rule part.
                for j in range(i, len_l):
                    if l[j] == ';':
                        l[j] = ','

                # Explicit check rather than ``assert`` so that validation
                # is not stripped when running under ``python -O``.
                if l[i] != ',':
                    return None

                i += 1

            if i >= len_l:
                pass
            elif (8 <= l.count(',') <= 9 and
                  not [y for x in l[i:] if x != ','
                       for y in x if y not in "0123456789+-"]):
                # GMT0BST,3,0,30,3600,10,0,26,7200[,3600]
                for x in (res.start, res.end):
                    x.month = int(l[i])
                    used_idxs.append(i)
                    i += 2
                    if l[i] == '-':
                        value = int(l[i + 1]) * -1
                        used_idxs.append(i)
                        i += 1
                    else:
                        value = int(l[i])
                    used_idxs.append(i)
                    i += 2
                    # A non-zero value is a week number, paired with a
                    # weekday; zero means the next field is a day.
                    if value:
                        x.week = value
                        x.weekday = (int(l[i]) - 1) % 7
                    else:
                        x.day = int(l[i])
                    used_idxs.append(i)
                    i += 2
                    x.time = int(l[i])
                    used_idxs.append(i)
                    i += 2
                if i < len_l:
                    # Optional trailing field: DST offset relative to the
                    # standard offset.
                    if l[i] in ('-', '+'):
                        signal = (-1, 1)[l[i] == "+"]
                        used_idxs.append(i)
                        i += 1
                    else:
                        signal = 1
                    used_idxs.append(i)
                    res.dstoffset = (res.stdoffset + int(l[i]) * signal)

                # This was a made-up format that is not in normal use
                warn(('Parsed time zone "%s" ' % tzstr) +
                     'is in a non-standard dateutil-specific format, which ' +
                     'is now deprecated; support for parsing this format ' +
                     'will be removed in future versions. It is recommended ' +
                     'that you switch to a standard format like the GNU ' +
                     'TZ variable format.', tz.DeprecatedTzFormatWarning)
            elif (l.count(',') == 2 and l[i:].count('/') <= 2 and
                  not [y for x in l[i:] if x not in (',', '/', 'J', 'M',
                                                     '.', '-', ':')
                       for y in x if y not in "0123456789"]):
                for x in (res.start, res.end):
                    if l[i] == 'J':
                        # non-leap year day (1 based)
                        used_idxs.append(i)
                        i += 1
                        x.jyday = int(l[i])
                    elif l[i] == 'M':
                        # month[-.]week[-.]weekday
                        used_idxs.append(i)
                        i += 1
                        x.month = int(l[i])
                        used_idxs.append(i)
                        i += 1
                        if l[i] not in ('-', '.'):
                            return None
                        used_idxs.append(i)
                        i += 1
                        x.week = int(l[i])
                        # Week 5 is stored as -1.
                        if x.week == 5:
                            x.week = -1
                        used_idxs.append(i)
                        i += 1
                        if l[i] not in ('-', '.'):
                            return None
                        used_idxs.append(i)
                        i += 1
                        x.weekday = (int(l[i]) - 1) % 7
                    else:
                        # year day (zero based)
                        x.yday = int(l[i]) + 1

                    used_idxs.append(i)
                    i += 1

                    if i < len_l and l[i] == '/':
                        used_idxs.append(i)
                        i += 1
                        # start time
                        len_li = len(l[i])
                        if len_li == 4:
                            # -0300
                            x.time = (int(l[i][:2]) * 3600 +
                                      int(l[i][2:]) * 60)
                        elif i + 1 < len_l and l[i + 1] == ':':
                            # -03:00
                            x.time = int(l[i]) * 3600 + int(l[i + 2]) * 60
                            used_idxs.append(i)
                            i += 2
                            if i + 1 < len_l and l[i + 1] == ':':
                                used_idxs.append(i)
                                i += 2
                                x.time += int(l[i])
                        elif len_li <= 2:
                            # -[0]3
                            x.time = (int(l[i][:2]) * 3600)
                        else:
                            return None
                        used_idxs.append(i)
                        i += 1

                    # Each rule must end the string or be followed by ','.
                    if not (i == len_l or l[i] == ','):
                        return None

                    i += 1

                # Nothing may follow the second rule.
                if i < len_l:
                    return None

        except (IndexError, ValueError, AssertionError):
            return None

        unused_idxs = set(range(len_l)).difference(used_idxs)
        res.any_unused_tokens = not {l[n] for n in unused_idxs}.issubset({",",":"})
        return res
1568
1569
# Shared _tzparser instance used by _parsetz().
DEFAULTTZPARSER = _tzparser()
1571
1572
def _parsetz(tzstr):
    """Parse ``tzstr`` with the shared :data:`DEFAULTTZPARSER` instance.

    Returns whatever :meth:`_tzparser.parse` returns: a result object, or
    ``None`` if the string could not be parsed.
    """
    return DEFAULTTZPARSER.parse(tzstr)
1575
class UnknownTimezoneWarning(RuntimeWarning):
    """Warning category used when the parser identifies a time zone name
    that it cannot turn into a tzinfo object."""
1578# vim:ts=4:sw=4:et
1579