1# -*- coding:iso-8859-1 -*-
2"""
3This module offers a generic date/time string parser which is able to parse
4most known formats to represent a date and/or time.
5
6This module attempts to be forgiving with regards to unlikely input formats,
7returning a datetime object even for dates which are ambiguous. If an element
8of a date/time stamp is omitted, the following rules are applied:
9- If AM or PM is left unspecified, a 24-hour clock is assumed, however, an hour
10  on a 12-hour clock (``0 <= hour <= 12``) *must* be specified if AM or PM is
11  specified.
12- If a time zone is omitted, a timezone-naive datetime is returned.
13
14If any other elements are missing, they are taken from the
15:class:`datetime.datetime` object passed to the parameter ``default``. If this
16results in a day number exceeding the valid number of days per month, the
17value falls back to the end of the month.
18
19Additional resources about date/time string formats can be found below:
20
21- `A summary of the international standard date and time notation
22  <http://www.cl.cam.ac.uk/~mgk25/iso-time.html>`_
23- `W3C Date and Time Formats <http://www.w3.org/TR/NOTE-datetime>`_
24- `Time Formats (Planetary Rings Node) <http://pds-rings.seti.org/tools/time_formats.html>`_
25- `CPAN ParseDate module
26  <http://search.cpan.org/~muir/Time-modules-2013.0912/lib/Time/ParseDate.pm>`_
27- `Java SimpleDateFormat Class
28  <https://docs.oracle.com/javase/6/docs/api/java/text/SimpleDateFormat.html>`_
29"""
30from __future__ import unicode_literals
31
32import datetime
33import string
34import time
35import collections
36import re
37from io import StringIO
38from calendar import monthrange, isleap
39
40from six import text_type, binary_type, integer_types
41
42from . import relativedelta
43from . import tz
44
# Names exported by ``from <this module> import *``.
__all__ = ["parse", "parserinfo"]
46
47
48class _timelex(object):
49    # Fractional seconds are sometimes split by a comma
50    _split_decimal = re.compile("([\.,])")
51
52    def __init__(self, instream):
53        if isinstance(instream, binary_type):
54            instream = instream.decode()
55
56        if isinstance(instream, text_type):
57            instream = StringIO(instream)
58
59        if getattr(instream, 'read', None) is None:
60            raise TypeError('Parser must be a string or character stream, not '
61                            '{itype}'.format(itype=instream.__class__.__name__))
62
63        self.instream = instream
64        self.charstack = []
65        self.tokenstack = []
66        self.eof = False
67
68    def get_token(self):
69        """
70        This function breaks the time string into lexical units (tokens), which
71        can be parsed by the parser. Lexical units are demarcated by changes in
72        the character set, so any continuous string of letters is considered
73        one unit, any continuous string of numbers is considered one unit.
74
75        The main complication arises from the fact that dots ('.') can be used
76        both as separators (e.g. "Sep.20.2009") or decimal points (e.g.
77        "4:30:21.447"). As such, it is necessary to read the full context of
78        any dot-separated strings before breaking it into tokens; as such, this
79        function maintains a "token stack", for when the ambiguous context
80        demands that multiple tokens be parsed at once.
81        """
82        if self.tokenstack:
83            return self.tokenstack.pop(0)
84
85        seenletters = False
86        token = None
87        state = None
88
89        while not self.eof:
90            # We only realize that we've reached the end of a token when we
91            # find a character that's not part of the current token - since
92            # that character may be part of the next token, it's stored in the
93            # charstack.
94            if self.charstack:
95                nextchar = self.charstack.pop(0)
96            else:
97                nextchar = self.instream.read(1)
98                while nextchar == '\x00':
99                    nextchar = self.instream.read(1)
100
101            if not nextchar:
102                self.eof = True
103                break
104            elif not state:
105                # First character of the token - determines if we're starting
106                # to parse a word, a number or something else.
107                token = nextchar
108                if self.isword(nextchar):
109                    state = 'a'
110                elif self.isnum(nextchar):
111                    state = '0'
112                elif self.isspace(nextchar):
113                    token = ' '
114                    break  # emit token
115                else:
116                    break  # emit token
117            elif state == 'a':
118                # If we've already started reading a word, we keep reading
119                # letters until we find something that's not part of a word.
120                seenletters = True
121                if self.isword(nextchar):
122                    token += nextchar
123                elif nextchar == '.':
124                    token += nextchar
125                    state = 'a.'
126                else:
127                    self.charstack.append(nextchar)
128                    break  # emit token
129            elif state == '0':
130                # If we've already started reading a number, we keep reading
131                # numbers until we find something that doesn't fit.
132                if self.isnum(nextchar):
133                    token += nextchar
134                elif nextchar == '.' or (nextchar == ',' and len(token) >= 2):
135                    token += nextchar
136                    state = '0.'
137                else:
138                    self.charstack.append(nextchar)
139                    break  # emit token
140            elif state == 'a.':
141                # If we've seen some letters and a dot separator, continue
142                # parsing, and the tokens will be broken up later.
143                seenletters = True
144                if nextchar == '.' or self.isword(nextchar):
145                    token += nextchar
146                elif self.isnum(nextchar) and token[-1] == '.':
147                    token += nextchar
148                    state = '0.'
149                else:
150                    self.charstack.append(nextchar)
151                    break  # emit token
152            elif state == '0.':
153                # If we've seen at least one dot separator, keep going, we'll
154                # break up the tokens later.
155                if nextchar == '.' or self.isnum(nextchar):
156                    token += nextchar
157                elif self.isword(nextchar) and token[-1] == '.':
158                    token += nextchar
159                    state = 'a.'
160                else:
161                    self.charstack.append(nextchar)
162                    break  # emit token
163
164        if (state in ('a.', '0.') and (seenletters or token.count('.') > 1 or
165                                       token[-1] in '.,')):
166            l = self._split_decimal.split(token)
167            token = l[0]
168            for tok in l[1:]:
169                if tok:
170                    self.tokenstack.append(tok)
171
172        if state == '0.' and token.count('.') == 0:
173            token = token.replace(',', '.')
174
175        return token
176
177    def __iter__(self):
178        return self
179
180    def __next__(self):
181        token = self.get_token()
182        if token is None:
183            raise StopIteration
184
185        return token
186
187    def next(self):
188        return self.__next__()  # Python 2.x support
189
190    @classmethod
191    def split(cls, s):
192        return list(cls(s))
193
194    @classmethod
195    def isword(cls, nextchar):
196        """ Whether or not the next character is part of a word """
197        return nextchar.isalpha()
198
199    @classmethod
200    def isnum(cls, nextchar):
201        """ Whether the next character is part of a number """
202        return nextchar.isdigit()
203
204    @classmethod
205    def isspace(cls, nextchar):
206        """ Whether the next character is whitespace """
207        return nextchar.isspace()
208
209
210class _resultbase(object):
211
212    def __init__(self):
213        for attr in self.__slots__:
214            setattr(self, attr, None)
215
216    def _repr(self, classname):
217        l = []
218        for attr in self.__slots__:
219            value = getattr(self, attr)
220            if value is not None:
221                l.append("%s=%s" % (attr, repr(value)))
222        return "%s(%s)" % (classname, ", ".join(l))
223
224    def __len__(self):
225        return (sum(getattr(self, attr) is not None
226                    for attr in self.__slots__))
227
228    def __repr__(self):
229        return self._repr(self.__class__.__name__)
230
231
class parserinfo(object):
    """
    Class which handles what inputs are accepted. Subclass this to customize
    the language and acceptable values for each parameter.

    All name lookups are case-insensitive: the class-level word lists are
    converted to lower-cased lookup tables when an instance is created.

    :param dayfirst:
            Whether to interpret the first value in an ambiguous 3-integer date
            (e.g. 01/05/09) as the day (``True``) or month (``False``). If
            ``yearfirst`` is set to ``True``, this distinguishes between YDM
            and YMD. Default is ``False``.

    :param yearfirst:
            Whether to interpret the first value in an ambiguous 3-integer date
            (e.g. 01/05/09) as the year. If ``True``, the first number is taken
            to be the year, otherwise the last number is taken to be the year.
            Default is ``False``.
    """

    # m from a.m/p.m, t from ISO T separator
    JUMP = [" ", ".", ",", ";", "-", "/", "'",
            "at", "on", "and", "ad", "m", "t", "of",
            "st", "nd", "rd", "th"]

    WEEKDAYS = [("Mon", "Monday"),
                ("Tue", "Tuesday"),
                ("Wed", "Wednesday"),
                ("Thu", "Thursday"),
                ("Fri", "Friday"),
                ("Sat", "Saturday"),
                ("Sun", "Sunday")]
    MONTHS = [("Jan", "January"),
              ("Feb", "February"),
              ("Mar", "March"),
              ("Apr", "April"),
              ("May", "May"),
              ("Jun", "June"),
              ("Jul", "July"),
              ("Aug", "August"),
              ("Sep", "Sept", "September"),
              ("Oct", "October"),
              ("Nov", "November"),
              ("Dec", "December")]
    HMS = [("h", "hour", "hours"),
           ("m", "minute", "minutes"),
           ("s", "second", "seconds")]
    AMPM = [("am", "a"),
            ("pm", "p")]
    UTCZONE = ["UTC", "GMT", "Z"]
    PERTAIN = ["of"]
    # Maps a time zone name to its offset; empty here, so only subclasses
    # that override it contribute entries to ``tzoffset()``.
    TZOFFSET = {}

    def __init__(self, dayfirst=False, yearfirst=False):
        self._jump = self._convert(self.JUMP)
        self._weekdays = self._convert(self.WEEKDAYS)
        self._months = self._convert(self.MONTHS)
        self._hms = self._convert(self.HMS)
        self._ampm = self._convert(self.AMPM)
        self._utczone = self._convert(self.UTCZONE)
        self._pertain = self._convert(self.PERTAIN)

        self.dayfirst = dayfirst
        self.yearfirst = yearfirst

        # Current local year and the first year of its century; used by
        # ``convertyear()`` to expand two-digit years.
        self._year = time.localtime().tm_year
        self._century = self._year // 100 * 100

    def _convert(self, lst):
        """
        Build a lookup table from a list of names.

        Each entry of ``lst`` is either a single name or a tuple of aliases;
        every name is lower-cased and mapped to the index of its entry.
        """
        dct = {}
        for i, entry in enumerate(lst):
            aliases = entry if isinstance(entry, tuple) else (entry,)
            for alias in aliases:
                dct[alias.lower()] = i
        return dct

    def jump(self, name):
        """ Whether ``name`` is one of the ``JUMP`` tokens """
        return name.lower() in self._jump

    def weekday(self, name):
        """
        Return the index of ``name`` in ``WEEKDAYS`` (Monday is 0), or
        ``None`` if it is unknown or shorter than three characters.
        """
        if len(name) >= 3:
            try:
                return self._weekdays[name.lower()]
            except KeyError:
                pass
        return None

    def month(self, name):
        """
        Return the 1-based month number for ``name``, or ``None`` if it is
        unknown or shorter than three characters.
        """
        if len(name) >= 3:
            try:
                return self._months[name.lower()] + 1
            except KeyError:
                pass
        return None

    def hms(self, name):
        """
        Return 0, 1 or 2 if ``name`` denotes hours, minutes or seconds
        respectively, otherwise ``None``.
        """
        try:
            return self._hms[name.lower()]
        except KeyError:
            return None

    def ampm(self, name):
        """ Return 0 for an AM marker, 1 for a PM marker, otherwise ``None`` """
        try:
            return self._ampm[name.lower()]
        except KeyError:
            return None

    def pertain(self, name):
        """ Whether ``name`` is one of the ``PERTAIN`` words """
        return name.lower() in self._pertain

    def utczone(self, name):
        """ Whether ``name`` is one of the ``UTCZONE`` names (any case) """
        return name.lower() in self._utczone

    def tzoffset(self, name):
        """
        Return the offset registered for the time zone ``name``.

        :return:
            ``0`` for any of the ``UTCZONE`` names, otherwise the value
            stored in ``TZOFFSET`` for ``name``, or ``None`` if there is none.
        """
        # ``_utczone`` holds lower-cased keys, so the membership test must be
        # case-insensitive (as in ``utczone()``); comparing the raw name
        # would never match "UTC", "GMT" or "Z".
        if name is not None and self.utczone(name):
            return 0

        return self.TZOFFSET.get(name)

    def convertyear(self, year, century_specified=False):
        """
        Expand a two-digit year to a full year.

        Years below 100 whose century was not given explicitly are placed in
        the current century and then shifted by a century, if needed, so that
        the result lies within 50 years of the current year. Other values are
        returned unchanged.
        """
        if year < 100 and not century_specified:
            year += self._century
            if abs(year - self._year) >= 50:
                if year < self._year:
                    year += 100
                else:
                    year -= 100
        return year

    def validate(self, res):
        """
        Normalize a parse result in place and report whether it is valid.

        Expands the year via ``convertyear()`` and reconciles ``tzname`` and
        ``tzoffset`` for UTC. This implementation always returns ``True``.
        """
        # move to info
        if res.year is not None:
            res.year = self.convertyear(res.year, res.century_specified)

        if (res.tzoffset == 0 and not res.tzname) or res.tzname == 'Z':
            # A zero offset without a name, or a literal 'Z', means UTC.
            res.tzname = "UTC"
            res.tzoffset = 0
        elif res.tzoffset != 0 and res.tzname and self.utczone(res.tzname):
            # A UTC name always carries a zero offset.
            res.tzoffset = 0
        return True
372
373
374class _ymd(list):
375    def __init__(self, tzstr, *args, **kwargs):
376        super(self.__class__, self).__init__(*args, **kwargs)
377        self.century_specified = False
378        self.tzstr = tzstr
379
380    @staticmethod
381    def token_could_be_year(token, year):
382        try:
383            return int(token) == year
384        except ValueError:
385            return False
386
387    @staticmethod
388    def find_potential_year_tokens(year, tokens):
389        return [token for token in tokens if _ymd.token_could_be_year(token, year)]
390
391    def find_probable_year_index(self, tokens):
392        """
393        attempt to deduce if a pre 100 year was lost
394         due to padded zeros being taken off
395        """
396        for index, token in enumerate(self):
397            potential_year_tokens = _ymd.find_potential_year_tokens(token, tokens)
398            if len(potential_year_tokens) == 1 and len(potential_year_tokens[0]) > 2:
399                return index
400
401    def append(self, val):
402        if hasattr(val, '__len__'):
403            if val.isdigit() and len(val) > 2:
404                self.century_specified = True
405        elif val > 100:
406            self.century_specified = True
407
408        super(self.__class__, self).append(int(val))
409
410    def resolve_ymd(self, mstridx, yearfirst, dayfirst):
411        len_ymd = len(self)
412        year, month, day = (None, None, None)
413
414        if len_ymd > 3:
415            raise ValueError("More than three YMD values")
416        elif len_ymd == 1 or (mstridx != -1 and len_ymd == 2):
417            # One member, or two members with a month string
418            if mstridx != -1:
419                month = self[mstridx]
420                del self[mstridx]
421
422            if len_ymd > 1 or mstridx == -1:
423                if self[0] > 31:
424                    year = self[0]
425                else:
426                    day = self[0]
427
428        elif len_ymd == 2:
429            # Two members with numbers
430            if self[0] > 31:
431                # 99-01
432                year, month = self
433            elif self[1] > 31:
434                # 01-99
435                month, year = self
436            elif dayfirst and self[1] <= 12:
437                # 13-01
438                day, month = self
439            else:
440                # 01-13
441                month, day = self
442
443        elif len_ymd == 3:
444            # Three members
445            if mstridx == 0:
446                month, day, year = self
447            elif mstridx == 1:
448                if self[0] > 31 or (yearfirst and self[2] <= 31):
449                    # 99-Jan-01
450                    year, month, day = self
451                else:
452                    # 01-Jan-01
453                    # Give precendence to day-first, since
454                    # two-digit years is usually hand-written.
455                    day, month, year = self
456
457            elif mstridx == 2:
458                # WTF!?
459                if self[1] > 31:
460                    # 01-99-Jan
461                    day, year, month = self
462                else:
463                    # 99-01-Jan
464                    year, day, month = self
465
466            else:
467                if self[0] > 31 or \
468                    self.find_probable_year_index(_timelex.split(self.tzstr)) == 0 or \
469                   (yearfirst and self[1] <= 12 and self[2] <= 31):
470                    # 99-01-01
471                    if dayfirst and self[2] <= 12:
472                        year, day, month = self
473                    else:
474                        year, month, day = self
475                elif self[0] > 12 or (dayfirst and self[1] <= 12):
476                    # 13-01-01
477                    day, month, year = self
478                else:
479                    # 01-13-01
480                    month, day, year = self
481
482        return year, month, day
483
484
485class parser(object):
486    def __init__(self, info=None):
487        self.info = info or parserinfo()
488
489    def parse(self, timestr, default=None, ignoretz=False, tzinfos=None, **kwargs):
490        """
491        Parse the date/time string into a :class:`datetime.datetime` object.
492
493        :param timestr:
494            Any date/time string using the supported formats.
495
496        :param default:
497            The default datetime object, if this is a datetime object and not
498            ``None``, elements specified in ``timestr`` replace elements in the
499            default object.
500
501        :param ignoretz:
502            If set ``True``, time zones in parsed strings are ignored and a
503            naive :class:`datetime.datetime` object is returned.
504
505        :param tzinfos:
506            Additional time zone names / aliases which may be present in the
507            string. This argument maps time zone names (and optionally offsets
508            from those time zones) to time zones. This parameter can be a
509            dictionary with timezone aliases mapping time zone names to time
510            zones or a function taking two parameters (``tzname`` and
511            ``tzoffset``) and returning a time zone.
512
513            The timezones to which the names are mapped can be an integer
514            offset from UTC in minutes or a :class:`tzinfo` object.
515
516            .. doctest::
517               :options: +NORMALIZE_WHITESPACE
518
519                >>> from dateutil.parser import parse
520                >>> from dateutil.tz import gettz
521                >>> tzinfos = {"BRST": -10800, "CST": gettz("America/Chicago")}
522                >>> parse("2012-01-19 17:21:00 BRST", tzinfos=tzinfos)
523                datetime.datetime(2012, 1, 19, 17, 21, tzinfo=tzoffset(u'BRST', -10800))
524                >>> parse("2012-01-19 17:21:00 CST", tzinfos=tzinfos)
525                datetime.datetime(2012, 1, 19, 17, 21,
526                                  tzinfo=tzfile('/usr/share/zoneinfo/America/Chicago'))
527
528            This parameter is ignored if ``ignoretz`` is set.
529
530        :param **kwargs:
531            Keyword arguments as passed to ``_parse()``.
532
533        :return:
534            Returns a :class:`datetime.datetime` object or, if the
535            ``fuzzy_with_tokens`` option is ``True``, returns a tuple, the
536            first element being a :class:`datetime.datetime` object, the second
537            a tuple containing the fuzzy tokens.
538
539        :raises ValueError:
540            Raised for invalid or unknown string format, if the provided
541            :class:`tzinfo` is not in a valid format, or if an invalid date
542            would be created.
543
544        :raises OverflowError:
545            Raised if the parsed date exceeds the largest valid C integer on
546            your system.
547        """
548
549        if default is None:
550            effective_dt = datetime.datetime.now()
551            default = datetime.datetime.now().replace(hour=0, minute=0,
552                                                      second=0, microsecond=0)
553        else:
554            effective_dt = default
555
556        res, skipped_tokens = self._parse(timestr, **kwargs)
557
558        if res is None:
559            raise ValueError("Unknown string format")
560
561        if len(res) == 0:
562            raise ValueError("String does not contain a date.")
563
564        repl = {}
565        for attr in ("year", "month", "day", "hour",
566                     "minute", "second", "microsecond"):
567            value = getattr(res, attr)
568            if value is not None:
569                repl[attr] = value
570
571        if 'day' not in repl:
572            # If the default day exceeds the last day of the month, fall back to
573            # the end of the month.
574            cyear = default.year if res.year is None else res.year
575            cmonth = default.month if res.month is None else res.month
576            cday = default.day if res.day is None else res.day
577
578            if cday > monthrange(cyear, cmonth)[1]:
579                repl['day'] = monthrange(cyear, cmonth)[1]
580
581        ret = default.replace(**repl)
582
583        if res.weekday is not None and not res.day:
584            ret = ret+relativedelta.relativedelta(weekday=res.weekday)
585
586        if not ignoretz:
587            if (isinstance(tzinfos, collections.Callable) or
588                    tzinfos and res.tzname in tzinfos):
589
590                if isinstance(tzinfos, collections.Callable):
591                    tzdata = tzinfos(res.tzname, res.tzoffset)
592                else:
593                    tzdata = tzinfos.get(res.tzname)
594
595                if isinstance(tzdata, datetime.tzinfo):
596                    tzinfo = tzdata
597                elif isinstance(tzdata, text_type):
598                    tzinfo = tz.tzstr(tzdata)
599                elif isinstance(tzdata, integer_types):
600                    tzinfo = tz.tzoffset(res.tzname, tzdata)
601                else:
602                    raise ValueError("Offset must be tzinfo subclass, "
603                                     "tz string, or int offset.")
604                ret = ret.replace(tzinfo=tzinfo)
605            elif res.tzname and res.tzname in time.tzname:
606                ret = ret.replace(tzinfo=tz.tzlocal())
607            elif res.tzoffset == 0:
608                ret = ret.replace(tzinfo=tz.tzutc())
609            elif res.tzoffset:
610                ret = ret.replace(tzinfo=tz.tzoffset(res.tzname, res.tzoffset))
611
612        if kwargs.get('fuzzy_with_tokens', False):
613            return ret, skipped_tokens
614        else:
615            return ret
616
617    class _result(_resultbase):
618        __slots__ = ["year", "month", "day", "weekday",
619                     "hour", "minute", "second", "microsecond",
620                     "tzname", "tzoffset", "ampm"]
621
622    def _parse(self, timestr, dayfirst=None, yearfirst=None, fuzzy=False,
623               fuzzy_with_tokens=False):
624        """
625        Private method which performs the heavy lifting of parsing, called from
626        ``parse()``, which passes on its ``kwargs`` to this function.
627
628        :param timestr:
629            The string to parse.
630
631        :param dayfirst:
632            Whether to interpret the first value in an ambiguous 3-integer date
633            (e.g. 01/05/09) as the day (``True``) or month (``False``). If
634            ``yearfirst`` is set to ``True``, this distinguishes between YDM
635            and YMD. If set to ``None``, this value is retrieved from the
636            current :class:`parserinfo` object (which itself defaults to
637            ``False``).
638
639        :param yearfirst:
640            Whether to interpret the first value in an ambiguous 3-integer date
641            (e.g. 01/05/09) as the year. If ``True``, the first number is taken
642            to be the year, otherwise the last number is taken to be the year.
643            If this is set to ``None``, the value is retrieved from the current
644            :class:`parserinfo` object (which itself defaults to ``False``).
645
646        :param fuzzy:
647            Whether to allow fuzzy parsing, allowing for string like "Today is
648            January 1, 2047 at 8:21:00AM".
649
650        :param fuzzy_with_tokens:
651            If ``True``, ``fuzzy`` is automatically set to True, and the parser
652            will return a tuple where the first element is the parsed
653            :class:`datetime.datetime` datetimestamp and the second element is
654            a tuple containing the portions of the string which were ignored:
655
656            .. doctest::
657
658                >>> from dateutil.parser import parse
659                >>> parse("Today is January 1, 2047 at 8:21:00AM", fuzzy_with_tokens=True)
660                (datetime.datetime(2047, 1, 1, 8, 21), (u'Today is ', u' ', u'at '))
661
662        """
663        if fuzzy_with_tokens:
664            fuzzy = True
665
666        info = self.info
667
668        if dayfirst is None:
669            dayfirst = info.dayfirst
670
671        if yearfirst is None:
672            yearfirst = info.yearfirst
673
674        res = self._result()
675        l = _timelex.split(timestr)         # Splits the timestr into tokens
676
677        # keep up with the last token skipped so we can recombine
678        # consecutively skipped tokens (-2 for when i begins at 0).
679        last_skipped_token_i = -2
680        skipped_tokens = list()
681
682        try:
683            # year/month/day list
684            ymd = _ymd(timestr)
685
686            # Index of the month string in ymd
687            mstridx = -1
688
689            len_l = len(l)
690            i = 0
691            while i < len_l:
692
693                # Check if it's a number
694                try:
695                    value_repr = l[i]
696                    value = float(value_repr)
697                except ValueError:
698                    value = None
699
700                if value is not None:
701                    # Token is a number
702                    len_li = len(l[i])
703                    i += 1
704
705                    if (len(ymd) == 3 and len_li in (2, 4)
706                        and res.hour is None and (i >= len_l or (l[i] != ':' and
707                                                  info.hms(l[i]) is None))):
708                        # 19990101T23[59]
709                        s = l[i-1]
710                        res.hour = int(s[:2])
711
712                        if len_li == 4:
713                            res.minute = int(s[2:])
714
715                    elif len_li == 6 or (len_li > 6 and l[i-1].find('.') == 6):
716                        # YYMMDD or HHMMSS[.ss]
717                        s = l[i-1]
718
719                        if not ymd and l[i-1].find('.') == -1:
720                            #ymd.append(info.convertyear(int(s[:2])))
721
722                            ymd.append(s[:2])
723                            ymd.append(s[2:4])
724                            ymd.append(s[4:])
725                        else:
726                            # 19990101T235959[.59]
727                            res.hour = int(s[:2])
728                            res.minute = int(s[2:4])
729                            res.second, res.microsecond = _parsems(s[4:])
730
731                    elif len_li in (8, 12, 14):
732                        # YYYYMMDD
733                        s = l[i-1]
734                        ymd.append(s[:4])
735                        ymd.append(s[4:6])
736                        ymd.append(s[6:8])
737
738                        if len_li > 8:
739                            res.hour = int(s[8:10])
740                            res.minute = int(s[10:12])
741
742                            if len_li > 12:
743                                res.second = int(s[12:])
744
745                    elif ((i < len_l and info.hms(l[i]) is not None) or
746                          (i+1 < len_l and l[i] == ' ' and
747                           info.hms(l[i+1]) is not None)):
748
749                        # HH[ ]h or MM[ ]m or SS[.ss][ ]s
750                        if l[i] == ' ':
751                            i += 1
752
753                        idx = info.hms(l[i])
754
755                        while True:
756                            if idx == 0:
757                                res.hour = int(value)
758
759                                if value % 1:
760                                    res.minute = int(60*(value % 1))
761
762                            elif idx == 1:
763                                res.minute = int(value)
764
765                                if value % 1:
766                                    res.second = int(60*(value % 1))
767
768                            elif idx == 2:
769                                res.second, res.microsecond = \
770                                    _parsems(value_repr)
771
772                            i += 1
773
774                            if i >= len_l or idx == 2:
775                                break
776
777                            # 12h00
778                            try:
779                                value_repr = l[i]
780                                value = float(value_repr)
781                            except ValueError:
782                                break
783                            else:
784                                i += 1
785                                idx += 1
786
787                                if i < len_l:
788                                    newidx = info.hms(l[i])
789
790                                    if newidx is not None:
791                                        idx = newidx
792
793                    elif (i == len_l and l[i-2] == ' ' and
794                          info.hms(l[i-3]) is not None):
795                        # X h MM or X m SS
796                        idx = info.hms(l[i-3]) + 1
797
798                        if idx == 1:
799                            res.minute = int(value)
800
801                            if value % 1:
802                                res.second = int(60*(value % 1))
803                            elif idx == 2:
804                                res.second, res.microsecond = \
805                                    _parsems(value_repr)
806                                i += 1
807
808                    elif i+1 < len_l and l[i] == ':':
809                        # HH:MM[:SS[.ss]]
810                        res.hour = int(value)
811                        i += 1
812                        value = float(l[i])
813                        res.minute = int(value)
814
815                        if value % 1:
816                            res.second = int(60*(value % 1))
817
818                        i += 1
819
820                        if i < len_l and l[i] == ':':
821                            res.second, res.microsecond = _parsems(l[i+1])
822                            i += 2
823
824                    elif i < len_l and l[i] in ('-', '/', '.'):
825                        sep = l[i]
826                        ymd.append(value_repr)
827                        i += 1
828
829                        if i < len_l and not info.jump(l[i]):
830                            try:
831                                # 01-01[-01]
832                                ymd.append(l[i])
833                            except ValueError:
834                                # 01-Jan[-01]
835                                value = info.month(l[i])
836
837                                if value is not None:
838                                    ymd.append(value)
839                                    assert mstridx == -1
840                                    mstridx = len(ymd)-1
841                                else:
842                                    return None, None
843
844                            i += 1
845
846                            if i < len_l and l[i] == sep:
847                                # We have three members
848                                i += 1
849                                value = info.month(l[i])
850
851                                if value is not None:
852                                    ymd.append(value)
853                                    mstridx = len(ymd)-1
854                                    assert mstridx == -1
855                                else:
856                                    ymd.append(l[i])
857
858                                i += 1
859                    elif i >= len_l or info.jump(l[i]):
860                        if i+1 < len_l and info.ampm(l[i+1]) is not None:
861                            # 12 am
862                            res.hour = int(value)
863
864                            if res.hour < 12 and info.ampm(l[i+1]) == 1:
865                                res.hour += 12
866                            elif res.hour == 12 and info.ampm(l[i+1]) == 0:
867                                res.hour = 0
868
869                            i += 1
870                        else:
871                            # Year, month or day
872                            ymd.append(value)
873                        i += 1
874                    elif info.ampm(l[i]) is not None:
875
876                        # 12am
877                        res.hour = int(value)
878
879                        if res.hour < 12 and info.ampm(l[i]) == 1:
880                            res.hour += 12
881                        elif res.hour == 12 and info.ampm(l[i]) == 0:
882                            res.hour = 0
883                        i += 1
884
885                    elif not fuzzy:
886                        return None, None
887                    else:
888                        i += 1
889                    continue
890
891                # Check weekday
892                value = info.weekday(l[i])
893                if value is not None:
894                    res.weekday = value
895                    i += 1
896                    continue
897
898                # Check month name
899                value = info.month(l[i])
900                if value is not None:
901                    ymd.append(value)
902                    assert mstridx == -1
903                    mstridx = len(ymd)-1
904
905                    i += 1
906                    if i < len_l:
907                        if l[i] in ('-', '/'):
908                            # Jan-01[-99]
909                            sep = l[i]
910                            i += 1
911                            ymd.append(l[i])
912                            i += 1
913
914                            if i < len_l and l[i] == sep:
915                                # Jan-01-99
916                                i += 1
917                                ymd.append(l[i])
918                                i += 1
919
920                        elif (i+3 < len_l and l[i] == l[i+2] == ' '
921                              and info.pertain(l[i+1])):
922                            # Jan of 01
923                            # In this case, 01 is clearly year
924                            try:
925                                value = int(l[i+3])
926                            except ValueError:
927                                # Wrong guess
928                                pass
929                            else:
930                                # Convert it here to become unambiguous
931                                ymd.append(str(info.convertyear(value)))
932                            i += 4
933                    continue
934
935                # Check am/pm
936                value = info.ampm(l[i])
937                if value is not None:
938                    # For fuzzy parsing, 'a' or 'am' (both valid English words)
939                    # may erroneously trigger the AM/PM flag. Deal with that
940                    # here.
941                    val_is_ampm = True
942
943                    # If there's already an AM/PM flag, this one isn't one.
944                    if fuzzy and res.ampm is not None:
945                        val_is_ampm = False
946
947                    # If AM/PM is found and hour is not, raise a ValueError
948                    if res.hour is None:
949                        if fuzzy:
950                            val_is_ampm = False
951                        else:
952                            raise ValueError('No hour specified with ' +
953                                             'AM or PM flag.')
954                    elif not 0 <= res.hour <= 12:
955                        # If AM/PM is found, it's a 12 hour clock, so raise
956                        # an error for invalid range
957                        if fuzzy:
958                            val_is_ampm = False
959                        else:
960                            raise ValueError('Invalid hour specified for ' +
961                                             '12-hour clock.')
962
963                    if val_is_ampm:
964                        if value == 1 and res.hour < 12:
965                            res.hour += 12
966                        elif value == 0 and res.hour == 12:
967                            res.hour = 0
968
969                        res.ampm = value
970
971                    i += 1
972                    continue
973
974                # Check for a timezone name
975                if (res.hour is not None and len(l[i]) <= 5 and
976                        res.tzname is None and res.tzoffset is None and
977                        not [x for x in l[i] if x not in
978                             string.ascii_uppercase]):
979                    res.tzname = l[i]
980                    res.tzoffset = info.tzoffset(res.tzname)
981                    i += 1
982
983                    # Check for something like GMT+3, or BRST+3. Notice
984                    # that it doesn't mean "I am 3 hours after GMT", but
985                    # "my time +3 is GMT". If found, we reverse the
986                    # logic so that timezone parsing code will get it
987                    # right.
988                    if i < len_l and l[i] in ('+', '-'):
989                        l[i] = ('+', '-')[l[i] == '+']
990                        res.tzoffset = None
991                        if info.utczone(res.tzname):
992                            # With something like GMT+3, the timezone
993                            # is *not* GMT.
994                            res.tzname = None
995
996                    continue
997
998                # Check for a numbered timezone
999                if res.hour is not None and l[i] in ('+', '-'):
1000                    signal = (-1, 1)[l[i] == '+']
1001                    i += 1
1002                    len_li = len(l[i])
1003
1004                    if len_li == 4:
1005                        # -0300
1006                        res.tzoffset = int(l[i][:2])*3600+int(l[i][2:])*60
1007                    elif i+1 < len_l and l[i+1] == ':':
1008                        # -03:00
1009                        res.tzoffset = int(l[i])*3600+int(l[i+2])*60
1010                        i += 2
1011                    elif len_li <= 2:
1012                        # -[0]3
1013                        res.tzoffset = int(l[i][:2])*3600
1014                    else:
1015                        return None, None
1016                    i += 1
1017
1018                    res.tzoffset *= signal
1019
1020                    # Look for a timezone name between parenthesis
1021                    if (i+3 < len_l and
1022                        info.jump(l[i]) and l[i+1] == '(' and l[i+3] == ')' and
1023                        3 <= len(l[i+2]) <= 5 and
1024                        not [x for x in l[i+2]
1025                             if x not in string.ascii_uppercase]):
1026                        # -0300 (BRST)
1027                        res.tzname = l[i+2]
1028                        i += 4
1029                    continue
1030
1031                # Check jumps
1032                if not (info.jump(l[i]) or fuzzy):
1033                    return None, None
1034
1035                if last_skipped_token_i == i - 1:
1036                    # recombine the tokens
1037                    skipped_tokens[-1] += l[i]
1038                else:
1039                    # just append
1040                    skipped_tokens.append(l[i])
1041                last_skipped_token_i = i
1042                i += 1
1043
1044            # Process year/month/day
1045            year, month, day = ymd.resolve_ymd(mstridx, yearfirst, dayfirst)
1046            if year is not None:
1047                res.year = year
1048                res.century_specified = ymd.century_specified
1049
1050            if month is not None:
1051                res.month = month
1052
1053            if day is not None:
1054                res.day = day
1055
1056        except (IndexError, ValueError, AssertionError):
1057            return None, None
1058
1059        if not info.validate(res):
1060            return None, None
1061
1062        if fuzzy_with_tokens:
1063            return res, tuple(skipped_tokens)
1064        else:
1065            return res, None
1066
# Shared parser instance used by parse() when no custom parserinfo is given.
DEFAULTPARSER = parser()


def parse(timestr, parserinfo=None, **kwargs):
    """
    Parse a date/time string written in one of the supported formats.

    This is the module-level convenience wrapper: when a truthy
    ``parserinfo`` is supplied, a fresh :class:`parser` is built around it
    for this call; otherwise the shared ``DEFAULTPARSER`` instance is used.
    Every keyword argument is forwarded unchanged to the parser's ``parse``
    method.

    :param timestr:
        The string holding the date/time stamp to parse.

    :param parserinfo:
        A :class:`parserinfo` object carrying the parameters for the parser.
        When ``None``, the default arguments of the :class:`parserinfo`
        constructor apply.

    The following keyword arguments are accepted through ``**kwargs``:

    :param default:
        A datetime object supplying default values. When it is a datetime
        object (and not ``None``), the elements found in ``timestr``
        replace the matching elements of this object.

    :param ignoretz:
        When ``True``, any time zone found in the string is ignored and a
        naive :class:`datetime` object is returned.

    :param tzinfos:
            Extra time zone names / aliases that may appear in the string.
            This maps time zone names (and optionally offsets from those
            time zones) to time zones. It may be a dictionary mapping
            time zone names to time zones, or a function taking two
            parameters (``tzname`` and ``tzoffset``) and returning a time
            zone.

            Each name may be mapped either to an integer offset from UTC
            in seconds or to a :class:`tzinfo` object.

            .. doctest::
               :options: +NORMALIZE_WHITESPACE

                >>> from dateutil.parser import parse
                >>> from dateutil.tz import gettz
                >>> tzinfos = {"BRST": -10800, "CST": gettz("America/Chicago")}
                >>> parse("2012-01-19 17:21:00 BRST", tzinfos=tzinfos)
                datetime.datetime(2012, 1, 19, 17, 21, tzinfo=tzoffset(u'BRST', -10800))
                >>> parse("2012-01-19 17:21:00 CST", tzinfos=tzinfos)
                datetime.datetime(2012, 1, 19, 17, 21,
                                  tzinfo=tzfile('/usr/share/zoneinfo/America/Chicago'))

            This parameter has no effect when ``ignoretz`` is set.

    :param dayfirst:
        Controls whether the first value of an ambiguous 3-integer date
        (e.g. 01/05/09) is read as the day (``True``) or as the month
        (``False``). When ``yearfirst`` is ``True``, it selects between
        YDM and YMD instead. When ``None``, the value comes from the
        current :class:`parserinfo` object (which itself defaults to
        ``False``).

    :param yearfirst:
        Controls whether the first value of an ambiguous 3-integer date
        (e.g. 01/05/09) is read as the year. With ``True`` the first number
        is the year; otherwise the last number is the year. When ``None``,
        the value comes from the current :class:`parserinfo` object (which
        itself defaults to ``False``).

    :param fuzzy:
        Enables fuzzy parsing, which accepts strings such as "Today is
        January 1, 2047 at 8:21:00AM".

    :param fuzzy_with_tokens:
        When ``True``, ``fuzzy`` is switched on automatically and the
        result is a tuple: first the parsed :class:`datetime.datetime`,
        then a tuple of the portions of the string that were ignored:

        .. doctest::

            >>> from dateutil.parser import parse
            >>> parse("Today is January 1, 2047 at 8:21:00AM", fuzzy_with_tokens=True)
            (datetime.datetime(2047, 1, 1, 8, 21), (u'Today is ', u' ', u'at '))

    :return:
        A :class:`datetime.datetime` object or, when ``fuzzy_with_tokens``
        is ``True``, a tuple whose first element is the
        :class:`datetime.datetime` object and whose second element is a
        tuple with the fuzzy tokens.

    :raises ValueError:
        Raised for an invalid or unknown string format, when the provided
        :class:`tzinfo` is not in a valid format, or when an invalid date
        would be created.

    :raises OverflowError:
        Raised when the parsed date exceeds the largest valid C integer on
        your system.
    """
    # A custom parserinfo gets its own parser; otherwise reuse the shared one.
    active_parser = parser(parserinfo) if parserinfo else DEFAULTPARSER
    return active_parser.parse(timestr, **kwargs)
1169
1170
class _tzparser(object):
    """
    Parser for ``TZ``-variable-style time zone strings.

    :meth:`parse` tokenizes the string with ``_timelex.split`` and fills a
    :class:`_result` with the standard/DST abbreviations and offsets and,
    when present, the DST start/end rules. After the abbreviations, two
    rule layouts are recognised:

    - eight or nine comma-separated integer fields, e.g.
      ``GMT0BST,3,0,30,3600,10,0,26,7200[,3600]``;
    - two comma-separated rules, each written as ``J<n>``,
      ``M<month>.<week>.<weekday>`` or a plain day number, and each
      optionally followed by ``/<time>``.
    """

    class _result(_resultbase):
        """Parsed pieces of a time zone string (abbreviations, offsets and
        the DST start/end rules)."""

        __slots__ = ["stdabbr", "stdoffset", "dstabbr", "dstoffset",
                     "start", "end"]

        class _attr(_resultbase):
            """One DST transition rule (``start`` or ``end``)."""

            __slots__ = ["month", "week", "weekday",
                         "yday", "jyday", "day", "time"]

        def __repr__(self):
            return self._repr("")

        def __init__(self):
            _resultbase.__init__(self)
            # Each result always carries two (initially empty) rule objects.
            self.start = self._attr()
            self.end = self._attr()

    def parse(self, tzstr):
        """
        Parse ``tzstr`` into a :class:`_result`.

        :param tzstr:
            The time zone string to parse.

        :return:
            The populated :class:`_result`, or ``None`` when a separator,
            numeric field or rule is malformed or missing.
        """
        res = self._result()
        l = _timelex.split(tzstr)
        try:

            len_l = len(l)

            i = 0
            while i < len_l:
                # BRST+3[BRDT[+2]]
                # Gather the run of tokens holding no digit, sign, colon or
                # comma; joined together they form one abbreviation.
                j = i
                while j < len_l and not [x for x in l[j]
                                         if x in "0123456789:,-+"]:
                    j += 1
                if j != i:
                    # The first abbreviation is the standard one, the next
                    # one is the DST abbreviation.
                    if not res.stdabbr:
                        offattr = "stdoffset"
                        res.stdabbr = "".join(l[i:j])
                    else:
                        offattr = "dstoffset"
                        res.dstabbr = "".join(l[i:j])
                    i = j
                    if (i < len_l and (l[i] in ('+', '-') or l[i][0] in
                                       "0123456789")):
                        if l[i] in ('+', '-'):
                            # Yes, that's right.  See the TZ variable
                            # documentation: '+' yields a negative offset
                            # and '-' a positive one.
                            signal = (1, -1)[l[i] == '+']
                            i += 1
                        else:
                            # An unsigned offset is treated like '+'.
                            signal = -1
                        len_li = len(l[i])
                        if len_li == 4:
                            # -0300
                            setattr(res, offattr, (int(l[i][:2])*3600 +
                                                   int(l[i][2:])*60)*signal)
                        elif i+1 < len_l and l[i+1] == ':':
                            # -03:00
                            setattr(res, offattr,
                                    (int(l[i])*3600+int(l[i+2])*60)*signal)
                            i += 2
                        elif len_li <= 2:
                            # -[0]3
                            setattr(res, offattr,
                                    int(l[i][:2])*3600*signal)
                        else:
                            return None
                        i += 1
                    if res.dstabbr:
                        break
                else:
                    break

            if i < len_l:
                # ';' is accepted as an alternative to ',' between rules.
                for j in range(i, len_l):
                    if l[j] == ';':
                        l[j] = ','

                # Anything following the abbreviations must start with a
                # separator. Checked explicitly (not with ``assert``) so the
                # validation survives ``python -O``.
                if l[i] != ',':
                    return None

                i += 1

            if i >= len_l:
                pass
            elif (8 <= l.count(',') <= 9 and
                  not [y for x in l[i:] if x != ','
                       for y in x if y not in "0123456789+-"]):
                # GMT0BST,3,0,30,3600,10,0,26,7200[,3600]
                # Sign tokens are allowed by the guard above so that the
                # signed week and signed DST delta handled below can
                # actually be reached.
                for x in (res.start, res.end):
                    x.month = int(l[i])
                    i += 2
                    if l[i] == '-':
                        value = int(l[i+1])*-1
                        i += 1
                    else:
                        value = int(l[i])
                    i += 2
                    if value:
                        # Non-zero second field: week number + weekday.
                        x.week = value
                        x.weekday = (int(l[i])-1) % 7
                    else:
                        # Zero second field: third field is a day number.
                        x.day = int(l[i])
                    i += 2
                    x.time = int(l[i])
                    i += 2
                if i < len_l:
                    if l[i] in ('-', '+'):
                        signal = (-1, 1)[l[i] == "+"]
                        i += 1
                    else:
                        signal = 1
                    # The trailing field is a delta added to the standard
                    # offset; the sign belongs to the delta only, not to
                    # the resulting sum.
                    # NOTE(review): if no standard offset was parsed,
                    # res.stdoffset is presumably None and this addition
                    # raises TypeError, which is not caught below - confirm
                    # against _resultbase.
                    res.dstoffset = res.stdoffset + int(l[i])*signal
            elif (l.count(',') == 2 and l[i:].count('/') <= 2 and
                  not [y for x in l[i:] if x not in (',', '/', 'J', 'M',
                                                     '.', '-', ':')
                       for y in x if y not in "0123456789"]):
                for x in (res.start, res.end):
                    if l[i] == 'J':
                        # non-leap year day (1 based)
                        i += 1
                        x.jyday = int(l[i])
                    elif l[i] == 'M':
                        # month[-.]week[-.]weekday
                        i += 1
                        x.month = int(l[i])
                        i += 1
                        if l[i] not in ('-', '.'):
                            return None
                        i += 1
                        x.week = int(l[i])
                        # Week 5 is stored as -1.
                        if x.week == 5:
                            x.week = -1
                        i += 1
                        if l[i] not in ('-', '.'):
                            return None
                        i += 1
                        x.weekday = (int(l[i])-1) % 7
                    else:
                        # year day (zero based)
                        x.yday = int(l[i])+1

                    i += 1

                    if i < len_l and l[i] == '/':
                        i += 1
                        # Transition time, stored in seconds.
                        len_li = len(l[i])
                        if len_li == 4:
                            # HHMM
                            x.time = (int(l[i][:2])*3600+int(l[i][2:])*60)
                        elif i+1 < len_l and l[i+1] == ':':
                            # HH:MM[:SS]
                            x.time = int(l[i])*3600+int(l[i+2])*60
                            i += 2
                            if i+1 < len_l and l[i+1] == ':':
                                i += 2
                                x.time += int(l[i])
                        elif len_li <= 2:
                            # H[H]
                            x.time = (int(l[i][:2])*3600)
                        else:
                            return None
                        i += 1

                    # A rule must be followed by ',' or by end of input.
                    if i != len_l and l[i] != ',':
                        return None

                    i += 1

                # Nothing may follow the second rule.
                if i < len_l:
                    return None

        except (IndexError, ValueError):
            # Truncated input or a non-numeric field.
            return None

        return res
1342
1343
# Shared, module-level time zone string parser used by _parsetz().
DEFAULTTZPARSER = _tzparser()


def _parsetz(tzstr):
    """
    Parse a time zone string with the shared ``DEFAULTTZPARSER``.

    :param tzstr:
        The time zone string handed to ``DEFAULTTZPARSER.parse``.

    :return:
        Whatever ``DEFAULTTZPARSER.parse`` returns for ``tzstr``.
    """
    shared_parser = DEFAULTTZPARSER
    return shared_parser.parse(tzstr)
1349
1350
1351def _parsems(value):
1352    """Parse a I[.F] seconds value into (seconds, microseconds)."""
1353    if "." not in value:
1354        return int(value), 0
1355    else:
1356        i, f = value.split(".")
1357        return int(i), int(f.ljust(6, "0")[:6])
1358
1359
1360# vim:ts=4:sw=4:et
1361