1# -*- coding: utf-8 -*- 2""" 3This module offers a generic date/time string parser which is able to parse 4most known formats to represent a date and/or time. 5 6This module attempts to be forgiving with regards to unlikely input formats, 7returning a datetime object even for dates which are ambiguous. If an element 8of a date/time stamp is omitted, the following rules are applied: 9 10- If AM or PM is left unspecified, a 24-hour clock is assumed, however, an hour 11 on a 12-hour clock (``0 <= hour <= 12``) *must* be specified if AM or PM is 12 specified. 13- If a time zone is omitted, a timezone-naive datetime is returned. 14 15If any other elements are missing, they are taken from the 16:class:`datetime.datetime` object passed to the parameter ``default``. If this 17results in a day number exceeding the valid number of days per month, the 18value falls back to the end of the month. 19 20Additional resources about date/time string formats can be found below: 21 22- `A summary of the international standard date and time notation 23 <http://www.cl.cam.ac.uk/~mgk25/iso-time.html>`_ 24- `W3C Date and Time Formats <http://www.w3.org/TR/NOTE-datetime>`_ 25- `Time Formats (Planetary Rings Node) <https://pds-rings.seti.org:443/tools/time_formats.html>`_ 26- `CPAN ParseDate module 27 <http://search.cpan.org/~muir/Time-modules-2013.0912/lib/Time/ParseDate.pm>`_ 28- `Java SimpleDateFormat Class 29 <https://docs.oracle.com/javase/6/docs/api/java/text/SimpleDateFormat.html>`_ 30""" 31from __future__ import unicode_literals 32 33import datetime 34import re 35import string 36import time 37import warnings 38 39from calendar import monthrange 40from io import StringIO 41 42import six 43from six import integer_types, text_type 44 45from decimal import Decimal 46 47from warnings import warn 48 49from .. import relativedelta 50from .. import tz 51 52__all__ = ["parse", "parserinfo", "ParserError"] 53 54 55# TODO: pandas.core.tools.datetimes imports this explicitly. 
# TODO: pandas.core.tools.datetimes imports this explicitly.  Might be worth
# making public and/or figuring out if there is something we can
# take off their plate.
class _timelex(object):
    """
    Lexer that splits a date/time string (or character stream) into tokens.

    Instances are iterators: each step yields the next token, and iteration
    stops once :meth:`get_token` returns ``None``.
    """

    # Fractional seconds are sometimes split by a comma
    _split_decimal = re.compile("([.,])")

    def __init__(self, instream):
        """
        :param instream:
            Text, an object that can be decoded to text, or any object
            exposing a ``read`` attribute.

        :raises TypeError:
            Raised when ``instream`` is neither text nor readable.
        """
        if six.PY2:
            # In Python 2, we can't duck type properly because unicode has
            # a 'decode' function, and we'd be double-decoding
            must_decode = isinstance(instream, (bytes, bytearray))
        else:
            must_decode = getattr(instream, 'decode', None) is not None

        if must_decode:
            instream = instream.decode()

        if isinstance(instream, text_type):
            instream = StringIO(instream)
        elif getattr(instream, 'read', None) is None:
            raise TypeError('Parser must be a string or character stream, not '
                            '{itype}'.format(itype=instream.__class__.__name__))

        self.instream = instream
        self.charstack = []     # characters read but not yet consumed
        self.tokenstack = []    # tokens already split off, waiting to be emitted
        self.eof = False

    def get_token(self):
        """
        Return the next lexical unit of the time string, or ``None`` when the
        input is exhausted.

        A token is a run of characters of one kind: consecutive letters form
        one token, consecutive digits form one token, any whitespace character
        is emitted as a single ``' '`` and any other character is emitted on
        its own.

        Dots are ambiguous: they may separate fields ("Sep.20.2009") or mark
        a decimal point ("4:30:21.447"). A dotted run is therefore collected
        whole and only afterwards split, with the extra pieces parked on the
        token stack and handed out by the following calls.
        """
        if self.tokenstack:
            return self.tokenstack.pop(0)

        saw_alpha = False
        current = None
        mode = None     # one of None, 'a', '0', 'a.', '0.'

        while not self.eof:
            # The end of a token is only noticed on reading a character that
            # does not belong to it; that character is kept in charstack so
            # the next token can start with it.
            if self.charstack:
                ch = self.charstack.pop(0)
            else:
                ch = self.instream.read(1)
                while ch == '\x00':
                    ch = self.instream.read(1)

            if not ch:
                self.eof = True
                break

            if not mode:
                # First character: decides whether this is a word, a number
                # or a single-character token.
                current = ch
                if self.isword(ch):
                    mode = 'a'
                    continue
                if self.isnum(ch):
                    mode = '0'
                    continue
                if self.isspace(ch):
                    current = ' '
                break  # emit token

            if mode in ('a', 'a.'):
                saw_alpha = True

            # Work out which mode accepting ``ch`` would lead to; ``None``
            # means the character does not belong to the current token.
            if mode == 'a':
                if self.isword(ch):
                    following = 'a'
                elif ch == '.':
                    following = 'a.'
                else:
                    following = None
            elif mode == '0':
                if self.isnum(ch):
                    following = '0'
                elif ch == '.' or (ch == ',' and len(current) >= 2):
                    following = '0.'
                else:
                    following = None
            elif mode == 'a.':
                # Letters plus a dot separator: keep collecting, the pieces
                # are broken up after the loop.
                if ch == '.' or self.isword(ch):
                    following = 'a.'
                elif self.isnum(ch) and current[-1] == '.':
                    following = '0.'
                else:
                    following = None
            else:
                # mode == '0.': digits plus at least one separator.
                if ch == '.' or self.isnum(ch):
                    following = '0.'
                elif self.isword(ch) and current[-1] == '.':
                    following = 'a.'
                else:
                    following = None

            if following is None:
                self.charstack.append(ch)
                break  # emit token

            current += ch
            mode = following

        must_split = (mode in ('a.', '0.') and
                      (saw_alpha or current.count('.') > 1 or
                       current[-1] in '.,'))
        if must_split:
            pieces = self._split_decimal.split(current)
            current = pieces[0]
            self.tokenstack.extend(piece for piece in pieces[1:] if piece)

        if mode == '0.' and current.count('.') == 0:
            # Only a comma was seen: treat it as the decimal separator.
            current = current.replace(',', '.')

        return current

    def __iter__(self):
        return self

    def __next__(self):
        nxt = self.get_token()
        if nxt is not None:
            return nxt

        raise StopIteration

    def next(self):
        return self.__next__()  # Python 2.x support

    @classmethod
    def split(cls, s):
        """ Tokenize ``s`` and return all of its tokens as a list """
        return list(cls(s))

    @classmethod
    def isword(cls, nextchar):
        """ Tell whether ``nextchar`` can be part of a word token """
        return nextchar.isalpha()

    @classmethod
    def isnum(cls, nextchar):
        """ Tell whether ``nextchar`` can be part of a number token """
        return nextchar.isdigit()

    @classmethod
    def isspace(cls, nextchar):
        """ Tell whether ``nextchar`` is a whitespace character """
        return nextchar.isspace()
class _resultbase(object):
    """
    Base for result records whose fields are listed in the subclass's
    ``__slots__``; every field starts out as ``None``.
    """

    def __init__(self):
        for name in self.__slots__:
            setattr(self, name, None)

    def _repr(self, classname):
        """ Render ``classname(field=value, ...)`` listing only set fields """
        pairs = ((name, getattr(self, name)) for name in self.__slots__)
        shown = ["%s=%s" % (name, repr(value))
                 for name, value in pairs
                 if value is not None]
        return "%s(%s)" % (classname, ", ".join(shown))

    def __len__(self):
        # Number of fields that currently hold something other than None.
        filled = 0
        for name in self.__slots__:
            if getattr(self, name) is not None:
                filled += 1
        return filled

    def __repr__(self):
        return self._repr(self.__class__.__name__)
class parserinfo(object):
    """
    Class which handles what inputs are accepted. Subclass this to customize
    the language and acceptable values for each parameter.

    :param dayfirst:
        Whether to interpret the first value in an ambiguous 3-integer date
        (e.g. 01/05/09) as the day (``True``) or month (``False``). If
        ``yearfirst`` is set to ``True``, this distinguishes between YDM
        and YMD. Default is ``False``.

    :param yearfirst:
        Whether to interpret the first value in an ambiguous 3-integer date
        (e.g. 01/05/09) as the year. If ``True``, the first number is taken
        to be the year, otherwise the last number is taken to be the year.
        Default is ``False``.
    """

    # m from a.m/p.m, t from ISO T separator
    JUMP = [" ", ".", ",", ";", "-", "/", "'",
            "at", "on", "and", "ad", "m", "t", "of",
            "st", "nd", "rd", "th"]

    WEEKDAYS = [("Mon", "Monday"),
                ("Tue", "Tuesday"),     # TODO: "Tues"
                ("Wed", "Wednesday"),
                ("Thu", "Thursday"),    # TODO: "Thurs"
                ("Fri", "Friday"),
                ("Sat", "Saturday"),
                ("Sun", "Sunday")]
    MONTHS = [("Jan", "January"),
              ("Feb", "February"),      # TODO: "Febr"
              ("Mar", "March"),
              ("Apr", "April"),
              ("May", "May"),
              ("Jun", "June"),
              ("Jul", "July"),
              ("Aug", "August"),
              ("Sep", "Sept", "September"),
              ("Oct", "October"),
              ("Nov", "November"),
              ("Dec", "December")]
    HMS = [("h", "hour", "hours"),
           ("m", "minute", "minutes"),
           ("s", "second", "seconds")]
    AMPM = [("am", "a"),
            ("pm", "p")]
    UTCZONE = ["UTC", "GMT", "Z", "z"]
    PERTAIN = ["of"]
    TZOFFSET = {}
    # TODO: ERA = ["AD", "BC", "CE", "BCE", "Stardate",
    #              "Anno Domini", "Year of Our Lord"]

    def __init__(self, dayfirst=False, yearfirst=False):
        # Lower-cased lookup tables built from the class-level word lists.
        self._jump = self._convert(self.JUMP)
        self._weekdays = self._convert(self.WEEKDAYS)
        self._months = self._convert(self.MONTHS)
        self._hms = self._convert(self.HMS)
        self._ampm = self._convert(self.AMPM)
        self._utczone = self._convert(self.UTCZONE)
        self._pertain = self._convert(self.PERTAIN)

        self.dayfirst = dayfirst
        self.yearfirst = yearfirst

        # Reference point used by convertyear() for two-digit years.
        self._year = time.localtime().tm_year
        self._century = self._year // 100 * 100

    def _convert(self, lst):
        """
        Build a ``{lower-cased name: position}`` lookup from ``lst``.

        Each entry of ``lst`` is either a single string or a tuple of
        aliases; all aliases of one entry map to that entry's index.
        """
        dct = {}
        for i, entry in enumerate(lst):
            aliases = entry if isinstance(entry, tuple) else (entry,)
            for alias in aliases:
                dct[alias.lower()] = i
        return dct

    def jump(self, name):
        """ Whether ``name`` (case-insensitive) is listed in ``JUMP`` """
        return name.lower() in self._jump

    def weekday(self, name):
        """
        Return the index in ``WEEKDAYS`` of the entry containing ``name``
        (case-insensitive), or ``None`` if there is none.
        """
        return self._weekdays.get(name.lower())

    def month(self, name):
        """
        Return the 1-based position in ``MONTHS`` of the entry containing
        ``name`` (case-insensitive), or ``None`` if there is none.
        """
        idx = self._months.get(name.lower())
        if idx is None:
            return None
        return idx + 1

    def hms(self, name):
        """
        Return the index in ``HMS`` of the entry containing ``name``
        (case-insensitive), or ``None`` if there is none.
        """
        return self._hms.get(name.lower())

    def ampm(self, name):
        """
        Return the index in ``AMPM`` of the entry containing ``name``
        (case-insensitive), or ``None`` if there is none.
        """
        return self._ampm.get(name.lower())

    def pertain(self, name):
        """ Whether ``name`` (case-insensitive) is listed in ``PERTAIN`` """
        return name.lower() in self._pertain

    def utczone(self, name):
        """ Whether ``name`` (case-insensitive) is listed in ``UTCZONE`` """
        return name.lower() in self._utczone

    def tzoffset(self, name):
        """
        Return the UTC offset registered for the time zone ``name``.

        :return:
            ``0`` when ``name`` is one of the ``UTCZONE`` names, otherwise
            whatever ``TZOFFSET`` holds for ``name`` (``None`` if absent).
        """
        # The keys of ``_utczone`` are lower-cased by ``_convert``, so the
        # name has to be lower-cased too; otherwise "UTC", "GMT" and "Z"
        # would never match, unlike in ``utczone()``.
        lowered = name.lower() if hasattr(name, "lower") else name
        if lowered in self._utczone:
            return 0

        return self.TZOFFSET.get(name)

    def convertyear(self, year, century_specified=False):
        """
        Converts two-digit years to year within [-50, 49]
        range of self._year (current local time)

        :param year:
            Non-negative year number.

        :param century_specified:
            When true, ``year`` is returned unchanged even if it is below 100.
        """

        # Function contract is that the year is always positive
        assert year >= 0

        if year < 100 and not century_specified:
            # assume current century to start
            year += self._century

            if year >= self._year + 50:  # if too far in future
                year -= 100
            elif year < self._year - 50:  # if too far in past
                year += 100

        return year

    def validate(self, res):
        """
        Normalise the parsed result ``res`` in place and return ``True``.

        The year is passed through :meth:`convertyear`, and the time zone
        name and offset are reconciled for UTC.
        """
        # move to info
        if res.year is not None:
            res.year = self.convertyear(res.year, res.century_specified)

        if ((res.tzoffset == 0 and not res.tzname) or
                res.tzname in ('Z', 'z')):
            # A bare zero offset, or the "Z" suffix, means UTC.
            res.tzname = "UTC"
            res.tzoffset = 0
        elif res.tzoffset != 0 and res.tzname and self.utczone(res.tzname):
            # A UTC zone name always carries a zero offset.
            res.tzoffset = 0
        return True
class _ymd(list):
    """
    List of the (at most three) integers collected for a date, together with
    the positions of the ones already known to be the year, month or day.
    """

    def __init__(self, *args, **kwargs):
        # Name the class explicitly: ``super(self.__class__, self)`` recurses
        # forever as soon as this class is subclassed.
        super(_ymd, self).__init__(*args, **kwargs)
        self.century_specified = False
        self.dstridx = None     # index of the element labelled as day
        self.mstridx = None     # index of the element labelled as month
        self.ystridx = None     # index of the element labelled as year

    @property
    def has_year(self):
        return self.ystridx is not None

    @property
    def has_month(self):
        return self.mstridx is not None

    @property
    def has_day(self):
        return self.dstridx is not None

    def could_be_day(self, value):
        """
        Tell whether ``value`` is a plausible day number given what is
        already known: never if a day is labelled, otherwise within the
        length of the known month (of the known year, if any).
        """
        if self.has_day:
            return False
        elif not self.has_month:
            return 1 <= value <= 31
        elif not self.has_year:
            # Be permissive, assume leap year
            month = self[self.mstridx]
            return 1 <= value <= monthrange(2000, month)[1]
        else:
            month = self[self.mstridx]
            year = self[self.ystridx]
            return 1 <= value <= monthrange(year, month)[1]

    def append(self, val, label=None):
        """
        Append ``int(val)`` and optionally record what it stands for.

        :param val:
            A string of digits or a number.

        :param label:
            ``'Y'``, ``'M'``, ``'D'`` or ``None``. A digit string longer
            than two characters, or a number above 100, is labelled ``'Y'``
            automatically and marks the century as specified.

        :raises ValueError:
            Raised if the label is already taken, or if an automatically
            detected year was given a conflicting label. In the first case
            the value has already been appended when the error is raised.
        """
        if hasattr(val, '__len__'):
            if val.isdigit() and len(val) > 2:
                self.century_specified = True
                if label not in [None, 'Y']:  # pragma: no cover
                    raise ValueError(label)
                label = 'Y'
        elif val > 100:
            # NOTE: the number 100 itself is not flagged here, whereas the
            # string "100" is flagged by the branch above.
            self.century_specified = True
            if label not in [None, 'Y']:  # pragma: no cover
                raise ValueError(label)
            label = 'Y'

        super(_ymd, self).append(int(val))

        if label == 'M':
            if self.has_month:
                raise ValueError('Month is already set')
            self.mstridx = len(self) - 1
        elif label == 'D':
            if self.has_day:
                raise ValueError('Day is already set')
            self.dstridx = len(self) - 1
        elif label == 'Y':
            if self.has_year:
                raise ValueError('Year is already set')
            self.ystridx = len(self) - 1

    def _resolve_from_stridxs(self, strids):
        """
        Try to resolve the identities of year/month/day elements using
        ystridx, mstridx, and dstridx, if enough of these are specified.

        :param strids:
            Mapping from ``'y'`` / ``'m'`` / ``'d'`` to list index; it is
            completed in place when exactly one of three is missing.

        :return:
            ``(year, month, day)``, with ``None`` for absent elements.
        """
        if len(self) == 3 and len(strids) == 2:
            # we can back out the remaining stridx value
            missing = [x for x in range(3) if x not in strids.values()]
            key = [x for x in ['y', 'm', 'd'] if x not in strids]
            assert len(missing) == len(key) == 1
            key = key[0]
            val = missing[0]
            strids[key] = val

        assert len(self) == len(strids)  # otherwise this should not be called
        out = {key: self[strids[key]] for key in strids}
        return (out.get('y'), out.get('m'), out.get('d'))

    def resolve_ymd(self, yearfirst, dayfirst):
        """
        Decide which of the collected values is the year, month and day.

        Labelled positions are used when they settle the question; otherwise
        the values are assigned by magnitude (a value above 31 is a year,
        above 12 cannot be a month) and by the ``yearfirst`` / ``dayfirst``
        preferences.

        :return:
            ``(year, month, day)``, with ``None`` for undetermined elements.

        :raises ValueError:
            Raised when more than three values were collected.
        """
        len_ymd = len(self)
        year, month, day = (None, None, None)

        strids = (('y', self.ystridx),
                  ('m', self.mstridx),
                  ('d', self.dstridx))

        strids = {key: val for key, val in strids if val is not None}
        if (len(self) == len(strids) > 0 or
                (len(self) == 3 and len(strids) == 2)):
            return self._resolve_from_stridxs(strids)

        mstridx = self.mstridx

        if len_ymd > 3:
            raise ValueError("More than three YMD values")
        elif len_ymd == 1 or (mstridx is not None and len_ymd == 2):
            # One member, or two members with a month string
            if mstridx is not None:
                month = self[mstridx]
                # since mstridx is 0 or 1, self[mstridx-1] always
                # looks up the other element
                other = self[mstridx - 1]
            else:
                other = self[0]

            if len_ymd > 1 or mstridx is None:
                if other > 31:
                    year = other
                else:
                    day = other

        elif len_ymd == 2:
            # Two members with numbers
            if self[0] > 31:
                # 99-01
                year, month = self
            elif self[1] > 31:
                # 01-99
                month, year = self
            elif dayfirst and self[1] <= 12:
                # 13-01
                day, month = self
            else:
                # 01-13
                month, day = self

        elif len_ymd == 3:
            # Three members
            if mstridx == 0:
                if self[1] > 31:
                    # Apr-2003-25
                    month, year, day = self
                else:
                    month, day, year = self
            elif mstridx == 1:
                if self[0] > 31 or (yearfirst and self[2] <= 31):
                    # 99-Jan-01
                    year, month, day = self
                else:
                    # 01-Jan-01
                    # Give precedence to day-first, since
                    # two-digit years is usually hand-written.
                    day, month, year = self

            elif mstridx == 2:
                # Month name in last position
                if self[1] > 31:
                    # 01-99-Jan
                    day, year, month = self
                else:
                    # 99-01-Jan
                    year, day, month = self

            else:
                if (self[0] > 31 or
                    self.ystridx == 0 or
                        (yearfirst and self[1] <= 12 and self[2] <= 31)):
                    # 99-01-01
                    if dayfirst and self[2] <= 12:
                        year, day, month = self
                    else:
                        year, month, day = self
                elif self[0] > 12 or (dayfirst and self[1] <= 12):
                    # 13-01-01
                    day, month, year = self
                else:
                    # 01-13-01
                    month, day, year = self

        return year, month, day
This parameter can be a 599 dictionary with timezone aliases mapping time zone names to time 600 zones or a function taking two parameters (``tzname`` and 601 ``tzoffset``) and returning a time zone. 602 603 The timezones to which the names are mapped can be an integer 604 offset from UTC in seconds or a :class:`tzinfo` object. 605 606 .. doctest:: 607 :options: +NORMALIZE_WHITESPACE 608 609 >>> from dateutil.parser import parse 610 >>> from dateutil.tz import gettz 611 >>> tzinfos = {"BRST": -7200, "CST": gettz("America/Chicago")} 612 >>> parse("2012-01-19 17:21:00 BRST", tzinfos=tzinfos) 613 datetime.datetime(2012, 1, 19, 17, 21, tzinfo=tzoffset(u'BRST', -7200)) 614 >>> parse("2012-01-19 17:21:00 CST", tzinfos=tzinfos) 615 datetime.datetime(2012, 1, 19, 17, 21, 616 tzinfo=tzfile('/usr/share/zoneinfo/America/Chicago')) 617 618 This parameter is ignored if ``ignoretz`` is set. 619 620 :param \\*\\*kwargs: 621 Keyword arguments as passed to ``_parse()``. 622 623 :return: 624 Returns a :class:`datetime.datetime` object or, if the 625 ``fuzzy_with_tokens`` option is ``True``, returns a tuple, the 626 first element being a :class:`datetime.datetime` object, the second 627 a tuple containing the fuzzy tokens. 628 629 :raises ParserError: 630 Raised for invalid or unknown string format, if the provided 631 :class:`tzinfo` is not in a valid format, or if an invalid date 632 would be created. 633 634 :raises TypeError: 635 Raised for non-string or character stream input. 636 637 :raises OverflowError: 638 Raised if the parsed date exceeds the largest valid C integer on 639 your system. 
640 """ 641 642 if default is None: 643 default = datetime.datetime.now().replace(hour=0, minute=0, 644 second=0, microsecond=0) 645 646 res, skipped_tokens = self._parse(timestr, **kwargs) 647 648 if res is None: 649 raise ParserError("Unknown string format: %s", timestr) 650 651 if len(res) == 0: 652 raise ParserError("String does not contain a date: %s", timestr) 653 654 try: 655 ret = self._build_naive(res, default) 656 except ValueError as e: 657 six.raise_from(ParserError(e.args[0] + ": %s", timestr), e) 658 659 if not ignoretz: 660 ret = self._build_tzaware(ret, res, tzinfos) 661 662 if kwargs.get('fuzzy_with_tokens', False): 663 return ret, skipped_tokens 664 else: 665 return ret 666 667 class _result(_resultbase): 668 __slots__ = ["year", "month", "day", "weekday", 669 "hour", "minute", "second", "microsecond", 670 "tzname", "tzoffset", "ampm","any_unused_tokens"] 671 672 def _parse(self, timestr, dayfirst=None, yearfirst=None, fuzzy=False, 673 fuzzy_with_tokens=False): 674 """ 675 Private method which performs the heavy lifting of parsing, called from 676 ``parse()``, which passes on its ``kwargs`` to this function. 677 678 :param timestr: 679 The string to parse. 680 681 :param dayfirst: 682 Whether to interpret the first value in an ambiguous 3-integer date 683 (e.g. 01/05/09) as the day (``True``) or month (``False``). If 684 ``yearfirst`` is set to ``True``, this distinguishes between YDM 685 and YMD. If set to ``None``, this value is retrieved from the 686 current :class:`parserinfo` object (which itself defaults to 687 ``False``). 688 689 :param yearfirst: 690 Whether to interpret the first value in an ambiguous 3-integer date 691 (e.g. 01/05/09) as the year. If ``True``, the first number is taken 692 to be the year, otherwise the last number is taken to be the year. 693 If this is set to ``None``, the value is retrieved from the current 694 :class:`parserinfo` object (which itself defaults to ``False``). 
695 696 :param fuzzy: 697 Whether to allow fuzzy parsing, allowing for string like "Today is 698 January 1, 2047 at 8:21:00AM". 699 700 :param fuzzy_with_tokens: 701 If ``True``, ``fuzzy`` is automatically set to True, and the parser 702 will return a tuple where the first element is the parsed 703 :class:`datetime.datetime` datetimestamp and the second element is 704 a tuple containing the portions of the string which were ignored: 705 706 .. doctest:: 707 708 >>> from dateutil.parser import parse 709 >>> parse("Today is January 1, 2047 at 8:21:00AM", fuzzy_with_tokens=True) 710 (datetime.datetime(2047, 1, 1, 8, 21), (u'Today is ', u' ', u'at ')) 711 712 """ 713 if fuzzy_with_tokens: 714 fuzzy = True 715 716 info = self.info 717 718 if dayfirst is None: 719 dayfirst = info.dayfirst 720 721 if yearfirst is None: 722 yearfirst = info.yearfirst 723 724 res = self._result() 725 l = _timelex.split(timestr) # Splits the timestr into tokens 726 727 skipped_idxs = [] 728 729 # year/month/day list 730 ymd = _ymd() 731 732 len_l = len(l) 733 i = 0 734 try: 735 while i < len_l: 736 737 # Check if it's a number 738 value_repr = l[i] 739 try: 740 value = float(value_repr) 741 except ValueError: 742 value = None 743 744 if value is not None: 745 # Numeric token 746 i = self._parse_numeric_token(l, i, info, ymd, res, fuzzy) 747 748 # Check weekday 749 elif info.weekday(l[i]) is not None: 750 value = info.weekday(l[i]) 751 res.weekday = value 752 753 # Check month name 754 elif info.month(l[i]) is not None: 755 value = info.month(l[i]) 756 ymd.append(value, 'M') 757 758 if i + 1 < len_l: 759 if l[i + 1] in ('-', '/'): 760 # Jan-01[-99] 761 sep = l[i + 1] 762 ymd.append(l[i + 2]) 763 764 if i + 3 < len_l and l[i + 3] == sep: 765 # Jan-01-99 766 ymd.append(l[i + 4]) 767 i += 2 768 769 i += 2 770 771 elif (i + 4 < len_l and l[i + 1] == l[i + 3] == ' ' and 772 info.pertain(l[i + 2])): 773 # Jan of 01 774 # In this case, 01 is clearly year 775 if l[i + 4].isdigit(): 776 # Convert it 
here to become unambiguous 777 value = int(l[i + 4]) 778 year = str(info.convertyear(value)) 779 ymd.append(year, 'Y') 780 else: 781 # Wrong guess 782 pass 783 # TODO: not hit in tests 784 i += 4 785 786 # Check am/pm 787 elif info.ampm(l[i]) is not None: 788 value = info.ampm(l[i]) 789 val_is_ampm = self._ampm_valid(res.hour, res.ampm, fuzzy) 790 791 if val_is_ampm: 792 res.hour = self._adjust_ampm(res.hour, value) 793 res.ampm = value 794 795 elif fuzzy: 796 skipped_idxs.append(i) 797 798 # Check for a timezone name 799 elif self._could_be_tzname(res.hour, res.tzname, res.tzoffset, l[i]): 800 res.tzname = l[i] 801 res.tzoffset = info.tzoffset(res.tzname) 802 803 # Check for something like GMT+3, or BRST+3. Notice 804 # that it doesn't mean "I am 3 hours after GMT", but 805 # "my time +3 is GMT". If found, we reverse the 806 # logic so that timezone parsing code will get it 807 # right. 808 if i + 1 < len_l and l[i + 1] in ('+', '-'): 809 l[i + 1] = ('+', '-')[l[i + 1] == '+'] 810 res.tzoffset = None 811 if info.utczone(res.tzname): 812 # With something like GMT+3, the timezone 813 # is *not* GMT. 814 res.tzname = None 815 816 # Check for a numbered timezone 817 elif res.hour is not None and l[i] in ('+', '-'): 818 signal = (-1, 1)[l[i] == '+'] 819 len_li = len(l[i + 1]) 820 821 # TODO: check that l[i + 1] is integer? 822 if len_li == 4: 823 # -0300 824 hour_offset = int(l[i + 1][:2]) 825 min_offset = int(l[i + 1][2:]) 826 elif i + 2 < len_l and l[i + 2] == ':': 827 # -03:00 828 hour_offset = int(l[i + 1]) 829 min_offset = int(l[i + 3]) # TODO: Check that l[i+3] is minute-like? 
830 i += 2 831 elif len_li <= 2: 832 # -[0]3 833 hour_offset = int(l[i + 1][:2]) 834 min_offset = 0 835 else: 836 raise ValueError(timestr) 837 838 res.tzoffset = signal * (hour_offset * 3600 + min_offset * 60) 839 840 # Look for a timezone name between parenthesis 841 if (i + 5 < len_l and 842 info.jump(l[i + 2]) and l[i + 3] == '(' and 843 l[i + 5] == ')' and 844 3 <= len(l[i + 4]) and 845 self._could_be_tzname(res.hour, res.tzname, 846 None, l[i + 4])): 847 # -0300 (BRST) 848 res.tzname = l[i + 4] 849 i += 4 850 851 i += 1 852 853 # Check jumps 854 elif not (info.jump(l[i]) or fuzzy): 855 raise ValueError(timestr) 856 857 else: 858 skipped_idxs.append(i) 859 i += 1 860 861 # Process year/month/day 862 year, month, day = ymd.resolve_ymd(yearfirst, dayfirst) 863 864 res.century_specified = ymd.century_specified 865 res.year = year 866 res.month = month 867 res.day = day 868 869 except (IndexError, ValueError): 870 return None, None 871 872 if not info.validate(res): 873 return None, None 874 875 if fuzzy_with_tokens: 876 skipped_tokens = self._recombine_skipped(l, skipped_idxs) 877 return res, tuple(skipped_tokens) 878 else: 879 return res, None 880 881 def _parse_numeric_token(self, tokens, idx, info, ymd, res, fuzzy): 882 # Token is a number 883 value_repr = tokens[idx] 884 try: 885 value = self._to_decimal(value_repr) 886 except Exception as e: 887 six.raise_from(ValueError('Unknown numeric token'), e) 888 889 len_li = len(value_repr) 890 891 len_l = len(tokens) 892 893 if (len(ymd) == 3 and len_li in (2, 4) and 894 res.hour is None and 895 (idx + 1 >= len_l or 896 (tokens[idx + 1] != ':' and 897 info.hms(tokens[idx + 1]) is None))): 898 # 19990101T23[59] 899 s = tokens[idx] 900 res.hour = int(s[:2]) 901 902 if len_li == 4: 903 res.minute = int(s[2:]) 904 905 elif len_li == 6 or (len_li > 6 and tokens[idx].find('.') == 6): 906 # YYMMDD or HHMMSS[.ss] 907 s = tokens[idx] 908 909 if not ymd and '.' 
not in tokens[idx]: 910 ymd.append(s[:2]) 911 ymd.append(s[2:4]) 912 ymd.append(s[4:]) 913 else: 914 # 19990101T235959[.59] 915 916 # TODO: Check if res attributes already set. 917 res.hour = int(s[:2]) 918 res.minute = int(s[2:4]) 919 res.second, res.microsecond = self._parsems(s[4:]) 920 921 elif len_li in (8, 12, 14): 922 # YYYYMMDD 923 s = tokens[idx] 924 ymd.append(s[:4], 'Y') 925 ymd.append(s[4:6]) 926 ymd.append(s[6:8]) 927 928 if len_li > 8: 929 res.hour = int(s[8:10]) 930 res.minute = int(s[10:12]) 931 932 if len_li > 12: 933 res.second = int(s[12:]) 934 935 elif self._find_hms_idx(idx, tokens, info, allow_jump=True) is not None: 936 # HH[ ]h or MM[ ]m or SS[.ss][ ]s 937 hms_idx = self._find_hms_idx(idx, tokens, info, allow_jump=True) 938 (idx, hms) = self._parse_hms(idx, tokens, info, hms_idx) 939 if hms is not None: 940 # TODO: checking that hour/minute/second are not 941 # already set? 942 self._assign_hms(res, value_repr, hms) 943 944 elif idx + 2 < len_l and tokens[idx + 1] == ':': 945 # HH:MM[:SS[.ss]] 946 res.hour = int(value) 947 value = self._to_decimal(tokens[idx + 2]) # TODO: try/except for this? 
948 (res.minute, res.second) = self._parse_min_sec(value) 949 950 if idx + 4 < len_l and tokens[idx + 3] == ':': 951 res.second, res.microsecond = self._parsems(tokens[idx + 4]) 952 953 idx += 2 954 955 idx += 2 956 957 elif idx + 1 < len_l and tokens[idx + 1] in ('-', '/', '.'): 958 sep = tokens[idx + 1] 959 ymd.append(value_repr) 960 961 if idx + 2 < len_l and not info.jump(tokens[idx + 2]): 962 if tokens[idx + 2].isdigit(): 963 # 01-01[-01] 964 ymd.append(tokens[idx + 2]) 965 else: 966 # 01-Jan[-01] 967 value = info.month(tokens[idx + 2]) 968 969 if value is not None: 970 ymd.append(value, 'M') 971 else: 972 raise ValueError() 973 974 if idx + 3 < len_l and tokens[idx + 3] == sep: 975 # We have three members 976 value = info.month(tokens[idx + 4]) 977 978 if value is not None: 979 ymd.append(value, 'M') 980 else: 981 ymd.append(tokens[idx + 4]) 982 idx += 2 983 984 idx += 1 985 idx += 1 986 987 elif idx + 1 >= len_l or info.jump(tokens[idx + 1]): 988 if idx + 2 < len_l and info.ampm(tokens[idx + 2]) is not None: 989 # 12 am 990 hour = int(value) 991 res.hour = self._adjust_ampm(hour, info.ampm(tokens[idx + 2])) 992 idx += 1 993 else: 994 # Year, month or day 995 ymd.append(value) 996 idx += 1 997 998 elif info.ampm(tokens[idx + 1]) is not None and (0 <= value < 24): 999 # 12am 1000 hour = int(value) 1001 res.hour = self._adjust_ampm(hour, info.ampm(tokens[idx + 1])) 1002 idx += 1 1003 1004 elif ymd.could_be_day(value): 1005 ymd.append(value) 1006 1007 elif not fuzzy: 1008 raise ValueError() 1009 1010 return idx 1011 1012 def _find_hms_idx(self, idx, tokens, info, allow_jump): 1013 len_l = len(tokens) 1014 1015 if idx+1 < len_l and info.hms(tokens[idx+1]) is not None: 1016 # There is an "h", "m", or "s" label following this token. We take 1017 # assign the upcoming label to the current token. 1018 # e.g. 
the "12" in 12h" 1019 hms_idx = idx + 1 1020 1021 elif (allow_jump and idx+2 < len_l and tokens[idx+1] == ' ' and 1022 info.hms(tokens[idx+2]) is not None): 1023 # There is a space and then an "h", "m", or "s" label. 1024 # e.g. the "12" in "12 h" 1025 hms_idx = idx + 2 1026 1027 elif idx > 0 and info.hms(tokens[idx-1]) is not None: 1028 # There is a "h", "m", or "s" preceding this token. Since neither 1029 # of the previous cases was hit, there is no label following this 1030 # token, so we use the previous label. 1031 # e.g. the "04" in "12h04" 1032 hms_idx = idx-1 1033 1034 elif (1 < idx == len_l-1 and tokens[idx-1] == ' ' and 1035 info.hms(tokens[idx-2]) is not None): 1036 # If we are looking at the final token, we allow for a 1037 # backward-looking check to skip over a space. 1038 # TODO: Are we sure this is the right condition here? 1039 hms_idx = idx - 2 1040 1041 else: 1042 hms_idx = None 1043 1044 return hms_idx 1045 1046 def _assign_hms(self, res, value_repr, hms): 1047 # See GH issue #427, fixing float rounding 1048 value = self._to_decimal(value_repr) 1049 1050 if hms == 0: 1051 # Hour 1052 res.hour = int(value) 1053 if value % 1: 1054 res.minute = int(60*(value % 1)) 1055 1056 elif hms == 1: 1057 (res.minute, res.second) = self._parse_min_sec(value) 1058 1059 elif hms == 2: 1060 (res.second, res.microsecond) = self._parsems(value_repr) 1061 1062 def _could_be_tzname(self, hour, tzname, tzoffset, token): 1063 return (hour is not None and 1064 tzname is None and 1065 tzoffset is None and 1066 len(token) <= 5 and 1067 (all(x in string.ascii_uppercase for x in token) 1068 or token in self.info.UTCZONE)) 1069 1070 def _ampm_valid(self, hour, ampm, fuzzy): 1071 """ 1072 For fuzzy parsing, 'a' or 'am' (both valid English words) 1073 may erroneously trigger the AM/PM flag. Deal with that 1074 here. 1075 """ 1076 val_is_ampm = True 1077 1078 # If there's already an AM/PM flag, this one isn't one. 
1079 if fuzzy and ampm is not None: 1080 val_is_ampm = False 1081 1082 # If AM/PM is found and hour is not, raise a ValueError 1083 if hour is None: 1084 if fuzzy: 1085 val_is_ampm = False 1086 else: 1087 raise ValueError('No hour specified with AM or PM flag.') 1088 elif not 0 <= hour <= 12: 1089 # If AM/PM is found, it's a 12 hour clock, so raise 1090 # an error for invalid range 1091 if fuzzy: 1092 val_is_ampm = False 1093 else: 1094 raise ValueError('Invalid hour specified for 12-hour clock.') 1095 1096 return val_is_ampm 1097 1098 def _adjust_ampm(self, hour, ampm): 1099 if hour < 12 and ampm == 1: 1100 hour += 12 1101 elif hour == 12 and ampm == 0: 1102 hour = 0 1103 return hour 1104 1105 def _parse_min_sec(self, value): 1106 # TODO: Every usage of this function sets res.second to the return 1107 # value. Are there any cases where second will be returned as None and 1108 # we *don't* want to set res.second = None? 1109 minute = int(value) 1110 second = None 1111 1112 sec_remainder = value % 1 1113 if sec_remainder: 1114 second = int(60 * sec_remainder) 1115 return (minute, second) 1116 1117 def _parse_hms(self, idx, tokens, info, hms_idx): 1118 # TODO: Is this going to admit a lot of false-positives for when we 1119 # just happen to have digits and "h", "m" or "s" characters in non-date 1120 # text? I guess hex hashes won't have that problem, but there's plenty 1121 # of random junk out there. 1122 if hms_idx is None: 1123 hms = None 1124 new_idx = idx 1125 elif hms_idx > idx: 1126 hms = info.hms(tokens[hms_idx]) 1127 new_idx = hms_idx 1128 else: 1129 # Looking backwards, increment one. 1130 hms = info.hms(tokens[hms_idx]) + 1 1131 new_idx = idx 1132 1133 return (new_idx, hms) 1134 1135 # ------------------------------------------------------------------ 1136 # Handling for individual tokens. These are kept as methods instead 1137 # of functions for the sake of customizability via subclassing. 
1138 1139 def _parsems(self, value): 1140 """Parse a I[.F] seconds value into (seconds, microseconds).""" 1141 if "." not in value: 1142 return int(value), 0 1143 else: 1144 i, f = value.split(".") 1145 return int(i), int(f.ljust(6, "0")[:6]) 1146 1147 def _to_decimal(self, val): 1148 try: 1149 decimal_value = Decimal(val) 1150 # See GH 662, edge case, infinite value should not be converted 1151 # via `_to_decimal` 1152 if not decimal_value.is_finite(): 1153 raise ValueError("Converted decimal value is infinite or NaN") 1154 except Exception as e: 1155 msg = "Could not convert %s to decimal" % val 1156 six.raise_from(ValueError(msg), e) 1157 else: 1158 return decimal_value 1159 1160 # ------------------------------------------------------------------ 1161 # Post-Parsing construction of datetime output. These are kept as 1162 # methods instead of functions for the sake of customizability via 1163 # subclassing. 1164 1165 def _build_tzinfo(self, tzinfos, tzname, tzoffset): 1166 if callable(tzinfos): 1167 tzdata = tzinfos(tzname, tzoffset) 1168 else: 1169 tzdata = tzinfos.get(tzname) 1170 # handle case where tzinfo is paased an options that returns None 1171 # eg tzinfos = {'BRST' : None} 1172 if isinstance(tzdata, datetime.tzinfo) or tzdata is None: 1173 tzinfo = tzdata 1174 elif isinstance(tzdata, text_type): 1175 tzinfo = tz.tzstr(tzdata) 1176 elif isinstance(tzdata, integer_types): 1177 tzinfo = tz.tzoffset(tzname, tzdata) 1178 else: 1179 raise TypeError("Offset must be tzinfo subclass, tz string, " 1180 "or int offset.") 1181 return tzinfo 1182 1183 def _build_tzaware(self, naive, res, tzinfos): 1184 if (callable(tzinfos) or (tzinfos and res.tzname in tzinfos)): 1185 tzinfo = self._build_tzinfo(tzinfos, res.tzname, res.tzoffset) 1186 aware = naive.replace(tzinfo=tzinfo) 1187 aware = self._assign_tzname(aware, res.tzname) 1188 1189 elif res.tzname and res.tzname in time.tzname: 1190 aware = naive.replace(tzinfo=tz.tzlocal()) 1191 1192 # Handle ambiguous local 
datetime 1193 aware = self._assign_tzname(aware, res.tzname) 1194 1195 # This is mostly relevant for winter GMT zones parsed in the UK 1196 if (aware.tzname() != res.tzname and 1197 res.tzname in self.info.UTCZONE): 1198 aware = aware.replace(tzinfo=tz.UTC) 1199 1200 elif res.tzoffset == 0: 1201 aware = naive.replace(tzinfo=tz.UTC) 1202 1203 elif res.tzoffset: 1204 aware = naive.replace(tzinfo=tz.tzoffset(res.tzname, res.tzoffset)) 1205 1206 elif not res.tzname and not res.tzoffset: 1207 # i.e. no timezone information was found. 1208 aware = naive 1209 1210 elif res.tzname: 1211 # tz-like string was parsed but we don't know what to do 1212 # with it 1213 warnings.warn("tzname {tzname} identified but not understood. " 1214 "Pass `tzinfos` argument in order to correctly " 1215 "return a timezone-aware datetime. In a future " 1216 "version, this will raise an " 1217 "exception.".format(tzname=res.tzname), 1218 category=UnknownTimezoneWarning) 1219 aware = naive 1220 1221 return aware 1222 1223 def _build_naive(self, res, default): 1224 repl = {} 1225 for attr in ("year", "month", "day", "hour", 1226 "minute", "second", "microsecond"): 1227 value = getattr(res, attr) 1228 if value is not None: 1229 repl[attr] = value 1230 1231 if 'day' not in repl: 1232 # If the default day exceeds the last day of the month, fall back 1233 # to the end of the month. 
1234 cyear = default.year if res.year is None else res.year 1235 cmonth = default.month if res.month is None else res.month 1236 cday = default.day if res.day is None else res.day 1237 1238 if cday > monthrange(cyear, cmonth)[1]: 1239 repl['day'] = monthrange(cyear, cmonth)[1] 1240 1241 naive = default.replace(**repl) 1242 1243 if res.weekday is not None and not res.day: 1244 naive = naive + relativedelta.relativedelta(weekday=res.weekday) 1245 1246 return naive 1247 1248 def _assign_tzname(self, dt, tzname): 1249 if dt.tzname() != tzname: 1250 new_dt = tz.enfold(dt, fold=1) 1251 if new_dt.tzname() == tzname: 1252 return new_dt 1253 1254 return dt 1255 1256 def _recombine_skipped(self, tokens, skipped_idxs): 1257 """ 1258 >>> tokens = ["foo", " ", "bar", " ", "19June2000", "baz"] 1259 >>> skipped_idxs = [0, 1, 2, 5] 1260 >>> _recombine_skipped(tokens, skipped_idxs) 1261 ["foo bar", "baz"] 1262 """ 1263 skipped_tokens = [] 1264 for i, idx in enumerate(sorted(skipped_idxs)): 1265 if i > 0 and idx - 1 == skipped_idxs[i - 1]: 1266 skipped_tokens[-1] = skipped_tokens[-1] + tokens[idx] 1267 else: 1268 skipped_tokens.append(tokens[idx]) 1269 1270 return skipped_tokens 1271 1272 1273DEFAULTPARSER = parser() 1274 1275 1276def parse(timestr, parserinfo=None, **kwargs): 1277 """ 1278 1279 Parse a string in one of the supported formats, using the 1280 ``parserinfo`` parameters. 1281 1282 :param timestr: 1283 A string containing a date/time stamp. 1284 1285 :param parserinfo: 1286 A :class:`parserinfo` object containing parameters for the parser. 1287 If ``None``, the default arguments to the :class:`parserinfo` 1288 constructor are used. 1289 1290 The ``**kwargs`` parameter takes the following keyword arguments: 1291 1292 :param default: 1293 The default datetime object, if this is a datetime object and not 1294 ``None``, elements specified in ``timestr`` replace elements in the 1295 default object. 
1296 1297 :param ignoretz: 1298 If set ``True``, time zones in parsed strings are ignored and a naive 1299 :class:`datetime` object is returned. 1300 1301 :param tzinfos: 1302 Additional time zone names / aliases which may be present in the 1303 string. This argument maps time zone names (and optionally offsets 1304 from those time zones) to time zones. This parameter can be a 1305 dictionary with timezone aliases mapping time zone names to time 1306 zones or a function taking two parameters (``tzname`` and 1307 ``tzoffset``) and returning a time zone. 1308 1309 The timezones to which the names are mapped can be an integer 1310 offset from UTC in seconds or a :class:`tzinfo` object. 1311 1312 .. doctest:: 1313 :options: +NORMALIZE_WHITESPACE 1314 1315 >>> from dateutil.parser import parse 1316 >>> from dateutil.tz import gettz 1317 >>> tzinfos = {"BRST": -7200, "CST": gettz("America/Chicago")} 1318 >>> parse("2012-01-19 17:21:00 BRST", tzinfos=tzinfos) 1319 datetime.datetime(2012, 1, 19, 17, 21, tzinfo=tzoffset(u'BRST', -7200)) 1320 >>> parse("2012-01-19 17:21:00 CST", tzinfos=tzinfos) 1321 datetime.datetime(2012, 1, 19, 17, 21, 1322 tzinfo=tzfile('/usr/share/zoneinfo/America/Chicago')) 1323 1324 This parameter is ignored if ``ignoretz`` is set. 1325 1326 :param dayfirst: 1327 Whether to interpret the first value in an ambiguous 3-integer date 1328 (e.g. 01/05/09) as the day (``True``) or month (``False``). If 1329 ``yearfirst`` is set to ``True``, this distinguishes between YDM and 1330 YMD. If set to ``None``, this value is retrieved from the current 1331 :class:`parserinfo` object (which itself defaults to ``False``). 1332 1333 :param yearfirst: 1334 Whether to interpret the first value in an ambiguous 3-integer date 1335 (e.g. 01/05/09) as the year. If ``True``, the first number is taken to 1336 be the year, otherwise the last number is taken to be the year. 
If 1337 this is set to ``None``, the value is retrieved from the current 1338 :class:`parserinfo` object (which itself defaults to ``False``). 1339 1340 :param fuzzy: 1341 Whether to allow fuzzy parsing, allowing for string like "Today is 1342 January 1, 2047 at 8:21:00AM". 1343 1344 :param fuzzy_with_tokens: 1345 If ``True``, ``fuzzy`` is automatically set to True, and the parser 1346 will return a tuple where the first element is the parsed 1347 :class:`datetime.datetime` datetimestamp and the second element is 1348 a tuple containing the portions of the string which were ignored: 1349 1350 .. doctest:: 1351 1352 >>> from dateutil.parser import parse 1353 >>> parse("Today is January 1, 2047 at 8:21:00AM", fuzzy_with_tokens=True) 1354 (datetime.datetime(2047, 1, 1, 8, 21), (u'Today is ', u' ', u'at ')) 1355 1356 :return: 1357 Returns a :class:`datetime.datetime` object or, if the 1358 ``fuzzy_with_tokens`` option is ``True``, returns a tuple, the 1359 first element being a :class:`datetime.datetime` object, the second 1360 a tuple containing the fuzzy tokens. 1361 1362 :raises ValueError: 1363 Raised for invalid or unknown string format, if the provided 1364 :class:`tzinfo` is not in a valid format, or if an invalid date 1365 would be created. 1366 1367 :raises OverflowError: 1368 Raised if the parsed date exceeds the largest valid C integer on 1369 your system. 
1370 """ 1371 if parserinfo: 1372 return parser(parserinfo).parse(timestr, **kwargs) 1373 else: 1374 return DEFAULTPARSER.parse(timestr, **kwargs) 1375 1376 1377class _tzparser(object): 1378 1379 class _result(_resultbase): 1380 1381 __slots__ = ["stdabbr", "stdoffset", "dstabbr", "dstoffset", 1382 "start", "end"] 1383 1384 class _attr(_resultbase): 1385 __slots__ = ["month", "week", "weekday", 1386 "yday", "jyday", "day", "time"] 1387 1388 def __repr__(self): 1389 return self._repr("") 1390 1391 def __init__(self): 1392 _resultbase.__init__(self) 1393 self.start = self._attr() 1394 self.end = self._attr() 1395 1396 def parse(self, tzstr): 1397 res = self._result() 1398 l = [x for x in re.split(r'([,:.]|[a-zA-Z]+|[0-9]+)',tzstr) if x] 1399 used_idxs = list() 1400 try: 1401 1402 len_l = len(l) 1403 1404 i = 0 1405 while i < len_l: 1406 # BRST+3[BRDT[+2]] 1407 j = i 1408 while j < len_l and not [x for x in l[j] 1409 if x in "0123456789:,-+"]: 1410 j += 1 1411 if j != i: 1412 if not res.stdabbr: 1413 offattr = "stdoffset" 1414 res.stdabbr = "".join(l[i:j]) 1415 else: 1416 offattr = "dstoffset" 1417 res.dstabbr = "".join(l[i:j]) 1418 1419 for ii in range(j): 1420 used_idxs.append(ii) 1421 i = j 1422 if (i < len_l and (l[i] in ('+', '-') or l[i][0] in 1423 "0123456789")): 1424 if l[i] in ('+', '-'): 1425 # Yes, that's right. See the TZ variable 1426 # documentation. 
1427 signal = (1, -1)[l[i] == '+'] 1428 used_idxs.append(i) 1429 i += 1 1430 else: 1431 signal = -1 1432 len_li = len(l[i]) 1433 if len_li == 4: 1434 # -0300 1435 setattr(res, offattr, (int(l[i][:2]) * 3600 + 1436 int(l[i][2:]) * 60) * signal) 1437 elif i + 1 < len_l and l[i + 1] == ':': 1438 # -03:00 1439 setattr(res, offattr, 1440 (int(l[i]) * 3600 + 1441 int(l[i + 2]) * 60) * signal) 1442 used_idxs.append(i) 1443 i += 2 1444 elif len_li <= 2: 1445 # -[0]3 1446 setattr(res, offattr, 1447 int(l[i][:2]) * 3600 * signal) 1448 else: 1449 return None 1450 used_idxs.append(i) 1451 i += 1 1452 if res.dstabbr: 1453 break 1454 else: 1455 break 1456 1457 1458 if i < len_l: 1459 for j in range(i, len_l): 1460 if l[j] == ';': 1461 l[j] = ',' 1462 1463 assert l[i] == ',' 1464 1465 i += 1 1466 1467 if i >= len_l: 1468 pass 1469 elif (8 <= l.count(',') <= 9 and 1470 not [y for x in l[i:] if x != ',' 1471 for y in x if y not in "0123456789+-"]): 1472 # GMT0BST,3,0,30,3600,10,0,26,7200[,3600] 1473 for x in (res.start, res.end): 1474 x.month = int(l[i]) 1475 used_idxs.append(i) 1476 i += 2 1477 if l[i] == '-': 1478 value = int(l[i + 1]) * -1 1479 used_idxs.append(i) 1480 i += 1 1481 else: 1482 value = int(l[i]) 1483 used_idxs.append(i) 1484 i += 2 1485 if value: 1486 x.week = value 1487 x.weekday = (int(l[i]) - 1) % 7 1488 else: 1489 x.day = int(l[i]) 1490 used_idxs.append(i) 1491 i += 2 1492 x.time = int(l[i]) 1493 used_idxs.append(i) 1494 i += 2 1495 if i < len_l: 1496 if l[i] in ('-', '+'): 1497 signal = (-1, 1)[l[i] == "+"] 1498 used_idxs.append(i) 1499 i += 1 1500 else: 1501 signal = 1 1502 used_idxs.append(i) 1503 res.dstoffset = (res.stdoffset + int(l[i]) * signal) 1504 1505 # This was a made-up format that is not in normal use 1506 warn(('Parsed time zone "%s"' % tzstr) + 1507 'is in a non-standard dateutil-specific format, which ' + 1508 'is now deprecated; support for parsing this format ' + 1509 'will be removed in future versions. 
It is recommended ' + 1510 'that you switch to a standard format like the GNU ' + 1511 'TZ variable format.', tz.DeprecatedTzFormatWarning) 1512 elif (l.count(',') == 2 and l[i:].count('/') <= 2 and 1513 not [y for x in l[i:] if x not in (',', '/', 'J', 'M', 1514 '.', '-', ':') 1515 for y in x if y not in "0123456789"]): 1516 for x in (res.start, res.end): 1517 if l[i] == 'J': 1518 # non-leap year day (1 based) 1519 used_idxs.append(i) 1520 i += 1 1521 x.jyday = int(l[i]) 1522 elif l[i] == 'M': 1523 # month[-.]week[-.]weekday 1524 used_idxs.append(i) 1525 i += 1 1526 x.month = int(l[i]) 1527 used_idxs.append(i) 1528 i += 1 1529 assert l[i] in ('-', '.') 1530 used_idxs.append(i) 1531 i += 1 1532 x.week = int(l[i]) 1533 if x.week == 5: 1534 x.week = -1 1535 used_idxs.append(i) 1536 i += 1 1537 assert l[i] in ('-', '.') 1538 used_idxs.append(i) 1539 i += 1 1540 x.weekday = (int(l[i]) - 1) % 7 1541 else: 1542 # year day (zero based) 1543 x.yday = int(l[i]) + 1 1544 1545 used_idxs.append(i) 1546 i += 1 1547 1548 if i < len_l and l[i] == '/': 1549 used_idxs.append(i) 1550 i += 1 1551 # start time 1552 len_li = len(l[i]) 1553 if len_li == 4: 1554 # -0300 1555 x.time = (int(l[i][:2]) * 3600 + 1556 int(l[i][2:]) * 60) 1557 elif i + 1 < len_l and l[i + 1] == ':': 1558 # -03:00 1559 x.time = int(l[i]) * 3600 + int(l[i + 2]) * 60 1560 used_idxs.append(i) 1561 i += 2 1562 if i + 1 < len_l and l[i + 1] == ':': 1563 used_idxs.append(i) 1564 i += 2 1565 x.time += int(l[i]) 1566 elif len_li <= 2: 1567 # -[0]3 1568 x.time = (int(l[i][:2]) * 3600) 1569 else: 1570 return None 1571 used_idxs.append(i) 1572 i += 1 1573 1574 assert i == len_l or l[i] == ',' 1575 1576 i += 1 1577 1578 assert i >= len_l 1579 1580 except (IndexError, ValueError, AssertionError): 1581 return None 1582 1583 unused_idxs = set(range(len_l)).difference(used_idxs) 1584 res.any_unused_tokens = not {l[n] for n in unused_idxs}.issubset({",",":"}) 1585 return res 1586 1587 1588DEFAULTTZPARSER = _tzparser() 1589 1590 

def _parsetz(tzstr):
    """Parse the time zone string ``tzstr`` with ``DEFAULTTZPARSER``.

    :return:
        Whatever ``DEFAULTTZPARSER.parse(tzstr)`` returns.
    """
    return DEFAULTTZPARSER.parse(tzstr)


class ParserError(ValueError):
    """Error class for representing failure to parse a datetime string.

    The first argument is treated as a ``%``-style format string and the
    remaining arguments as its values, e.g.
    ``ParserError("Unknown string format: %s", timestr)``.
    """
    def __str__(self):
        try:
            return self.args[0] % self.args[1:]
        except (TypeError, IndexError, ValueError):
            # No arguments, a non-string first argument, a mismatch between
            # placeholders and values, or a message that merely contains a
            # stray "%": fall back to the plain exception text.
            return super(ParserError, self).__str__()

    def __repr__(self):
        # Show the constructor arguments themselves (quoted) rather than the
        # already-formatted message.
        args = ", ".join("'%s'" % arg for arg in self.args)
        return "%s(%s)" % (self.__class__.__name__, args)


class UnknownTimezoneWarning(RuntimeWarning):
    """Raised when the parser finds a timezone it cannot parse into a tzinfo"""
# vim:ts=4:sw=4:et