1# -*- coding: utf-8 -*- 2""" 3This module offers a generic date/time string parser which is able to parse 4most known formats to represent a date and/or time. 5 6This module attempts to be forgiving with regards to unlikely input formats, 7returning a datetime object even for dates which are ambiguous. If an element 8of a date/time stamp is omitted, the following rules are applied: 9 10- If AM or PM is left unspecified, a 24-hour clock is assumed, however, an hour 11 on a 12-hour clock (``0 <= hour <= 12``) *must* be specified if AM or PM is 12 specified. 13- If a time zone is omitted, a timezone-naive datetime is returned. 14 15If any other elements are missing, they are taken from the 16:class:`datetime.datetime` object passed to the parameter ``default``. If this 17results in a day number exceeding the valid number of days per month, the 18value falls back to the end of the month. 19 20Additional resources about date/time string formats can be found below: 21 22- `A summary of the international standard date and time notation 23 <https://www.cl.cam.ac.uk/~mgk25/iso-time.html>`_ 24- `W3C Date and Time Formats <https://www.w3.org/TR/NOTE-datetime>`_ 25- `Time Formats (Planetary Rings Node) <https://pds-rings.seti.org:443/tools/time_formats.html>`_ 26- `CPAN ParseDate module 27 <https://metacpan.org/pod/release/MUIR/Time-modules-2013.0912/lib/Time/ParseDate.pm>`_ 28- `Java SimpleDateFormat Class 29 <https://docs.oracle.com/javase/6/docs/api/java/text/SimpleDateFormat.html>`_ 30""" 31from __future__ import unicode_literals 32 33import datetime 34import re 35import string 36import time 37import warnings 38 39from calendar import monthrange 40from io import StringIO 41 42import six 43from six import integer_types, text_type 44 45from decimal import Decimal 46 47from warnings import warn 48 49from .. import relativedelta 50from .. import tz 51 52__all__ = ["parse", "parserinfo", "ParserError"] 53 54 55# TODO: pandas.core.tools.datetimes imports this explicitly. 
Might be worth 56# making public and/or figuring out if there is something we can 57# take off their plate. 58class _timelex(object): 59 # Fractional seconds are sometimes split by a comma 60 _split_decimal = re.compile("([.,])") 61 62 def __init__(self, instream): 63 if isinstance(instream, (bytes, bytearray)): 64 instream = instream.decode() 65 66 if isinstance(instream, text_type): 67 instream = StringIO(instream) 68 elif getattr(instream, 'read', None) is None: 69 raise TypeError('Parser must be a string or character stream, not ' 70 '{itype}'.format(itype=instream.__class__.__name__)) 71 72 self.instream = instream 73 self.charstack = [] 74 self.tokenstack = [] 75 self.eof = False 76 77 def get_token(self): 78 """ 79 This function breaks the time string into lexical units (tokens), which 80 can be parsed by the parser. Lexical units are demarcated by changes in 81 the character set, so any continuous string of letters is considered 82 one unit, any continuous string of numbers is considered one unit. 83 84 The main complication arises from the fact that dots ('.') can be used 85 both as separators (e.g. "Sep.20.2009") or decimal points (e.g. 86 "4:30:21.447"). As such, it is necessary to read the full context of 87 any dot-separated strings before breaking it into tokens; as such, this 88 function maintains a "token stack", for when the ambiguous context 89 demands that multiple tokens be parsed at once. 90 """ 91 if self.tokenstack: 92 return self.tokenstack.pop(0) 93 94 seenletters = False 95 token = None 96 state = None 97 98 while not self.eof: 99 # We only realize that we've reached the end of a token when we 100 # find a character that's not part of the current token - since 101 # that character may be part of the next token, it's stored in the 102 # charstack. 
103 if self.charstack: 104 nextchar = self.charstack.pop(0) 105 else: 106 nextchar = self.instream.read(1) 107 while nextchar == '\x00': 108 nextchar = self.instream.read(1) 109 110 if not nextchar: 111 self.eof = True 112 break 113 elif not state: 114 # First character of the token - determines if we're starting 115 # to parse a word, a number or something else. 116 token = nextchar 117 if self.isword(nextchar): 118 state = 'a' 119 elif self.isnum(nextchar): 120 state = '0' 121 elif self.isspace(nextchar): 122 token = ' ' 123 break # emit token 124 else: 125 break # emit token 126 elif state == 'a': 127 # If we've already started reading a word, we keep reading 128 # letters until we find something that's not part of a word. 129 seenletters = True 130 if self.isword(nextchar): 131 token += nextchar 132 elif nextchar == '.': 133 token += nextchar 134 state = 'a.' 135 else: 136 self.charstack.append(nextchar) 137 break # emit token 138 elif state == '0': 139 # If we've already started reading a number, we keep reading 140 # numbers until we find something that doesn't fit. 141 if self.isnum(nextchar): 142 token += nextchar 143 elif nextchar == '.' or (nextchar == ',' and len(token) >= 2): 144 token += nextchar 145 state = '0.' 146 else: 147 self.charstack.append(nextchar) 148 break # emit token 149 elif state == 'a.': 150 # If we've seen some letters and a dot separator, continue 151 # parsing, and the tokens will be broken up later. 152 seenletters = True 153 if nextchar == '.' or self.isword(nextchar): 154 token += nextchar 155 elif self.isnum(nextchar) and token[-1] == '.': 156 token += nextchar 157 state = '0.' 158 else: 159 self.charstack.append(nextchar) 160 break # emit token 161 elif state == '0.': 162 # If we've seen at least one dot separator, keep going, we'll 163 # break up the tokens later. 164 if nextchar == '.' or self.isnum(nextchar): 165 token += nextchar 166 elif self.isword(nextchar) and token[-1] == '.': 167 token += nextchar 168 state = 'a.' 
169 else: 170 self.charstack.append(nextchar) 171 break # emit token 172 173 if (state in ('a.', '0.') and (seenletters or token.count('.') > 1 or 174 token[-1] in '.,')): 175 l = self._split_decimal.split(token) 176 token = l[0] 177 for tok in l[1:]: 178 if tok: 179 self.tokenstack.append(tok) 180 181 if state == '0.' and token.count('.') == 0: 182 token = token.replace(',', '.') 183 184 return token 185 186 def __iter__(self): 187 return self 188 189 def __next__(self): 190 token = self.get_token() 191 if token is None: 192 raise StopIteration 193 194 return token 195 196 def next(self): 197 return self.__next__() # Python 2.x support 198 199 @classmethod 200 def split(cls, s): 201 return list(cls(s)) 202 203 @classmethod 204 def isword(cls, nextchar): 205 """ Whether or not the next character is part of a word """ 206 return nextchar.isalpha() 207 208 @classmethod 209 def isnum(cls, nextchar): 210 """ Whether the next character is part of a number """ 211 return nextchar.isdigit() 212 213 @classmethod 214 def isspace(cls, nextchar): 215 """ Whether the next character is whitespace """ 216 return nextchar.isspace() 217 218 219class _resultbase(object): 220 221 def __init__(self): 222 for attr in self.__slots__: 223 setattr(self, attr, None) 224 225 def _repr(self, classname): 226 l = [] 227 for attr in self.__slots__: 228 value = getattr(self, attr) 229 if value is not None: 230 l.append("%s=%s" % (attr, repr(value))) 231 return "%s(%s)" % (classname, ", ".join(l)) 232 233 def __len__(self): 234 return (sum(getattr(self, attr) is not None 235 for attr in self.__slots__)) 236 237 def __repr__(self): 238 return self._repr(self.__class__.__name__) 239 240 241class parserinfo(object): 242 """ 243 Class which handles what inputs are accepted. Subclass this to customize 244 the language and acceptable values for each parameter. 245 246 :param dayfirst: 247 Whether to interpret the first value in an ambiguous 3-integer date 248 (e.g. 
01/05/09) as the day (``True``) or month (``False``). If 249 ``yearfirst`` is set to ``True``, this distinguishes between YDM 250 and YMD. Default is ``False``. 251 252 :param yearfirst: 253 Whether to interpret the first value in an ambiguous 3-integer date 254 (e.g. 01/05/09) as the year. If ``True``, the first number is taken 255 to be the year, otherwise the last number is taken to be the year. 256 Default is ``False``. 257 """ 258 259 # m from a.m/p.m, t from ISO T separator 260 JUMP = [" ", ".", ",", ";", "-", "/", "'", 261 "at", "on", "and", "ad", "m", "t", "of", 262 "st", "nd", "rd", "th"] 263 264 WEEKDAYS = [("Mon", "Monday"), 265 ("Tue", "Tuesday"), # TODO: "Tues" 266 ("Wed", "Wednesday"), 267 ("Thu", "Thursday"), # TODO: "Thurs" 268 ("Fri", "Friday"), 269 ("Sat", "Saturday"), 270 ("Sun", "Sunday")] 271 MONTHS = [("Jan", "January"), 272 ("Feb", "February"), # TODO: "Febr" 273 ("Mar", "March"), 274 ("Apr", "April"), 275 ("May", "May"), 276 ("Jun", "June"), 277 ("Jul", "July"), 278 ("Aug", "August"), 279 ("Sep", "Sept", "September"), 280 ("Oct", "October"), 281 ("Nov", "November"), 282 ("Dec", "December")] 283 HMS = [("h", "hour", "hours"), 284 ("m", "minute", "minutes"), 285 ("s", "second", "seconds")] 286 AMPM = [("am", "a"), 287 ("pm", "p")] 288 UTCZONE = ["UTC", "GMT", "Z", "z"] 289 PERTAIN = ["of"] 290 TZOFFSET = {} 291 # TODO: ERA = ["AD", "BC", "CE", "BCE", "Stardate", 292 # "Anno Domini", "Year of Our Lord"] 293 294 def __init__(self, dayfirst=False, yearfirst=False): 295 self._jump = self._convert(self.JUMP) 296 self._weekdays = self._convert(self.WEEKDAYS) 297 self._months = self._convert(self.MONTHS) 298 self._hms = self._convert(self.HMS) 299 self._ampm = self._convert(self.AMPM) 300 self._utczone = self._convert(self.UTCZONE) 301 self._pertain = self._convert(self.PERTAIN) 302 303 self.dayfirst = dayfirst 304 self.yearfirst = yearfirst 305 306 self._year = time.localtime().tm_year 307 self._century = self._year // 100 * 100 308 309 def 
_convert(self, lst): 310 dct = {} 311 for i, v in enumerate(lst): 312 if isinstance(v, tuple): 313 for v in v: 314 dct[v.lower()] = i 315 else: 316 dct[v.lower()] = i 317 return dct 318 319 def jump(self, name): 320 return name.lower() in self._jump 321 322 def weekday(self, name): 323 try: 324 return self._weekdays[name.lower()] 325 except KeyError: 326 pass 327 return None 328 329 def month(self, name): 330 try: 331 return self._months[name.lower()] + 1 332 except KeyError: 333 pass 334 return None 335 336 def hms(self, name): 337 try: 338 return self._hms[name.lower()] 339 except KeyError: 340 return None 341 342 def ampm(self, name): 343 try: 344 return self._ampm[name.lower()] 345 except KeyError: 346 return None 347 348 def pertain(self, name): 349 return name.lower() in self._pertain 350 351 def utczone(self, name): 352 return name.lower() in self._utczone 353 354 def tzoffset(self, name): 355 if name in self._utczone: 356 return 0 357 358 return self.TZOFFSET.get(name) 359 360 def convertyear(self, year, century_specified=False): 361 """ 362 Converts two-digit years to year within [-50, 49] 363 range of self._year (current local time) 364 """ 365 366 # Function contract is that the year is always positive 367 assert year >= 0 368 369 if year < 100 and not century_specified: 370 # assume current century to start 371 year += self._century 372 373 if year >= self._year + 50: # if too far in future 374 year -= 100 375 elif year < self._year - 50: # if too far in past 376 year += 100 377 378 return year 379 380 def validate(self, res): 381 # move to info 382 if res.year is not None: 383 res.year = self.convertyear(res.year, res.century_specified) 384 385 if ((res.tzoffset == 0 and not res.tzname) or 386 (res.tzname == 'Z' or res.tzname == 'z')): 387 res.tzname = "UTC" 388 res.tzoffset = 0 389 elif res.tzoffset != 0 and res.tzname and self.utczone(res.tzname): 390 res.tzoffset = 0 391 return True 392 393 394class _ymd(list): 395 def __init__(self, *args, 
**kwargs): 396 super(self.__class__, self).__init__(*args, **kwargs) 397 self.century_specified = False 398 self.dstridx = None 399 self.mstridx = None 400 self.ystridx = None 401 402 @property 403 def has_year(self): 404 return self.ystridx is not None 405 406 @property 407 def has_month(self): 408 return self.mstridx is not None 409 410 @property 411 def has_day(self): 412 return self.dstridx is not None 413 414 def could_be_day(self, value): 415 if self.has_day: 416 return False 417 elif not self.has_month: 418 return 1 <= value <= 31 419 elif not self.has_year: 420 # Be permissive, assume leap year 421 month = self[self.mstridx] 422 return 1 <= value <= monthrange(2000, month)[1] 423 else: 424 month = self[self.mstridx] 425 year = self[self.ystridx] 426 return 1 <= value <= monthrange(year, month)[1] 427 428 def append(self, val, label=None): 429 if hasattr(val, '__len__'): 430 if val.isdigit() and len(val) > 2: 431 self.century_specified = True 432 if label not in [None, 'Y']: # pragma: no cover 433 raise ValueError(label) 434 label = 'Y' 435 elif val > 100: 436 self.century_specified = True 437 if label not in [None, 'Y']: # pragma: no cover 438 raise ValueError(label) 439 label = 'Y' 440 441 super(self.__class__, self).append(int(val)) 442 443 if label == 'M': 444 if self.has_month: 445 raise ValueError('Month is already set') 446 self.mstridx = len(self) - 1 447 elif label == 'D': 448 if self.has_day: 449 raise ValueError('Day is already set') 450 self.dstridx = len(self) - 1 451 elif label == 'Y': 452 if self.has_year: 453 raise ValueError('Year is already set') 454 self.ystridx = len(self) - 1 455 456 def _resolve_from_stridxs(self, strids): 457 """ 458 Try to resolve the identities of year/month/day elements using 459 ystridx, mstridx, and dstridx, if enough of these are specified. 
460 """ 461 if len(self) == 3 and len(strids) == 2: 462 # we can back out the remaining stridx value 463 missing = [x for x in range(3) if x not in strids.values()] 464 key = [x for x in ['y', 'm', 'd'] if x not in strids] 465 assert len(missing) == len(key) == 1 466 key = key[0] 467 val = missing[0] 468 strids[key] = val 469 470 assert len(self) == len(strids) # otherwise this should not be called 471 out = {key: self[strids[key]] for key in strids} 472 return (out.get('y'), out.get('m'), out.get('d')) 473 474 def resolve_ymd(self, yearfirst, dayfirst): 475 len_ymd = len(self) 476 year, month, day = (None, None, None) 477 478 strids = (('y', self.ystridx), 479 ('m', self.mstridx), 480 ('d', self.dstridx)) 481 482 strids = {key: val for key, val in strids if val is not None} 483 if (len(self) == len(strids) > 0 or 484 (len(self) == 3 and len(strids) == 2)): 485 return self._resolve_from_stridxs(strids) 486 487 mstridx = self.mstridx 488 489 if len_ymd > 3: 490 raise ValueError("More than three YMD values") 491 elif len_ymd == 1 or (mstridx is not None and len_ymd == 2): 492 # One member, or two members with a month string 493 if mstridx is not None: 494 month = self[mstridx] 495 # since mstridx is 0 or 1, self[mstridx-1] always 496 # looks up the other element 497 other = self[mstridx - 1] 498 else: 499 other = self[0] 500 501 if len_ymd > 1 or mstridx is None: 502 if other > 31: 503 year = other 504 else: 505 day = other 506 507 elif len_ymd == 2: 508 # Two members with numbers 509 if self[0] > 31: 510 # 99-01 511 year, month = self 512 elif self[1] > 31: 513 # 01-99 514 month, year = self 515 elif dayfirst and self[1] <= 12: 516 # 13-01 517 day, month = self 518 else: 519 # 01-13 520 month, day = self 521 522 elif len_ymd == 3: 523 # Three members 524 if mstridx == 0: 525 if self[1] > 31: 526 # Apr-2003-25 527 month, year, day = self 528 else: 529 month, day, year = self 530 elif mstridx == 1: 531 if self[0] > 31 or (yearfirst and self[2] <= 31): 532 # 99-Jan-01 
533 year, month, day = self 534 else: 535 # 01-Jan-01 536 # Give precedence to day-first, since 537 # two-digit years is usually hand-written. 538 day, month, year = self 539 540 elif mstridx == 2: 541 # WTF!? 542 if self[1] > 31: 543 # 01-99-Jan 544 day, year, month = self 545 else: 546 # 99-01-Jan 547 year, day, month = self 548 549 else: 550 if (self[0] > 31 or 551 self.ystridx == 0 or 552 (yearfirst and self[1] <= 12 and self[2] <= 31)): 553 # 99-01-01 554 if dayfirst and self[2] <= 12: 555 year, day, month = self 556 else: 557 year, month, day = self 558 elif self[0] > 12 or (dayfirst and self[1] <= 12): 559 # 13-01-01 560 day, month, year = self 561 else: 562 # 01-13-01 563 month, day, year = self 564 565 return year, month, day 566 567 568class parser(object): 569 def __init__(self, info=None): 570 self.info = info or parserinfo() 571 572 def parse(self, timestr, default=None, 573 ignoretz=False, tzinfos=None, **kwargs): 574 """ 575 Parse the date/time string into a :class:`datetime.datetime` object. 576 577 :param timestr: 578 Any date/time string using the supported formats. 579 580 :param default: 581 The default datetime object, if this is a datetime object and not 582 ``None``, elements specified in ``timestr`` replace elements in the 583 default object. 584 585 :param ignoretz: 586 If set ``True``, time zones in parsed strings are ignored and a 587 naive :class:`datetime.datetime` object is returned. 588 589 :param tzinfos: 590 Additional time zone names / aliases which may be present in the 591 string. This argument maps time zone names (and optionally offsets 592 from those time zones) to time zones. This parameter can be a 593 dictionary with timezone aliases mapping time zone names to time 594 zones or a function taking two parameters (``tzname`` and 595 ``tzoffset``) and returning a time zone. 596 597 The timezones to which the names are mapped can be an integer 598 offset from UTC in seconds or a :class:`tzinfo` object. 599 600 .. 
doctest:: 601 :options: +NORMALIZE_WHITESPACE 602 603 >>> from dateutil.parser import parse 604 >>> from dateutil.tz import gettz 605 >>> tzinfos = {"BRST": -7200, "CST": gettz("America/Chicago")} 606 >>> parse("2012-01-19 17:21:00 BRST", tzinfos=tzinfos) 607 datetime.datetime(2012, 1, 19, 17, 21, tzinfo=tzoffset(u'BRST', -7200)) 608 >>> parse("2012-01-19 17:21:00 CST", tzinfos=tzinfos) 609 datetime.datetime(2012, 1, 19, 17, 21, 610 tzinfo=tzfile('/usr/share/zoneinfo/America/Chicago')) 611 612 This parameter is ignored if ``ignoretz`` is set. 613 614 :param \\*\\*kwargs: 615 Keyword arguments as passed to ``_parse()``. 616 617 :return: 618 Returns a :class:`datetime.datetime` object or, if the 619 ``fuzzy_with_tokens`` option is ``True``, returns a tuple, the 620 first element being a :class:`datetime.datetime` object, the second 621 a tuple containing the fuzzy tokens. 622 623 :raises ParserError: 624 Raised for invalid or unknown string format, if the provided 625 :class:`tzinfo` is not in a valid format, or if an invalid date 626 would be created. 627 628 :raises TypeError: 629 Raised for non-string or character stream input. 630 631 :raises OverflowError: 632 Raised if the parsed date exceeds the largest valid C integer on 633 your system. 
634 """ 635 636 if default is None: 637 default = datetime.datetime.now().replace(hour=0, minute=0, 638 second=0, microsecond=0) 639 640 res, skipped_tokens = self._parse(timestr, **kwargs) 641 642 if res is None: 643 raise ParserError("Unknown string format: %s", timestr) 644 645 if len(res) == 0: 646 raise ParserError("String does not contain a date: %s", timestr) 647 648 try: 649 ret = self._build_naive(res, default) 650 except ValueError as e: 651 six.raise_from(ParserError(str(e) + ": %s", timestr), e) 652 653 if not ignoretz: 654 ret = self._build_tzaware(ret, res, tzinfos) 655 656 if kwargs.get('fuzzy_with_tokens', False): 657 return ret, skipped_tokens 658 else: 659 return ret 660 661 class _result(_resultbase): 662 __slots__ = ["year", "month", "day", "weekday", 663 "hour", "minute", "second", "microsecond", 664 "tzname", "tzoffset", "ampm","any_unused_tokens"] 665 666 def _parse(self, timestr, dayfirst=None, yearfirst=None, fuzzy=False, 667 fuzzy_with_tokens=False): 668 """ 669 Private method which performs the heavy lifting of parsing, called from 670 ``parse()``, which passes on its ``kwargs`` to this function. 671 672 :param timestr: 673 The string to parse. 674 675 :param dayfirst: 676 Whether to interpret the first value in an ambiguous 3-integer date 677 (e.g. 01/05/09) as the day (``True``) or month (``False``). If 678 ``yearfirst`` is set to ``True``, this distinguishes between YDM 679 and YMD. If set to ``None``, this value is retrieved from the 680 current :class:`parserinfo` object (which itself defaults to 681 ``False``). 682 683 :param yearfirst: 684 Whether to interpret the first value in an ambiguous 3-integer date 685 (e.g. 01/05/09) as the year. If ``True``, the first number is taken 686 to be the year, otherwise the last number is taken to be the year. 687 If this is set to ``None``, the value is retrieved from the current 688 :class:`parserinfo` object (which itself defaults to ``False``). 
689 690 :param fuzzy: 691 Whether to allow fuzzy parsing, allowing for string like "Today is 692 January 1, 2047 at 8:21:00AM". 693 694 :param fuzzy_with_tokens: 695 If ``True``, ``fuzzy`` is automatically set to True, and the parser 696 will return a tuple where the first element is the parsed 697 :class:`datetime.datetime` datetimestamp and the second element is 698 a tuple containing the portions of the string which were ignored: 699 700 .. doctest:: 701 702 >>> from dateutil.parser import parse 703 >>> parse("Today is January 1, 2047 at 8:21:00AM", fuzzy_with_tokens=True) 704 (datetime.datetime(2047, 1, 1, 8, 21), (u'Today is ', u' ', u'at ')) 705 706 """ 707 if fuzzy_with_tokens: 708 fuzzy = True 709 710 info = self.info 711 712 if dayfirst is None: 713 dayfirst = info.dayfirst 714 715 if yearfirst is None: 716 yearfirst = info.yearfirst 717 718 res = self._result() 719 l = _timelex.split(timestr) # Splits the timestr into tokens 720 721 skipped_idxs = [] 722 723 # year/month/day list 724 ymd = _ymd() 725 726 len_l = len(l) 727 i = 0 728 try: 729 while i < len_l: 730 731 # Check if it's a number 732 value_repr = l[i] 733 try: 734 value = float(value_repr) 735 except ValueError: 736 value = None 737 738 if value is not None: 739 # Numeric token 740 i = self._parse_numeric_token(l, i, info, ymd, res, fuzzy) 741 742 # Check weekday 743 elif info.weekday(l[i]) is not None: 744 value = info.weekday(l[i]) 745 res.weekday = value 746 747 # Check month name 748 elif info.month(l[i]) is not None: 749 value = info.month(l[i]) 750 ymd.append(value, 'M') 751 752 if i + 1 < len_l: 753 if l[i + 1] in ('-', '/'): 754 # Jan-01[-99] 755 sep = l[i + 1] 756 ymd.append(l[i + 2]) 757 758 if i + 3 < len_l and l[i + 3] == sep: 759 # Jan-01-99 760 ymd.append(l[i + 4]) 761 i += 2 762 763 i += 2 764 765 elif (i + 4 < len_l and l[i + 1] == l[i + 3] == ' ' and 766 info.pertain(l[i + 2])): 767 # Jan of 01 768 # In this case, 01 is clearly year 769 if l[i + 4].isdigit(): 770 # Convert it 
here to become unambiguous 771 value = int(l[i + 4]) 772 year = str(info.convertyear(value)) 773 ymd.append(year, 'Y') 774 else: 775 # Wrong guess 776 pass 777 # TODO: not hit in tests 778 i += 4 779 780 # Check am/pm 781 elif info.ampm(l[i]) is not None: 782 value = info.ampm(l[i]) 783 val_is_ampm = self._ampm_valid(res.hour, res.ampm, fuzzy) 784 785 if val_is_ampm: 786 res.hour = self._adjust_ampm(res.hour, value) 787 res.ampm = value 788 789 elif fuzzy: 790 skipped_idxs.append(i) 791 792 # Check for a timezone name 793 elif self._could_be_tzname(res.hour, res.tzname, res.tzoffset, l[i]): 794 res.tzname = l[i] 795 res.tzoffset = info.tzoffset(res.tzname) 796 797 # Check for something like GMT+3, or BRST+3. Notice 798 # that it doesn't mean "I am 3 hours after GMT", but 799 # "my time +3 is GMT". If found, we reverse the 800 # logic so that timezone parsing code will get it 801 # right. 802 if i + 1 < len_l and l[i + 1] in ('+', '-'): 803 l[i + 1] = ('+', '-')[l[i + 1] == '+'] 804 res.tzoffset = None 805 if info.utczone(res.tzname): 806 # With something like GMT+3, the timezone 807 # is *not* GMT. 808 res.tzname = None 809 810 # Check for a numbered timezone 811 elif res.hour is not None and l[i] in ('+', '-'): 812 signal = (-1, 1)[l[i] == '+'] 813 len_li = len(l[i + 1]) 814 815 # TODO: check that l[i + 1] is integer? 816 if len_li == 4: 817 # -0300 818 hour_offset = int(l[i + 1][:2]) 819 min_offset = int(l[i + 1][2:]) 820 elif i + 2 < len_l and l[i + 2] == ':': 821 # -03:00 822 hour_offset = int(l[i + 1]) 823 min_offset = int(l[i + 3]) # TODO: Check that l[i+3] is minute-like? 
def _parse_numeric_token(self, tokens, idx, info, ymd, res, fuzzy):
    """
    Interpret the numeric token at ``tokens[idx]`` and store what it
    represents in ``ymd`` (date components) or ``res`` (time components).

    The branches are tried in order and the first matching one wins, so
    the order of the ``elif`` chain is significant.

    :param tokens:
        The full list of tokens of the string being parsed.

    :param idx:
        Index of the numeric token to interpret.

    :param info:
        Object consulted through its ``hms``, ``jump``, ``month`` and
        ``ampm`` methods to classify neighbouring tokens.

    :param ymd:
        Year/month/day accumulator; values are added with ``append`` and
        tested with ``could_be_day``.

    :param res:
        Result object whose ``hour``, ``minute``, ``second`` and
        ``microsecond`` attributes are assigned.

    :param fuzzy:
        If true, a number that fits none of the patterns is ignored
        instead of raising.

    :return:
        The index of the last token consumed by this call (``idx`` itself
        when no following tokens were used).

    :raises ValueError:
        If the token cannot be converted to a decimal, if a month name was
        expected after a date separator but not found, or if the number
        cannot be interpreted and ``fuzzy`` is false.
    """
    # Token is a number
    value_repr = tokens[idx]
    try:
        value = self._to_decimal(value_repr)
    except Exception as e:
        six.raise_from(ValueError('Unknown numeric token'), e)

    # Number of characters in the token itself (digits plus any dot)
    len_li = len(value_repr)

    len_l = len(tokens)

    if (len(ymd) == 3 and len_li in (2, 4) and
        res.hour is None and
        (idx + 1 >= len_l or
         (tokens[idx + 1] != ':' and
          info.hms(tokens[idx + 1]) is None))):
        # 19990101T23[59]
        # The date is already complete, so a bare 2- or 4-digit number
        # is read as HH or HHMM.
        s = tokens[idx]
        res.hour = int(s[:2])

        if len_li == 4:
            res.minute = int(s[2:])

    elif len_li == 6 or (len_li > 6 and tokens[idx].find('.') == 6):
        # YYMMDD or HHMMSS[.ss]
        s = tokens[idx]

        if not ymd and '.' not in tokens[idx]:
            # No date component seen yet and no fraction: treat as a date
            ymd.append(s[:2])
            ymd.append(s[2:4])
            ymd.append(s[4:])
        else:
            # 19990101T235959[.59]

            # TODO: Check if res attributes already set.
            res.hour = int(s[:2])
            res.minute = int(s[2:4])
            res.second, res.microsecond = self._parsems(s[4:])

    elif len_li in (8, 12, 14):
        # YYYYMMDD
        # optionally followed by HHMM (12 characters) and SS (14)
        s = tokens[idx]
        ymd.append(s[:4], 'Y')
        ymd.append(s[4:6])
        ymd.append(s[6:8])

        if len_li > 8:
            res.hour = int(s[8:10])
            res.minute = int(s[10:12])

            if len_li > 12:
                res.second = int(s[12:])

    elif self._find_hms_idx(idx, tokens, info, allow_jump=True) is not None:
        # HH[ ]h or MM[ ]m or SS[.ss][ ]s
        hms_idx = self._find_hms_idx(idx, tokens, info, allow_jump=True)
        (idx, hms) = self._parse_hms(idx, tokens, info, hms_idx)
        if hms is not None:
            # TODO: checking that hour/minute/second are not
            # already set?
            self._assign_hms(res, value_repr, hms)

    elif idx + 2 < len_l and tokens[idx + 1] == ':':
        # HH:MM[:SS[.ss]]
        res.hour = int(value)
        value = self._to_decimal(tokens[idx + 2])  # TODO: try/except for this?
        (res.minute, res.second) = self._parse_min_sec(value)

        if idx + 4 < len_l and tokens[idx + 3] == ':':
            res.second, res.microsecond = self._parsems(tokens[idx + 4])

            # Skip the second ':' and the seconds token
            idx += 2

        # Skip the first ':' and the minutes token
        idx += 2

    elif idx + 1 < len_l and tokens[idx + 1] in ('-', '/', '.'):
        sep = tokens[idx + 1]
        ymd.append(value_repr)

        if idx + 2 < len_l and not info.jump(tokens[idx + 2]):
            if tokens[idx + 2].isdigit():
                # 01-01[-01]
                ymd.append(tokens[idx + 2])
            else:
                # 01-Jan[-01]
                value = info.month(tokens[idx + 2])

                if value is not None:
                    ymd.append(value, 'M')
                else:
                    raise ValueError()

            if idx + 3 < len_l and tokens[idx + 3] == sep:
                # We have three members
                # NOTE: tokens[idx + 4] is read without a bounds check, so
                # a trailing separator raises IndexError here.
                value = info.month(tokens[idx + 4])

                if value is not None:
                    ymd.append(value, 'M')
                else:
                    ymd.append(tokens[idx + 4])
                idx += 2

            idx += 1
        idx += 1

    elif idx + 1 >= len_l or info.jump(tokens[idx + 1]):
        if idx + 2 < len_l and info.ampm(tokens[idx + 2]) is not None:
            # 12 am
            hour = int(value)
            res.hour = self._adjust_ampm(hour, info.ampm(tokens[idx + 2]))
            idx += 1
        else:
            # Year, month or day
            ymd.append(value)
        idx += 1

    elif info.ampm(tokens[idx + 1]) is not None and (0 <= value < 24):
        # 12am
        hour = int(value)
        res.hour = self._adjust_ampm(hour, info.ampm(tokens[idx + 1]))
        idx += 1

    elif ymd.could_be_day(value):
        ymd.append(value)

    elif not fuzzy:
        raise ValueError()

    return idx
the "12" in 12h" 1013 hms_idx = idx + 1 1014 1015 elif (allow_jump and idx+2 < len_l and tokens[idx+1] == ' ' and 1016 info.hms(tokens[idx+2]) is not None): 1017 # There is a space and then an "h", "m", or "s" label. 1018 # e.g. the "12" in "12 h" 1019 hms_idx = idx + 2 1020 1021 elif idx > 0 and info.hms(tokens[idx-1]) is not None: 1022 # There is a "h", "m", or "s" preceding this token. Since neither 1023 # of the previous cases was hit, there is no label following this 1024 # token, so we use the previous label. 1025 # e.g. the "04" in "12h04" 1026 hms_idx = idx-1 1027 1028 elif (1 < idx == len_l-1 and tokens[idx-1] == ' ' and 1029 info.hms(tokens[idx-2]) is not None): 1030 # If we are looking at the final token, we allow for a 1031 # backward-looking check to skip over a space. 1032 # TODO: Are we sure this is the right condition here? 1033 hms_idx = idx - 2 1034 1035 else: 1036 hms_idx = None 1037 1038 return hms_idx 1039 1040 def _assign_hms(self, res, value_repr, hms): 1041 # See GH issue #427, fixing float rounding 1042 value = self._to_decimal(value_repr) 1043 1044 if hms == 0: 1045 # Hour 1046 res.hour = int(value) 1047 if value % 1: 1048 res.minute = int(60*(value % 1)) 1049 1050 elif hms == 1: 1051 (res.minute, res.second) = self._parse_min_sec(value) 1052 1053 elif hms == 2: 1054 (res.second, res.microsecond) = self._parsems(value_repr) 1055 1056 def _could_be_tzname(self, hour, tzname, tzoffset, token): 1057 return (hour is not None and 1058 tzname is None and 1059 tzoffset is None and 1060 len(token) <= 5 and 1061 (all(x in string.ascii_uppercase for x in token) 1062 or token in self.info.UTCZONE)) 1063 1064 def _ampm_valid(self, hour, ampm, fuzzy): 1065 """ 1066 For fuzzy parsing, 'a' or 'am' (both valid English words) 1067 may erroneously trigger the AM/PM flag. Deal with that 1068 here. 1069 """ 1070 val_is_ampm = True 1071 1072 # If there's already an AM/PM flag, this one isn't one. 
1073 if fuzzy and ampm is not None: 1074 val_is_ampm = False 1075 1076 # If AM/PM is found and hour is not, raise a ValueError 1077 if hour is None: 1078 if fuzzy: 1079 val_is_ampm = False 1080 else: 1081 raise ValueError('No hour specified with AM or PM flag.') 1082 elif not 0 <= hour <= 12: 1083 # If AM/PM is found, it's a 12 hour clock, so raise 1084 # an error for invalid range 1085 if fuzzy: 1086 val_is_ampm = False 1087 else: 1088 raise ValueError('Invalid hour specified for 12-hour clock.') 1089 1090 return val_is_ampm 1091 1092 def _adjust_ampm(self, hour, ampm): 1093 if hour < 12 and ampm == 1: 1094 hour += 12 1095 elif hour == 12 and ampm == 0: 1096 hour = 0 1097 return hour 1098 1099 def _parse_min_sec(self, value): 1100 # TODO: Every usage of this function sets res.second to the return 1101 # value. Are there any cases where second will be returned as None and 1102 # we *don't* want to set res.second = None? 1103 minute = int(value) 1104 second = None 1105 1106 sec_remainder = value % 1 1107 if sec_remainder: 1108 second = int(60 * sec_remainder) 1109 return (minute, second) 1110 1111 def _parse_hms(self, idx, tokens, info, hms_idx): 1112 # TODO: Is this going to admit a lot of false-positives for when we 1113 # just happen to have digits and "h", "m" or "s" characters in non-date 1114 # text? I guess hex hashes won't have that problem, but there's plenty 1115 # of random junk out there. 1116 if hms_idx is None: 1117 hms = None 1118 new_idx = idx 1119 elif hms_idx > idx: 1120 hms = info.hms(tokens[hms_idx]) 1121 new_idx = hms_idx 1122 else: 1123 # Looking backwards, increment one. 1124 hms = info.hms(tokens[hms_idx]) + 1 1125 new_idx = idx 1126 1127 return (new_idx, hms) 1128 1129 # ------------------------------------------------------------------ 1130 # Handling for individual tokens. These are kept as methods instead 1131 # of functions for the sake of customizability via subclassing. 
1132 1133 def _parsems(self, value): 1134 """Parse a I[.F] seconds value into (seconds, microseconds).""" 1135 if "." not in value: 1136 return int(value), 0 1137 else: 1138 i, f = value.split(".") 1139 return int(i), int(f.ljust(6, "0")[:6]) 1140 1141 def _to_decimal(self, val): 1142 try: 1143 decimal_value = Decimal(val) 1144 # See GH 662, edge case, infinite value should not be converted 1145 # via `_to_decimal` 1146 if not decimal_value.is_finite(): 1147 raise ValueError("Converted decimal value is infinite or NaN") 1148 except Exception as e: 1149 msg = "Could not convert %s to decimal" % val 1150 six.raise_from(ValueError(msg), e) 1151 else: 1152 return decimal_value 1153 1154 # ------------------------------------------------------------------ 1155 # Post-Parsing construction of datetime output. These are kept as 1156 # methods instead of functions for the sake of customizability via 1157 # subclassing. 1158 1159 def _build_tzinfo(self, tzinfos, tzname, tzoffset): 1160 if callable(tzinfos): 1161 tzdata = tzinfos(tzname, tzoffset) 1162 else: 1163 tzdata = tzinfos.get(tzname) 1164 # handle case where tzinfo is paased an options that returns None 1165 # eg tzinfos = {'BRST' : None} 1166 if isinstance(tzdata, datetime.tzinfo) or tzdata is None: 1167 tzinfo = tzdata 1168 elif isinstance(tzdata, text_type): 1169 tzinfo = tz.tzstr(tzdata) 1170 elif isinstance(tzdata, integer_types): 1171 tzinfo = tz.tzoffset(tzname, tzdata) 1172 else: 1173 raise TypeError("Offset must be tzinfo subclass, tz string, " 1174 "or int offset.") 1175 return tzinfo 1176 1177 def _build_tzaware(self, naive, res, tzinfos): 1178 if (callable(tzinfos) or (tzinfos and res.tzname in tzinfos)): 1179 tzinfo = self._build_tzinfo(tzinfos, res.tzname, res.tzoffset) 1180 aware = naive.replace(tzinfo=tzinfo) 1181 aware = self._assign_tzname(aware, res.tzname) 1182 1183 elif res.tzname and res.tzname in time.tzname: 1184 aware = naive.replace(tzinfo=tz.tzlocal()) 1185 1186 # Handle ambiguous local 
datetime 1187 aware = self._assign_tzname(aware, res.tzname) 1188 1189 # This is mostly relevant for winter GMT zones parsed in the UK 1190 if (aware.tzname() != res.tzname and 1191 res.tzname in self.info.UTCZONE): 1192 aware = aware.replace(tzinfo=tz.UTC) 1193 1194 elif res.tzoffset == 0: 1195 aware = naive.replace(tzinfo=tz.UTC) 1196 1197 elif res.tzoffset: 1198 aware = naive.replace(tzinfo=tz.tzoffset(res.tzname, res.tzoffset)) 1199 1200 elif not res.tzname and not res.tzoffset: 1201 # i.e. no timezone information was found. 1202 aware = naive 1203 1204 elif res.tzname: 1205 # tz-like string was parsed but we don't know what to do 1206 # with it 1207 warnings.warn("tzname {tzname} identified but not understood. " 1208 "Pass `tzinfos` argument in order to correctly " 1209 "return a timezone-aware datetime. In a future " 1210 "version, this will raise an " 1211 "exception.".format(tzname=res.tzname), 1212 category=UnknownTimezoneWarning) 1213 aware = naive 1214 1215 return aware 1216 1217 def _build_naive(self, res, default): 1218 repl = {} 1219 for attr in ("year", "month", "day", "hour", 1220 "minute", "second", "microsecond"): 1221 value = getattr(res, attr) 1222 if value is not None: 1223 repl[attr] = value 1224 1225 if 'day' not in repl: 1226 # If the default day exceeds the last day of the month, fall back 1227 # to the end of the month. 
1228 cyear = default.year if res.year is None else res.year 1229 cmonth = default.month if res.month is None else res.month 1230 cday = default.day if res.day is None else res.day 1231 1232 if cday > monthrange(cyear, cmonth)[1]: 1233 repl['day'] = monthrange(cyear, cmonth)[1] 1234 1235 naive = default.replace(**repl) 1236 1237 if res.weekday is not None and not res.day: 1238 naive = naive + relativedelta.relativedelta(weekday=res.weekday) 1239 1240 return naive 1241 1242 def _assign_tzname(self, dt, tzname): 1243 if dt.tzname() != tzname: 1244 new_dt = tz.enfold(dt, fold=1) 1245 if new_dt.tzname() == tzname: 1246 return new_dt 1247 1248 return dt 1249 1250 def _recombine_skipped(self, tokens, skipped_idxs): 1251 """ 1252 >>> tokens = ["foo", " ", "bar", " ", "19June2000", "baz"] 1253 >>> skipped_idxs = [0, 1, 2, 5] 1254 >>> _recombine_skipped(tokens, skipped_idxs) 1255 ["foo bar", "baz"] 1256 """ 1257 skipped_tokens = [] 1258 for i, idx in enumerate(sorted(skipped_idxs)): 1259 if i > 0 and idx - 1 == skipped_idxs[i - 1]: 1260 skipped_tokens[-1] = skipped_tokens[-1] + tokens[idx] 1261 else: 1262 skipped_tokens.append(tokens[idx]) 1263 1264 return skipped_tokens 1265 1266 1267DEFAULTPARSER = parser() 1268 1269 1270def parse(timestr, parserinfo=None, **kwargs): 1271 """ 1272 1273 Parse a string in one of the supported formats, using the 1274 ``parserinfo`` parameters. 1275 1276 :param timestr: 1277 A string containing a date/time stamp. 1278 1279 :param parserinfo: 1280 A :class:`parserinfo` object containing parameters for the parser. 1281 If ``None``, the default arguments to the :class:`parserinfo` 1282 constructor are used. 1283 1284 The ``**kwargs`` parameter takes the following keyword arguments: 1285 1286 :param default: 1287 The default datetime object, if this is a datetime object and not 1288 ``None``, elements specified in ``timestr`` replace elements in the 1289 default object. 
1290 1291 :param ignoretz: 1292 If set ``True``, time zones in parsed strings are ignored and a naive 1293 :class:`datetime` object is returned. 1294 1295 :param tzinfos: 1296 Additional time zone names / aliases which may be present in the 1297 string. This argument maps time zone names (and optionally offsets 1298 from those time zones) to time zones. This parameter can be a 1299 dictionary with timezone aliases mapping time zone names to time 1300 zones or a function taking two parameters (``tzname`` and 1301 ``tzoffset``) and returning a time zone. 1302 1303 The timezones to which the names are mapped can be an integer 1304 offset from UTC in seconds or a :class:`tzinfo` object. 1305 1306 .. doctest:: 1307 :options: +NORMALIZE_WHITESPACE 1308 1309 >>> from dateutil.parser import parse 1310 >>> from dateutil.tz import gettz 1311 >>> tzinfos = {"BRST": -7200, "CST": gettz("America/Chicago")} 1312 >>> parse("2012-01-19 17:21:00 BRST", tzinfos=tzinfos) 1313 datetime.datetime(2012, 1, 19, 17, 21, tzinfo=tzoffset(u'BRST', -7200)) 1314 >>> parse("2012-01-19 17:21:00 CST", tzinfos=tzinfos) 1315 datetime.datetime(2012, 1, 19, 17, 21, 1316 tzinfo=tzfile('/usr/share/zoneinfo/America/Chicago')) 1317 1318 This parameter is ignored if ``ignoretz`` is set. 1319 1320 :param dayfirst: 1321 Whether to interpret the first value in an ambiguous 3-integer date 1322 (e.g. 01/05/09) as the day (``True``) or month (``False``). If 1323 ``yearfirst`` is set to ``True``, this distinguishes between YDM and 1324 YMD. If set to ``None``, this value is retrieved from the current 1325 :class:`parserinfo` object (which itself defaults to ``False``). 1326 1327 :param yearfirst: 1328 Whether to interpret the first value in an ambiguous 3-integer date 1329 (e.g. 01/05/09) as the year. If ``True``, the first number is taken to 1330 be the year, otherwise the last number is taken to be the year. 
If 1331 this is set to ``None``, the value is retrieved from the current 1332 :class:`parserinfo` object (which itself defaults to ``False``). 1333 1334 :param fuzzy: 1335 Whether to allow fuzzy parsing, allowing for string like "Today is 1336 January 1, 2047 at 8:21:00AM". 1337 1338 :param fuzzy_with_tokens: 1339 If ``True``, ``fuzzy`` is automatically set to True, and the parser 1340 will return a tuple where the first element is the parsed 1341 :class:`datetime.datetime` datetimestamp and the second element is 1342 a tuple containing the portions of the string which were ignored: 1343 1344 .. doctest:: 1345 1346 >>> from dateutil.parser import parse 1347 >>> parse("Today is January 1, 2047 at 8:21:00AM", fuzzy_with_tokens=True) 1348 (datetime.datetime(2047, 1, 1, 8, 21), (u'Today is ', u' ', u'at ')) 1349 1350 :return: 1351 Returns a :class:`datetime.datetime` object or, if the 1352 ``fuzzy_with_tokens`` option is ``True``, returns a tuple, the 1353 first element being a :class:`datetime.datetime` object, the second 1354 a tuple containing the fuzzy tokens. 1355 1356 :raises ParserError: 1357 Raised for invalid or unknown string formats, if the provided 1358 :class:`tzinfo` is not in a valid format, or if an invalid date would 1359 be created. 1360 1361 :raises OverflowError: 1362 Raised if the parsed date exceeds the largest valid C integer on 1363 your system. 
1364 """ 1365 if parserinfo: 1366 return parser(parserinfo).parse(timestr, **kwargs) 1367 else: 1368 return DEFAULTPARSER.parse(timestr, **kwargs) 1369 1370 1371class _tzparser(object): 1372 1373 class _result(_resultbase): 1374 1375 __slots__ = ["stdabbr", "stdoffset", "dstabbr", "dstoffset", 1376 "start", "end"] 1377 1378 class _attr(_resultbase): 1379 __slots__ = ["month", "week", "weekday", 1380 "yday", "jyday", "day", "time"] 1381 1382 def __repr__(self): 1383 return self._repr("") 1384 1385 def __init__(self): 1386 _resultbase.__init__(self) 1387 self.start = self._attr() 1388 self.end = self._attr() 1389 1390 def parse(self, tzstr): 1391 res = self._result() 1392 l = [x for x in re.split(r'([,:.]|[a-zA-Z]+|[0-9]+)',tzstr) if x] 1393 used_idxs = list() 1394 try: 1395 1396 len_l = len(l) 1397 1398 i = 0 1399 while i < len_l: 1400 # BRST+3[BRDT[+2]] 1401 j = i 1402 while j < len_l and not [x for x in l[j] 1403 if x in "0123456789:,-+"]: 1404 j += 1 1405 if j != i: 1406 if not res.stdabbr: 1407 offattr = "stdoffset" 1408 res.stdabbr = "".join(l[i:j]) 1409 else: 1410 offattr = "dstoffset" 1411 res.dstabbr = "".join(l[i:j]) 1412 1413 for ii in range(j): 1414 used_idxs.append(ii) 1415 i = j 1416 if (i < len_l and (l[i] in ('+', '-') or l[i][0] in 1417 "0123456789")): 1418 if l[i] in ('+', '-'): 1419 # Yes, that's right. See the TZ variable 1420 # documentation. 
1421 signal = (1, -1)[l[i] == '+'] 1422 used_idxs.append(i) 1423 i += 1 1424 else: 1425 signal = -1 1426 len_li = len(l[i]) 1427 if len_li == 4: 1428 # -0300 1429 setattr(res, offattr, (int(l[i][:2]) * 3600 + 1430 int(l[i][2:]) * 60) * signal) 1431 elif i + 1 < len_l and l[i + 1] == ':': 1432 # -03:00 1433 setattr(res, offattr, 1434 (int(l[i]) * 3600 + 1435 int(l[i + 2]) * 60) * signal) 1436 used_idxs.append(i) 1437 i += 2 1438 elif len_li <= 2: 1439 # -[0]3 1440 setattr(res, offattr, 1441 int(l[i][:2]) * 3600 * signal) 1442 else: 1443 return None 1444 used_idxs.append(i) 1445 i += 1 1446 if res.dstabbr: 1447 break 1448 else: 1449 break 1450 1451 1452 if i < len_l: 1453 for j in range(i, len_l): 1454 if l[j] == ';': 1455 l[j] = ',' 1456 1457 assert l[i] == ',' 1458 1459 i += 1 1460 1461 if i >= len_l: 1462 pass 1463 elif (8 <= l.count(',') <= 9 and 1464 not [y for x in l[i:] if x != ',' 1465 for y in x if y not in "0123456789+-"]): 1466 # GMT0BST,3,0,30,3600,10,0,26,7200[,3600] 1467 for x in (res.start, res.end): 1468 x.month = int(l[i]) 1469 used_idxs.append(i) 1470 i += 2 1471 if l[i] == '-': 1472 value = int(l[i + 1]) * -1 1473 used_idxs.append(i) 1474 i += 1 1475 else: 1476 value = int(l[i]) 1477 used_idxs.append(i) 1478 i += 2 1479 if value: 1480 x.week = value 1481 x.weekday = (int(l[i]) - 1) % 7 1482 else: 1483 x.day = int(l[i]) 1484 used_idxs.append(i) 1485 i += 2 1486 x.time = int(l[i]) 1487 used_idxs.append(i) 1488 i += 2 1489 if i < len_l: 1490 if l[i] in ('-', '+'): 1491 signal = (-1, 1)[l[i] == "+"] 1492 used_idxs.append(i) 1493 i += 1 1494 else: 1495 signal = 1 1496 used_idxs.append(i) 1497 res.dstoffset = (res.stdoffset + int(l[i]) * signal) 1498 1499 # This was a made-up format that is not in normal use 1500 warn(('Parsed time zone "%s"' % tzstr) + 1501 'is in a non-standard dateutil-specific format, which ' + 1502 'is now deprecated; support for parsing this format ' + 1503 'will be removed in future versions. 
It is recommended ' + 1504 'that you switch to a standard format like the GNU ' + 1505 'TZ variable format.', tz.DeprecatedTzFormatWarning) 1506 elif (l.count(',') == 2 and l[i:].count('/') <= 2 and 1507 not [y for x in l[i:] if x not in (',', '/', 'J', 'M', 1508 '.', '-', ':') 1509 for y in x if y not in "0123456789"]): 1510 for x in (res.start, res.end): 1511 if l[i] == 'J': 1512 # non-leap year day (1 based) 1513 used_idxs.append(i) 1514 i += 1 1515 x.jyday = int(l[i]) 1516 elif l[i] == 'M': 1517 # month[-.]week[-.]weekday 1518 used_idxs.append(i) 1519 i += 1 1520 x.month = int(l[i]) 1521 used_idxs.append(i) 1522 i += 1 1523 assert l[i] in ('-', '.') 1524 used_idxs.append(i) 1525 i += 1 1526 x.week = int(l[i]) 1527 if x.week == 5: 1528 x.week = -1 1529 used_idxs.append(i) 1530 i += 1 1531 assert l[i] in ('-', '.') 1532 used_idxs.append(i) 1533 i += 1 1534 x.weekday = (int(l[i]) - 1) % 7 1535 else: 1536 # year day (zero based) 1537 x.yday = int(l[i]) + 1 1538 1539 used_idxs.append(i) 1540 i += 1 1541 1542 if i < len_l and l[i] == '/': 1543 used_idxs.append(i) 1544 i += 1 1545 # start time 1546 len_li = len(l[i]) 1547 if len_li == 4: 1548 # -0300 1549 x.time = (int(l[i][:2]) * 3600 + 1550 int(l[i][2:]) * 60) 1551 elif i + 1 < len_l and l[i + 1] == ':': 1552 # -03:00 1553 x.time = int(l[i]) * 3600 + int(l[i + 2]) * 60 1554 used_idxs.append(i) 1555 i += 2 1556 if i + 1 < len_l and l[i + 1] == ':': 1557 used_idxs.append(i) 1558 i += 2 1559 x.time += int(l[i]) 1560 elif len_li <= 2: 1561 # -[0]3 1562 x.time = (int(l[i][:2]) * 3600) 1563 else: 1564 return None 1565 used_idxs.append(i) 1566 i += 1 1567 1568 assert i == len_l or l[i] == ',' 1569 1570 i += 1 1571 1572 assert i >= len_l 1573 1574 except (IndexError, ValueError, AssertionError): 1575 return None 1576 1577 unused_idxs = set(range(len_l)).difference(used_idxs) 1578 res.any_unused_tokens = not {l[n] for n in unused_idxs}.issubset({",",":"}) 1579 return res 1580 1581 1582DEFAULTTZPARSER = _tzparser() 1583 1584 
def _parsetz(tzstr):
    """Parse ``tzstr`` with the module-level ``DEFAULTTZPARSER``."""
    return DEFAULTTZPARSER.parse(tzstr)


class ParserError(ValueError):
    """Exception subclass used for any failure to parse a datetime string.

    This is a subclass of :py:exc:`ValueError`, and should be raised any time
    earlier versions of ``dateutil`` would have raised ``ValueError``.

    The first argument is treated as a ``%``-style format string and the
    remaining arguments as its values when the exception is converted to a
    string.

    .. versionadded:: 2.8.1
    """
    def __str__(self):
        try:
            return self.args[0] % self.args[1:]
        except (TypeError, ValueError, IndexError):
            # TypeError: argument count/type mismatch or non-string message.
            # ValueError: a stray "%" in the message (e.g. "50%").
            # IndexError: no arguments at all.
            # In every case fall back to the stock exception text rather
            # than letting str(exc) itself raise.
            return super(ParserError, self).__str__()

    def __repr__(self):
        # Wrap each argument in a 1-tuple so that tuple arguments are
        # formatted as a single value instead of being unpacked by "%".
        args = ", ".join("'%s'" % (arg,) for arg in self.args)
        return "%s(%s)" % (self.__class__.__name__, args)


class UnknownTimezoneWarning(RuntimeWarning):
    """Raised when the parser finds a timezone it cannot parse into a tzinfo.

    .. versionadded:: 2.7.0
    """
# vim:ts=4:sw=4:et