1# -*- coding: utf-8 -*- 2""" 3This module offers a generic date/time string parser which is able to parse 4most known formats to represent a date and/or time. 5 6This module attempts to be forgiving with regards to unlikely input formats, 7returning a datetime object even for dates which are ambiguous. If an element 8of a date/time stamp is omitted, the following rules are applied: 9 10- If AM or PM is left unspecified, a 24-hour clock is assumed, however, an hour 11 on a 12-hour clock (``0 <= hour <= 12``) *must* be specified if AM or PM is 12 specified. 13- If a time zone is omitted, a timezone-naive datetime is returned. 14 15If any other elements are missing, they are taken from the 16:class:`datetime.datetime` object passed to the parameter ``default``. If this 17results in a day number exceeding the valid number of days per month, the 18value falls back to the end of the month. 19 20Additional resources about date/time string formats can be found below: 21 22- `A summary of the international standard date and time notation 23 <http://www.cl.cam.ac.uk/~mgk25/iso-time.html>`_ 24- `W3C Date and Time Formats <http://www.w3.org/TR/NOTE-datetime>`_ 25- `Time Formats (Planetary Rings Node) <https://pds-rings.seti.org:443/tools/time_formats.html>`_ 26- `CPAN ParseDate module 27 <http://search.cpan.org/~muir/Time-modules-2013.0912/lib/Time/ParseDate.pm>`_ 28- `Java SimpleDateFormat Class 29 <https://docs.oracle.com/javase/6/docs/api/java/text/SimpleDateFormat.html>`_ 30""" 31from __future__ import unicode_literals 32 33import datetime 34import re 35import string 36import time 37import warnings 38 39from calendar import monthrange 40from io import StringIO 41 42import six 43from six import binary_type, integer_types, text_type 44 45from decimal import Decimal 46 47from warnings import warn 48 49from .. import relativedelta 50from .. import tz 51 52__all__ = ["parse", "parserinfo"] 53 54 55# TODO: pandas.core.tools.datetimes imports this explicitly. Might be worth 56# making public and/or figuring out if there is something we can 57# take off their plate. 58class _timelex(object): 59 # Fractional seconds are sometimes split by a comma 60 _split_decimal = re.compile("([.,])") 61 62 def __init__(self, instream): 63 if six.PY2: 64 # In Python 2, we can't duck type properly because unicode has 65 # a 'decode' function, and we'd be double-decoding 66 if isinstance(instream, (binary_type, bytearray)): 67 instream = instream.decode() 68 else: 69 if getattr(instream, 'decode', None) is not None: 70 instream = instream.decode() 71 72 if isinstance(instream, text_type): 73 instream = StringIO(instream) 74 elif getattr(instream, 'read', None) is None: 75 raise TypeError('Parser must be a string or character stream, not ' 76 '{itype}'.format(itype=instream.__class__.__name__)) 77 78 self.instream = instream 79 self.charstack = [] 80 self.tokenstack = [] 81 self.eof = False 82 83 def get_token(self): 84 """ 85 This function breaks the time string into lexical units (tokens), which 86 can be parsed by the parser. Lexical units are demarcated by changes in 87 the character set, so any continuous string of letters is considered 88 one unit, any continuous string of numbers is considered one unit. 89 90 The main complication arises from the fact that dots ('.') can be used 91 both as separators (e.g. "Sep.20.2009") or decimal points (e.g. 92 "4:30:21.447"). As such, it is necessary to read the full context of 93 any dot-separated strings before breaking it into tokens; as such, this 94 function maintains a "token stack", for when the ambiguous context 95 demands that multiple tokens be parsed at once. 96 """ 97 if self.tokenstack: 98 return self.tokenstack.pop(0) 99 100 seenletters = False 101 token = None 102 state = None 103 104 while not self.eof: 105 # We only realize that we've reached the end of a token when we 106 # find a character that's not part of the current token - since 107 # that character may be part of the next token, it's stored in the 108 # charstack. 109 if self.charstack: 110 nextchar = self.charstack.pop(0) 111 else: 112 nextchar = self.instream.read(1) 113 while nextchar == '\x00': 114 nextchar = self.instream.read(1) 115 116 if not nextchar: 117 self.eof = True 118 break 119 elif not state: 120 # First character of the token - determines if we're starting 121 # to parse a word, a number or something else. 122 token = nextchar 123 if self.isword(nextchar): 124 state = 'a' 125 elif self.isnum(nextchar): 126 state = '0' 127 elif self.isspace(nextchar): 128 token = ' ' 129 break # emit token 130 else: 131 break # emit token 132 elif state == 'a': 133 # If we've already started reading a word, we keep reading 134 # letters until we find something that's not part of a word. 135 seenletters = True 136 if self.isword(nextchar): 137 token += nextchar 138 elif nextchar == '.': 139 token += nextchar 140 state = 'a.' 141 else: 142 self.charstack.append(nextchar) 143 break # emit token 144 elif state == '0': 145 # If we've already started reading a number, we keep reading 146 # numbers until we find something that doesn't fit. 147 if self.isnum(nextchar): 148 token += nextchar 149 elif nextchar == '.' or (nextchar == ',' and len(token) >= 2): 150 token += nextchar 151 state = '0.' 152 else: 153 self.charstack.append(nextchar) 154 break # emit token 155 elif state == 'a.': 156 # If we've seen some letters and a dot separator, continue 157 # parsing, and the tokens will be broken up later. 158 seenletters = True 159 if nextchar == '.' or self.isword(nextchar): 160 token += nextchar 161 elif self.isnum(nextchar) and token[-1] == '.': 162 token += nextchar 163 state = '0.' 164 else: 165 self.charstack.append(nextchar) 166 break # emit token 167 elif state == '0.': 168 # If we've seen at least one dot separator, keep going, we'll 169 # break up the tokens later. 170 if nextchar == '.' or self.isnum(nextchar): 171 token += nextchar 172 elif self.isword(nextchar) and token[-1] == '.': 173 token += nextchar 174 state = 'a.' 175 else: 176 self.charstack.append(nextchar) 177 break # emit token 178 179 if (state in ('a.', '0.') and (seenletters or token.count('.') > 1 or 180 token[-1] in '.,')): 181 l = self._split_decimal.split(token) 182 token = l[0] 183 for tok in l[1:]: 184 if tok: 185 self.tokenstack.append(tok) 186 187 if state == '0.' and token.count('.') == 0: 188 token = token.replace(',', '.') 189 190 return token 191 192 def __iter__(self): 193 return self 194 195 def __next__(self): 196 token = self.get_token() 197 if token is None: 198 raise StopIteration 199 200 return token 201 202 def next(self): 203 return self.__next__() # Python 2.x support 204 205 @classmethod 206 def split(cls, s): 207 return list(cls(s)) 208 209 @classmethod 210 def isword(cls, nextchar): 211 """ Whether or not the next character is part of a word """ 212 return nextchar.isalpha() 213 214 @classmethod 215 def isnum(cls, nextchar): 216 """ Whether the next character is part of a number """ 217 return nextchar.isdigit() 218 219 @classmethod 220 def isspace(cls, nextchar): 221 """ Whether the next character is whitespace """ 222 return nextchar.isspace() 223 224 225class _resultbase(object): 226 227 def __init__(self): 228 for attr in self.__slots__: 229 setattr(self, attr, None) 230 231 def _repr(self, classname): 232 l = [] 233 for attr in self.__slots__: 234 value = getattr(self, attr) 235 if value is not None: 236 l.append("%s=%s" % (attr, repr(value))) 237 return "%s(%s)" % (classname, ", ".join(l)) 238 239 def __len__(self): 240 return (sum(getattr(self, attr) is not None 241 for attr in self.__slots__)) 242 243 def __repr__(self): 244 return self._repr(self.__class__.__name__) 245 246 247class parserinfo(object): 248 """ 249 Class which handles what inputs are accepted. Subclass this to customize 250 the language and acceptable values for each parameter. 251 252 :param dayfirst: 253 Whether to interpret the first value in an ambiguous 3-integer date 254 (e.g. 01/05/09) as the day (``True``) or month (``False``). If 255 ``yearfirst`` is set to ``True``, this distinguishes between YDM 256 and YMD. Default is ``False``. 257 258 :param yearfirst: 259 Whether to interpret the first value in an ambiguous 3-integer date 260 (e.g. 01/05/09) as the year. If ``True``, the first number is taken 261 to be the year, otherwise the last number is taken to be the year. 262 Default is ``False``. 263 """ 264 265 # m from a.m/p.m, t from ISO T separator 266 JUMP = [" ", ".", ",", ";", "-", "/", "'", 267 "at", "on", "and", "ad", "m", "t", "of", 268 "st", "nd", "rd", "th"] 269 270 WEEKDAYS = [("Mon", "Monday"), 271 ("Tue", "Tuesday"), # TODO: "Tues" 272 ("Wed", "Wednesday"), 273 ("Thu", "Thursday"), # TODO: "Thurs" 274 ("Fri", "Friday"), 275 ("Sat", "Saturday"), 276 ("Sun", "Sunday")] 277 MONTHS = [("Jan", "January"), 278 ("Feb", "February"), # TODO: "Febr" 279 ("Mar", "March"), 280 ("Apr", "April"), 281 ("May", "May"), 282 ("Jun", "June"), 283 ("Jul", "July"), 284 ("Aug", "August"), 285 ("Sep", "Sept", "September"), 286 ("Oct", "October"), 287 ("Nov", "November"), 288 ("Dec", "December")] 289 HMS = [("h", "hour", "hours"), 290 ("m", "minute", "minutes"), 291 ("s", "second", "seconds")] 292 AMPM = [("am", "a"), 293 ("pm", "p")] 294 UTCZONE = ["UTC", "GMT", "Z"] 295 PERTAIN = ["of"] 296 TZOFFSET = {} 297 # TODO: ERA = ["AD", "BC", "CE", "BCE", "Stardate", 298 # "Anno Domini", "Year of Our Lord"] 299 300 def __init__(self, dayfirst=False, yearfirst=False): 301 self._jump = self._convert(self.JUMP) 302 self._weekdays = self._convert(self.WEEKDAYS) 303 self._months = self._convert(self.MONTHS) 304 self._hms = self._convert(self.HMS) 305 self._ampm = self._convert(self.AMPM) 306 self._utczone = self._convert(self.UTCZONE) 307 self._pertain = self._convert(self.PERTAIN) 308 309 self.dayfirst = dayfirst 310 self.yearfirst = yearfirst 311 312 self._year = time.localtime().tm_year 313 self._century = self._year // 100 * 100 314 315 def _convert(self, lst): 316 dct = {} 317 for i, v in enumerate(lst): 318 if isinstance(v, tuple): 319 for v in v: 320 dct[v.lower()] = i 321 else: 322 dct[v.lower()] = i 323 return dct 324 325 def jump(self, name): 326 return name.lower() in self._jump 327 328 def weekday(self, name): 329 try: 330 return self._weekdays[name.lower()] 331 except KeyError: 332 pass 333 return None 334 335 def month(self, name): 336 try: 337 return self._months[name.lower()] + 1 338 except KeyError: 339 pass 340 return None 341 342 def hms(self, name): 343 try: 344 return self._hms[name.lower()] 345 except KeyError: 346 return None 347 348 def ampm(self, name): 349 try: 350 return self._ampm[name.lower()] 351 except KeyError: 352 return None 353 354 def pertain(self, name): 355 return name.lower() in self._pertain 356 357 def utczone(self, name): 358 return name.lower() in self._utczone 359 360 def tzoffset(self, name): 361 if name in self._utczone: 362 return 0 363 364 return self.TZOFFSET.get(name) 365 366 def convertyear(self, year, century_specified=False): 367 """ 368 Converts two-digit years to year within [-50, 49] 369 range of self._year (current local time) 370 """ 371 372 # Function contract is that the year is always positive 373 assert year >= 0 374 375 if year < 100 and not century_specified: 376 # assume current century to start 377 year += self._century 378 379 if year >= self._year + 50: # if too far in future 380 year -= 100 381 elif year < self._year - 50: # if too far in past 382 year += 100 383 384 return year 385 386 def validate(self, res): 387 # move to info 388 if res.year is not None: 389 res.year = self.convertyear(res.year, res.century_specified) 390 391 if res.tzoffset == 0 and not res.tzname or res.tzname == 'Z': 392 res.tzname = "UTC" 393 res.tzoffset = 0 394 elif res.tzoffset != 0 and res.tzname and self.utczone(res.tzname): 395 res.tzoffset = 0 396 return True 397 398 399class _ymd(list): 400 def __init__(self, *args, **kwargs): 401 super(self.__class__, self).__init__(*args, **kwargs) 402 self.century_specified = False 403 self.dstridx = None 404 self.mstridx = None 405 self.ystridx = None 406 407 @property 408 def has_year(self): 409 return self.ystridx is not None 410 411 @property 412 def has_month(self): 413 return self.mstridx is not None 414 415 @property 416 def has_day(self): 417 return self.dstridx is not None 418 419 def could_be_day(self, value): 420 if self.has_day: 421 return False 422 elif not self.has_month: 423 return 1 <= value <= 31 424 elif not self.has_year: 425 # Be permissive, assume leapyear 426 month = self[self.mstridx] 427 return 1 <= value <= monthrange(2000, month)[1] 428 else: 429 month = self[self.mstridx] 430 year = self[self.ystridx] 431 return 1 <= value <= monthrange(year, month)[1] 432 433 def append(self, val, label=None): 434 if hasattr(val, '__len__'): 435 if val.isdigit() and len(val) > 2: 436 self.century_specified = True 437 if label not in [None, 'Y']: # pragma: no cover 438 raise ValueError(label) 439 label = 'Y' 440 elif val > 100: 441 self.century_specified = True 442 if label not in [None, 'Y']: # pragma: no cover 443 raise ValueError(label) 444 label = 'Y' 445 446 super(self.__class__, self).append(int(val)) 447 448 if label == 'M': 449 if self.has_month: 450 raise ValueError('Month is already set') 451 self.mstridx = len(self) - 1 452 elif label == 'D': 453 if self.has_day: 454 raise ValueError('Day is already set') 455 self.dstridx = len(self) - 1 456 elif label == 'Y': 457 if self.has_year: 458 raise ValueError('Year is already set') 459 self.ystridx = len(self) - 1 460 461 def _resolve_from_stridxs(self, strids): 462 """ 463 Try to resolve the identities of year/month/day elements using 464 ystridx, mstridx, and dstridx, if enough of these are specified. 465 """ 466 if len(self) == 3 and len(strids) == 2: 467 # we can back out the remaining stridx value 468 missing = [x for x in range(3) if x not in strids.values()] 469 key = [x for x in ['y', 'm', 'd'] if x not in strids] 470 assert len(missing) == len(key) == 1 471 key = key[0] 472 val = missing[0] 473 strids[key] = val 474 475 assert len(self) == len(strids) # otherwise this should not be called 476 out = {key: self[strids[key]] for key in strids} 477 return (out.get('y'), out.get('m'), out.get('d')) 478 479 def resolve_ymd(self, yearfirst, dayfirst): 480 len_ymd = len(self) 481 year, month, day = (None, None, None) 482 483 strids = (('y', self.ystridx), 484 ('m', self.mstridx), 485 ('d', self.dstridx)) 486 487 strids = {key: val for key, val in strids if val is not None} 488 if (len(self) == len(strids) > 0 or 489 (len(self) == 3 and len(strids) == 2)): 490 return self._resolve_from_stridxs(strids) 491 492 mstridx = self.mstridx 493 494 if len_ymd > 3: 495 raise ValueError("More than three YMD values") 496 elif len_ymd == 1 or (mstridx is not None and len_ymd == 2): 497 # One member, or two members with a month string 498 if mstridx is not None: 499 month = self[mstridx] 500 # since mstridx is 0 or 1, self[mstridx-1] always 501 # looks up the other element 502 other = self[mstridx - 1] 503 else: 504 other = self[0] 505 506 if len_ymd > 1 or mstridx is None: 507 if other > 31: 508 year = other 509 else: 510 day = other 511 512 elif len_ymd == 2: 513 # Two members with numbers 514 if self[0] > 31: 515 # 99-01 516 year, month = self 517 elif self[1] > 31: 518 # 01-99 519 month, year = self 520 elif dayfirst and self[1] <= 12: 521 # 13-01 522 day, month = self 523 else: 524 # 01-13 525 month, day = self 526 527 elif len_ymd == 3: 528 # Three members 529 if mstridx == 0: 530 if self[1] > 31: 531 # Apr-2003-25 532 month, year, day = self 533 else: 534 month, day, year = self 535 elif mstridx == 1: 536 if self[0] > 31 or (yearfirst and self[2] <= 31): 537 # 99-Jan-01 538 year, month, day = self 539 else: 540 # 01-Jan-01 541 # Give precendence to day-first, since 542 # two-digit years is usually hand-written. 543 day, month, year = self 544 545 elif mstridx == 2: 546 # WTF!? 547 if self[1] > 31: 548 # 01-99-Jan 549 day, year, month = self 550 else: 551 # 99-01-Jan 552 year, day, month = self 553 554 else: 555 if (self[0] > 31 or 556 self.ystridx == 0 or 557 (yearfirst and self[1] <= 12 and self[2] <= 31)): 558 # 99-01-01 559 if dayfirst and self[2] <= 12: 560 year, day, month = self 561 else: 562 year, month, day = self 563 elif self[0] > 12 or (dayfirst and self[1] <= 12): 564 # 13-01-01 565 day, month, year = self 566 else: 567 # 01-13-01 568 month, day, year = self 569 570 return year, month, day 571 572 573class parser(object): 574 def __init__(self, info=None): 575 self.info = info or parserinfo() 576 577 def parse(self, timestr, default=None, 578 ignoretz=False, tzinfos=None, **kwargs): 579 """ 580 Parse the date/time string into a :class:`datetime.datetime` object. 581 582 :param timestr: 583 Any date/time string using the supported formats. 584 585 :param default: 586 The default datetime object, if this is a datetime object and not 587 ``None``, elements specified in ``timestr`` replace elements in the 588 default object. 589 590 :param ignoretz: 591 If set ``True``, time zones in parsed strings are ignored and a 592 naive :class:`datetime.datetime` object is returned. 593 594 :param tzinfos: 595 Additional time zone names / aliases which may be present in the 596 string. This argument maps time zone names (and optionally offsets 597 from those time zones) to time zones. This parameter can be a 598 dictionary with timezone aliases mapping time zone names to time 599 zones or a function taking two parameters (``tzname`` and 600 ``tzoffset``) and returning a time zone. 601 602 The timezones to which the names are mapped can be an integer 603 offset from UTC in seconds or a :class:`tzinfo` object. 604 605 .. doctest:: 606 :options: +NORMALIZE_WHITESPACE 607 608 >>> from dateutil.parser import parse 609 >>> from dateutil.tz import gettz 610 >>> tzinfos = {"BRST": -7200, "CST": gettz("America/Chicago")} 611 >>> parse("2012-01-19 17:21:00 BRST", tzinfos=tzinfos) 612 datetime.datetime(2012, 1, 19, 17, 21, tzinfo=tzoffset(u'BRST', -7200)) 613 >>> parse("2012-01-19 17:21:00 CST", tzinfos=tzinfos) 614 datetime.datetime(2012, 1, 19, 17, 21, 615 tzinfo=tzfile('/usr/share/zoneinfo/America/Chicago')) 616 617 This parameter is ignored if ``ignoretz`` is set. 618 619 :param \\*\\*kwargs: 620 Keyword arguments as passed to ``_parse()``. 621 622 :return: 623 Returns a :class:`datetime.datetime` object or, if the 624 ``fuzzy_with_tokens`` option is ``True``, returns a tuple, the 625 first element being a :class:`datetime.datetime` object, the second 626 a tuple containing the fuzzy tokens. 627 628 :raises ValueError: 629 Raised for invalid or unknown string format, if the provided 630 :class:`tzinfo` is not in a valid format, or if an invalid date 631 would be created. 632 633 :raises TypeError: 634 Raised for non-string or character stream input. 635 636 :raises OverflowError: 637 Raised if the parsed date exceeds the largest valid C integer on 638 your system. 639 """ 640 641 if default is None: 642 default = datetime.datetime.now().replace(hour=0, minute=0, 643 second=0, microsecond=0) 644 645 res, skipped_tokens = self._parse(timestr, **kwargs) 646 647 if res is None: 648 raise ValueError("Unknown string format:", timestr) 649 650 if len(res) == 0: 651 raise ValueError("String does not contain a date:", timestr) 652 653 ret = self._build_naive(res, default) 654 655 if not ignoretz: 656 ret = self._build_tzaware(ret, res, tzinfos) 657 658 if kwargs.get('fuzzy_with_tokens', False): 659 return ret, skipped_tokens 660 else: 661 return ret 662 663 class _result(_resultbase): 664 __slots__ = ["year", "month", "day", "weekday", 665 "hour", "minute", "second", "microsecond", 666 "tzname", "tzoffset", "ampm","any_unused_tokens"] 667 668 def _parse(self, timestr, dayfirst=None, yearfirst=None, fuzzy=False, 669 fuzzy_with_tokens=False): 670 """ 671 Private method which performs the heavy lifting of parsing, called from 672 ``parse()``, which passes on its ``kwargs`` to this function. 673 674 :param timestr: 675 The string to parse. 676 677 :param dayfirst: 678 Whether to interpret the first value in an ambiguous 3-integer date 679 (e.g. 01/05/09) as the day (``True``) or month (``False``). If 680 ``yearfirst`` is set to ``True``, this distinguishes between YDM 681 and YMD. If set to ``None``, this value is retrieved from the 682 current :class:`parserinfo` object (which itself defaults to 683 ``False``). 684 685 :param yearfirst: 686 Whether to interpret the first value in an ambiguous 3-integer date 687 (e.g. 01/05/09) as the year. If ``True``, the first number is taken 688 to be the year, otherwise the last number is taken to be the year. 689 If this is set to ``None``, the value is retrieved from the current 690 :class:`parserinfo` object (which itself defaults to ``False``). 691 692 :param fuzzy: 693 Whether to allow fuzzy parsing, allowing for string like "Today is 694 January 1, 2047 at 8:21:00AM". 695 696 :param fuzzy_with_tokens: 697 If ``True``, ``fuzzy`` is automatically set to True, and the parser 698 will return a tuple where the first element is the parsed 699 :class:`datetime.datetime` datetimestamp and the second element is 700 a tuple containing the portions of the string which were ignored: 701 702 .. doctest:: 703 704 >>> from dateutil.parser import parse 705 >>> parse("Today is January 1, 2047 at 8:21:00AM", fuzzy_with_tokens=True) 706 (datetime.datetime(2047, 1, 1, 8, 21), (u'Today is ', u' ', u'at ')) 707 708 """ 709 if fuzzy_with_tokens: 710 fuzzy = True 711 712 info = self.info 713 714 if dayfirst is None: 715 dayfirst = info.dayfirst 716 717 if yearfirst is None: 718 yearfirst = info.yearfirst 719 720 res = self._result() 721 l = _timelex.split(timestr) # Splits the timestr into tokens 722 723 skipped_idxs = [] 724 725 # year/month/day list 726 ymd = _ymd() 727 728 len_l = len(l) 729 i = 0 730 try: 731 while i < len_l: 732 733 # Check if it's a number 734 value_repr = l[i] 735 try: 736 value = float(value_repr) 737 except ValueError: 738 value = None 739 740 if value is not None: 741 # Numeric token 742 i = self._parse_numeric_token(l, i, info, ymd, res, fuzzy) 743 744 # Check weekday 745 elif info.weekday(l[i]) is not None: 746 value = info.weekday(l[i]) 747 res.weekday = value 748 749 # Check month name 750 elif info.month(l[i]) is not None: 751 value = info.month(l[i]) 752 ymd.append(value, 'M') 753 754 if i + 1 < len_l: 755 if l[i + 1] in ('-', '/'): 756 # Jan-01[-99] 757 sep = l[i + 1] 758 ymd.append(l[i + 2]) 759 760 if i + 3 < len_l and l[i + 3] == sep: 761 # Jan-01-99 762 ymd.append(l[i + 4]) 763 i += 2 764 765 i += 2 766 767 elif (i + 4 < len_l and l[i + 1] == l[i + 3] == ' ' and 768 info.pertain(l[i + 2])): 769 # Jan of 01 770 # In this case, 01 is clearly year 771 if l[i + 4].isdigit(): 772 # Convert it here to become unambiguous 773 value = int(l[i + 4]) 774 year = str(info.convertyear(value)) 775 ymd.append(year, 'Y') 776 else: 777 # Wrong guess 778 pass 779 # TODO: not hit in tests 780 i += 4 781 782 # Check am/pm 783 elif info.ampm(l[i]) is not None: 784 value = info.ampm(l[i]) 785 val_is_ampm = self._ampm_valid(res.hour, res.ampm, fuzzy) 786 787 if val_is_ampm: 788 res.hour = self._adjust_ampm(res.hour, value) 789 res.ampm = value 790 791 elif fuzzy: 792 skipped_idxs.append(i) 793 794 # Check for a timezone name 795 elif self._could_be_tzname(res.hour, res.tzname, res.tzoffset, l[i]): 796 res.tzname = l[i] 797 res.tzoffset = info.tzoffset(res.tzname) 798 799 # Check for something like GMT+3, or BRST+3. Notice 800 # that it doesn't mean "I am 3 hours after GMT", but 801 # "my time +3 is GMT". If found, we reverse the 802 # logic so that timezone parsing code will get it 803 # right. 804 if i + 1 < len_l and l[i + 1] in ('+', '-'): 805 l[i + 1] = ('+', '-')[l[i + 1] == '+'] 806 res.tzoffset = None 807 if info.utczone(res.tzname): 808 # With something like GMT+3, the timezone 809 # is *not* GMT. 810 res.tzname = None 811 812 # Check for a numbered timezone 813 elif res.hour is not None and l[i] in ('+', '-'): 814 signal = (-1, 1)[l[i] == '+'] 815 len_li = len(l[i + 1]) 816 817 # TODO: check that l[i + 1] is integer? 818 if len_li == 4: 819 # -0300 820 hour_offset = int(l[i + 1][:2]) 821 min_offset = int(l[i + 1][2:]) 822 elif i + 2 < len_l and l[i + 2] == ':': 823 # -03:00 824 hour_offset = int(l[i + 1]) 825 min_offset = int(l[i + 3]) # TODO: Check that l[i+3] is minute-like? 826 i += 2 827 elif len_li <= 2: 828 # -[0]3 829 hour_offset = int(l[i + 1][:2]) 830 min_offset = 0 831 else: 832 raise ValueError(timestr) 833 834 res.tzoffset = signal * (hour_offset * 3600 + min_offset * 60) 835 836 # Look for a timezone name between parenthesis 837 if (i + 5 < len_l and 838 info.jump(l[i + 2]) and l[i + 3] == '(' and 839 l[i + 5] == ')' and 840 3 <= len(l[i + 4]) and 841 self._could_be_tzname(res.hour, res.tzname, 842 None, l[i + 4])): 843 # -0300 (BRST) 844 res.tzname = l[i + 4] 845 i += 4 846 847 i += 1 848 849 # Check jumps 850 elif not (info.jump(l[i]) or fuzzy): 851 raise ValueError(timestr) 852 853 else: 854 skipped_idxs.append(i) 855 i += 1 856 857 # Process year/month/day 858 year, month, day = ymd.resolve_ymd(yearfirst, dayfirst) 859 860 res.century_specified = ymd.century_specified 861 res.year = year 862 res.month = month 863 res.day = day 864 865 except (IndexError, ValueError): 866 return None, None 867 868 if not info.validate(res): 869 return None, None 870 871 if fuzzy_with_tokens: 872 skipped_tokens = self._recombine_skipped(l, skipped_idxs) 873 return res, tuple(skipped_tokens) 874 else: 875 return res, None 876 877 def _parse_numeric_token(self, tokens, idx, info, ymd, res, fuzzy): 878 # Token is a number 879 value_repr = tokens[idx] 880 try: 881 value = self._to_decimal(value_repr) 882 except Exception as e: 883 six.raise_from(ValueError('Unknown numeric token'), e) 884 885 len_li = len(value_repr) 886 887 len_l = len(tokens) 888 889 if (len(ymd) == 3 and len_li in (2, 4) and 890 res.hour is None and 891 (idx + 1 >= len_l or 892 (tokens[idx + 1] != ':' and 893 info.hms(tokens[idx + 1]) is None))): 894 # 19990101T23[59] 895 s = tokens[idx] 896 res.hour = int(s[:2]) 897 898 if len_li == 4: 899 res.minute = int(s[2:]) 900 901 elif len_li == 6 or (len_li > 6 and tokens[idx].find('.') == 6): 902 # YYMMDD or HHMMSS[.ss] 903 s = tokens[idx] 904 905 if not ymd and '.' not in tokens[idx]: 906 ymd.append(s[:2]) 907 ymd.append(s[2:4]) 908 ymd.append(s[4:]) 909 else: 910 # 19990101T235959[.59] 911 912 # TODO: Check if res attributes already set. 913 res.hour = int(s[:2]) 914 res.minute = int(s[2:4]) 915 res.second, res.microsecond = self._parsems(s[4:]) 916 917 elif len_li in (8, 12, 14): 918 # YYYYMMDD 919 s = tokens[idx] 920 ymd.append(s[:4], 'Y') 921 ymd.append(s[4:6]) 922 ymd.append(s[6:8]) 923 924 if len_li > 8: 925 res.hour = int(s[8:10]) 926 res.minute = int(s[10:12]) 927 928 if len_li > 12: 929 res.second = int(s[12:]) 930 931 elif self._find_hms_idx(idx, tokens, info, allow_jump=True) is not None: 932 # HH[ ]h or MM[ ]m or SS[.ss][ ]s 933 hms_idx = self._find_hms_idx(idx, tokens, info, allow_jump=True) 934 (idx, hms) = self._parse_hms(idx, tokens, info, hms_idx) 935 if hms is not None: 936 # TODO: checking that hour/minute/second are not 937 # already set? 938 self._assign_hms(res, value_repr, hms) 939 940 elif idx + 2 < len_l and tokens[idx + 1] == ':': 941 # HH:MM[:SS[.ss]] 942 res.hour = int(value) 943 value = self._to_decimal(tokens[idx + 2]) # TODO: try/except for this? 944 (res.minute, res.second) = self._parse_min_sec(value) 945 946 if idx + 4 < len_l and tokens[idx + 3] == ':': 947 res.second, res.microsecond = self._parsems(tokens[idx + 4]) 948 949 idx += 2 950 951 idx += 2 952 953 elif idx + 1 < len_l and tokens[idx + 1] in ('-', '/', '.'): 954 sep = tokens[idx + 1] 955 ymd.append(value_repr) 956 957 if idx + 2 < len_l and not info.jump(tokens[idx + 2]): 958 if tokens[idx + 2].isdigit(): 959 # 01-01[-01] 960 ymd.append(tokens[idx + 2]) 961 else: 962 # 01-Jan[-01] 963 value = info.month(tokens[idx + 2]) 964 965 if value is not None: 966 ymd.append(value, 'M') 967 else: 968 raise ValueError() 969 970 if idx + 3 < len_l and tokens[idx + 3] == sep: 971 # We have three members 972 value = info.month(tokens[idx + 4]) 973 974 if value is not None: 975 ymd.append(value, 'M') 976 else: 977 ymd.append(tokens[idx + 4]) 978 idx += 2 979 980 idx += 1 981 idx += 1 982 983 elif idx + 1 >= len_l or info.jump(tokens[idx + 1]): 984 if idx + 2 < len_l and info.ampm(tokens[idx + 2]) is not None: 985 # 12 am 986 hour = int(value) 987 res.hour = self._adjust_ampm(hour, info.ampm(tokens[idx + 2])) 988 idx += 1 989 else: 990 # Year, month or day 991 ymd.append(value) 992 idx += 1 993 994 elif info.ampm(tokens[idx + 1]) is not None and (0 <= value < 24): 995 # 12am 996 hour = int(value) 997 res.hour = self._adjust_ampm(hour, info.ampm(tokens[idx + 1])) 998 idx += 1 999 1000 elif ymd.could_be_day(value): 1001 ymd.append(value) 1002 1003 elif not fuzzy: 1004 raise ValueError() 1005 1006 return idx 1007 1008 def _find_hms_idx(self, idx, tokens, info, allow_jump): 1009 len_l = len(tokens) 1010 1011 if idx+1 < len_l and info.hms(tokens[idx+1]) is not None: 1012 # There is an "h", "m", or "s" label following this token. We take 1013 # assign the upcoming label to the current token. 1014 # e.g. the "12" in 12h" 1015 hms_idx = idx + 1 1016 1017 elif (allow_jump and idx+2 < len_l and tokens[idx+1] == ' ' and 1018 info.hms(tokens[idx+2]) is not None): 1019 # There is a space and then an "h", "m", or "s" label. 1020 # e.g. the "12" in "12 h" 1021 hms_idx = idx + 2 1022 1023 elif idx > 0 and info.hms(tokens[idx-1]) is not None: 1024 # There is a "h", "m", or "s" preceeding this token. Since neither 1025 # of the previous cases was hit, there is no label following this 1026 # token, so we use the previous label. 1027 # e.g. the "04" in "12h04" 1028 hms_idx = idx-1 1029 1030 elif (1 < idx == len_l-1 and tokens[idx-1] == ' ' and 1031 info.hms(tokens[idx-2]) is not None): 1032 # If we are looking at the final token, we allow for a 1033 # backward-looking check to skip over a space. 1034 # TODO: Are we sure this is the right condition here? 1035 hms_idx = idx - 2 1036 1037 else: 1038 hms_idx = None 1039 1040 return hms_idx 1041 1042 def _assign_hms(self, res, value_repr, hms): 1043 # See GH issue #427, fixing float rounding 1044 value = self._to_decimal(value_repr) 1045 1046 if hms == 0: 1047 # Hour 1048 res.hour = int(value) 1049 if value % 1: 1050 res.minute = int(60*(value % 1)) 1051 1052 elif hms == 1: 1053 (res.minute, res.second) = self._parse_min_sec(value) 1054 1055 elif hms == 2: 1056 (res.second, res.microsecond) = self._parsems(value_repr) 1057 1058 def _could_be_tzname(self, hour, tzname, tzoffset, token): 1059 return (hour is not None and 1060 tzname is None and 1061 tzoffset is None and 1062 len(token) <= 5 and 1063 all(x in string.ascii_uppercase for x in token)) 1064 1065 def _ampm_valid(self, hour, ampm, fuzzy): 1066 """ 1067 For fuzzy parsing, 'a' or 'am' (both valid English words) 1068 may erroneously trigger the AM/PM flag. Deal with that 1069 here. 1070 """ 1071 val_is_ampm = True 1072 1073 # If there's already an AM/PM flag, this one isn't one. 1074 if fuzzy and ampm is not None: 1075 val_is_ampm = False 1076 1077 # If AM/PM is found and hour is not, raise a ValueError 1078 if hour is None: 1079 if fuzzy: 1080 val_is_ampm = False 1081 else: 1082 raise ValueError('No hour specified with AM or PM flag.') 1083 elif not 0 <= hour <= 12: 1084 # If AM/PM is found, it's a 12 hour clock, so raise 1085 # an error for invalid range 1086 if fuzzy: 1087 val_is_ampm = False 1088 else: 1089 raise ValueError('Invalid hour specified for 12-hour clock.') 1090 1091 return val_is_ampm 1092 1093 def _adjust_ampm(self, hour, ampm): 1094 if hour < 12 and ampm == 1: 1095 hour += 12 1096 elif hour == 12 and ampm == 0: 1097 hour = 0 1098 return hour 1099 1100 def _parse_min_sec(self, value): 1101 # TODO: Every usage of this function sets res.second to the return 1102 # value. Are there any cases where second will be returned as None and 1103 # we *dont* want to set res.second = None? 1104 minute = int(value) 1105 second = None 1106 1107 sec_remainder = value % 1 1108 if sec_remainder: 1109 second = int(60 * sec_remainder) 1110 return (minute, second) 1111 1112 def _parsems(self, value): 1113 """Parse a I[.F] seconds value into (seconds, microseconds).""" 1114 if "." not in value: 1115 return int(value), 0 1116 else: 1117 i, f = value.split(".") 1118 return int(i), int(f.ljust(6, "0")[:6]) 1119 1120 def _parse_hms(self, idx, tokens, info, hms_idx): 1121 # TODO: Is this going to admit a lot of false-positives for when we 1122 # just happen to have digits and "h", "m" or "s" characters in non-date 1123 # text? I guess hex hashes won't have that problem, but there's plenty 1124 # of random junk out there. 1125 if hms_idx is None: 1126 hms = None 1127 new_idx = idx 1128 elif hms_idx > idx: 1129 hms = info.hms(tokens[hms_idx]) 1130 new_idx = hms_idx 1131 else: 1132 # Looking backwards, increment one. 1133 hms = info.hms(tokens[hms_idx]) + 1 1134 new_idx = idx 1135 1136 return (new_idx, hms) 1137 1138 def _recombine_skipped(self, tokens, skipped_idxs): 1139 """ 1140 >>> tokens = ["foo", " ", "bar", " ", "19June2000", "baz"] 1141 >>> skipped_idxs = [0, 1, 2, 5] 1142 >>> _recombine_skipped(tokens, skipped_idxs) 1143 ["foo bar", "baz"] 1144 """ 1145 skipped_tokens = [] 1146 for i, idx in enumerate(sorted(skipped_idxs)): 1147 if i > 0 and idx - 1 == skipped_idxs[i - 1]: 1148 skipped_tokens[-1] = skipped_tokens[-1] + tokens[idx] 1149 else: 1150 skipped_tokens.append(tokens[idx]) 1151 1152 return skipped_tokens 1153 1154 def _build_tzinfo(self, tzinfos, tzname, tzoffset): 1155 if callable(tzinfos): 1156 tzdata = tzinfos(tzname, tzoffset) 1157 else: 1158 tzdata = tzinfos.get(tzname) 1159 # handle case where tzinfo is paased an options that returns None 1160 # eg tzinfos = {'BRST' : None} 1161 if isinstance(tzdata, datetime.tzinfo) or tzdata is None: 1162 tzinfo = tzdata 1163 elif isinstance(tzdata, text_type): 1164 tzinfo = tz.tzstr(tzdata) 1165 elif isinstance(tzdata, integer_types): 1166 tzinfo = tz.tzoffset(tzname, tzdata) 1167 return tzinfo 1168 1169 def _build_tzaware(self, naive, res, tzinfos): 1170 if (callable(tzinfos) or (tzinfos and res.tzname in tzinfos)): 1171 tzinfo = self._build_tzinfo(tzinfos, res.tzname, res.tzoffset) 1172 aware = naive.replace(tzinfo=tzinfo) 1173 aware = self._assign_tzname(aware, res.tzname) 1174 1175 elif res.tzname and res.tzname in time.tzname: 1176 aware = naive.replace(tzinfo=tz.tzlocal()) 1177 1178 # Handle ambiguous local datetime 1179 aware = self._assign_tzname(aware, res.tzname) 1180 1181 # This is mostly relevant for winter GMT zones parsed in the UK 1182 if (aware.tzname() != res.tzname and 1183 res.tzname in self.info.UTCZONE): 1184 aware = aware.replace(tzinfo=tz.tzutc()) 1185 1186 elif res.tzoffset == 0: 1187 aware = naive.replace(tzinfo=tz.tzutc()) 1188 1189 elif res.tzoffset: 1190 aware = naive.replace(tzinfo=tz.tzoffset(res.tzname, res.tzoffset)) 1191 1192 elif not res.tzname and not res.tzoffset: 1193 # i.e. no timezone information was found. 1194 aware = naive 1195 1196 elif res.tzname: 1197 # tz-like string was parsed but we don't know what to do 1198 # with it 1199 warnings.warn("tzname {tzname} identified but not understood. " 1200 "Pass `tzinfos` argument in order to correctly " 1201 "return a timezone-aware datetime. In a future " 1202 "version, this will raise an " 1203 "exception.".format(tzname=res.tzname), 1204 category=UnknownTimezoneWarning) 1205 aware = naive 1206 1207 return aware 1208 1209 def _build_naive(self, res, default): 1210 repl = {} 1211 for attr in ("year", "month", "day", "hour", 1212 "minute", "second", "microsecond"): 1213 value = getattr(res, attr) 1214 if value is not None: 1215 repl[attr] = value 1216 1217 if 'day' not in repl: 1218 # If the default day exceeds the last day of the month, fall back 1219 # to the end of the month. 1220 cyear = default.year if res.year is None else res.year 1221 cmonth = default.month if res.month is None else res.month 1222 cday = default.day if res.day is None else res.day 1223 1224 if cday > monthrange(cyear, cmonth)[1]: 1225 repl['day'] = monthrange(cyear, cmonth)[1] 1226 1227 naive = default.replace(**repl) 1228 1229 if res.weekday is not None and not res.day: 1230 naive = naive + relativedelta.relativedelta(weekday=res.weekday) 1231 1232 return naive 1233 1234 def _assign_tzname(self, dt, tzname): 1235 if dt.tzname() != tzname: 1236 new_dt = tz.enfold(dt, fold=1) 1237 if new_dt.tzname() == tzname: 1238 return new_dt 1239 1240 return dt 1241 1242 def _to_decimal(self, val): 1243 try: 1244 decimal_value = Decimal(val) 1245 # See GH 662, edge case, infinite value should not be converted via `_to_decimal` 1246 if not decimal_value.is_finite(): 1247 raise ValueError("Converted decimal value is infinite or NaN") 1248 except Exception as e: 1249 msg = "Could not convert %s to decimal" % val 1250 six.raise_from(ValueError(msg), e) 1251 else: 1252 return decimal_value 1253 1254 1255DEFAULTPARSER = parser() 1256 1257 1258def parse(timestr, parserinfo=None, **kwargs): 1259 """ 1260 1261 Parse a string in one of the supported formats, using the 1262 ``parserinfo`` parameters. 1263 1264 :param timestr: 1265 A string containing a date/time stamp. 1266 1267 :param parserinfo: 1268 A :class:`parserinfo` object containing parameters for the parser. 1269 If ``None``, the default arguments to the :class:`parserinfo` 1270 constructor are used. 1271 1272 The ``**kwargs`` parameter takes the following keyword arguments: 1273 1274 :param default: 1275 The default datetime object, if this is a datetime object and not 1276 ``None``, elements specified in ``timestr`` replace elements in the 1277 default object. 1278 1279 :param ignoretz: 1280 If set ``True``, time zones in parsed strings are ignored and a naive 1281 :class:`datetime` object is returned. 1282 1283 :param tzinfos: 1284 Additional time zone names / aliases which may be present in the 1285 string. This argument maps time zone names (and optionally offsets 1286 from those time zones) to time zones. This parameter can be a 1287 dictionary with timezone aliases mapping time zone names to time 1288 zones or a function taking two parameters (``tzname`` and 1289 ``tzoffset``) and returning a time zone. 1290 1291 The timezones to which the names are mapped can be an integer 1292 offset from UTC in seconds or a :class:`tzinfo` object. 1293 1294 .. doctest:: 1295 :options: +NORMALIZE_WHITESPACE 1296 1297 >>> from dateutil.parser import parse 1298 >>> from dateutil.tz import gettz 1299 >>> tzinfos = {"BRST": -7200, "CST": gettz("America/Chicago")} 1300 >>> parse("2012-01-19 17:21:00 BRST", tzinfos=tzinfos) 1301 datetime.datetime(2012, 1, 19, 17, 21, tzinfo=tzoffset(u'BRST', -7200)) 1302 >>> parse("2012-01-19 17:21:00 CST", tzinfos=tzinfos) 1303 datetime.datetime(2012, 1, 19, 17, 21, 1304 tzinfo=tzfile('/usr/share/zoneinfo/America/Chicago')) 1305 1306 This parameter is ignored if ``ignoretz`` is set. 1307 1308 :param dayfirst: 1309 Whether to interpret the first value in an ambiguous 3-integer date 1310 (e.g. 01/05/09) as the day (``True``) or month (``False``). If 1311 ``yearfirst`` is set to ``True``, this distinguishes between YDM and 1312 YMD. If set to ``None``, this value is retrieved from the current 1313 :class:`parserinfo` object (which itself defaults to ``False``). 1314 1315 :param yearfirst: 1316 Whether to interpret the first value in an ambiguous 3-integer date 1317 (e.g. 01/05/09) as the year. If ``True``, the first number is taken to 1318 be the year, otherwise the last number is taken to be the year. If 1319 this is set to ``None``, the value is retrieved from the current 1320 :class:`parserinfo` object (which itself defaults to ``False``). 1321 1322 :param fuzzy: 1323 Whether to allow fuzzy parsing, allowing for string like "Today is 1324 January 1, 2047 at 8:21:00AM". 1325 1326 :param fuzzy_with_tokens: 1327 If ``True``, ``fuzzy`` is automatically set to True, and the parser 1328 will return a tuple where the first element is the parsed 1329 :class:`datetime.datetime` datetimestamp and the second element is 1330 a tuple containing the portions of the string which were ignored: 1331 1332 .. doctest:: 1333 1334 >>> from dateutil.parser import parse 1335 >>> parse("Today is January 1, 2047 at 8:21:00AM", fuzzy_with_tokens=True) 1336 (datetime.datetime(2047, 1, 1, 8, 21), (u'Today is ', u' ', u'at ')) 1337 1338 :return: 1339 Returns a :class:`datetime.datetime` object or, if the 1340 ``fuzzy_with_tokens`` option is ``True``, returns a tuple, the 1341 first element being a :class:`datetime.datetime` object, the second 1342 a tuple containing the fuzzy tokens. 1343 1344 :raises ValueError: 1345 Raised for invalid or unknown string format, if the provided 1346 :class:`tzinfo` is not in a valid format, or if an invalid date 1347 would be created. 1348 1349 :raises OverflowError: 1350 Raised if the parsed date exceeds the largest valid C integer on 1351 your system. 1352 """ 1353 if parserinfo: 1354 return parser(parserinfo).parse(timestr, **kwargs) 1355 else: 1356 return DEFAULTPARSER.parse(timestr, **kwargs) 1357 1358 1359class _tzparser(object): 1360 1361 class _result(_resultbase): 1362 1363 __slots__ = ["stdabbr", "stdoffset", "dstabbr", "dstoffset", 1364 "start", "end"] 1365 1366 class _attr(_resultbase): 1367 __slots__ = ["month", "week", "weekday", 1368 "yday", "jyday", "day", "time"] 1369 1370 def __repr__(self): 1371 return self._repr("") 1372 1373 def __init__(self): 1374 _resultbase.__init__(self) 1375 self.start = self._attr() 1376 self.end = self._attr() 1377 1378 def parse(self, tzstr): 1379 res = self._result() 1380 l = [x for x in re.split(r'([,:.]|[a-zA-Z]+|[0-9]+)',tzstr) if x] 1381 used_idxs = list() 1382 try: 1383 1384 len_l = len(l) 1385 1386 i = 0 1387 while i < len_l: 1388 # BRST+3[BRDT[+2]] 1389 j = i 1390 while j < len_l and not [x for x in l[j] 1391 if x in "0123456789:,-+"]: 1392 j += 1 1393 if j != i: 1394 if not res.stdabbr: 1395 offattr = "stdoffset" 1396 res.stdabbr = "".join(l[i:j]) 1397 else: 1398 offattr = "dstoffset" 1399 res.dstabbr = "".join(l[i:j]) 1400 1401 for ii in range(j): 1402 used_idxs.append(ii) 1403 i = j 1404 if (i < len_l and (l[i] in ('+', '-') or l[i][0] in 1405 "0123456789")): 1406 if l[i] in ('+', '-'): 1407 # Yes, that's right. See the TZ variable 1408 # documentation. 1409 signal = (1, -1)[l[i] == '+'] 1410 used_idxs.append(i) 1411 i += 1 1412 else: 1413 signal = -1 1414 len_li = len(l[i]) 1415 if len_li == 4: 1416 # -0300 1417 setattr(res, offattr, (int(l[i][:2]) * 3600 + 1418 int(l[i][2:]) * 60) * signal) 1419 elif i + 1 < len_l and l[i + 1] == ':': 1420 # -03:00 1421 setattr(res, offattr, 1422 (int(l[i]) * 3600 + 1423 int(l[i + 2]) * 60) * signal) 1424 used_idxs.append(i) 1425 i += 2 1426 elif len_li <= 2: 1427 # -[0]3 1428 setattr(res, offattr, 1429 int(l[i][:2]) * 3600 * signal) 1430 else: 1431 return None 1432 used_idxs.append(i) 1433 i += 1 1434 if res.dstabbr: 1435 break 1436 else: 1437 break 1438 1439 1440 if i < len_l: 1441 for j in range(i, len_l): 1442 if l[j] == ';': 1443 l[j] = ',' 1444 1445 assert l[i] == ',' 1446 1447 i += 1 1448 1449 if i >= len_l: 1450 pass 1451 elif (8 <= l.count(',') <= 9 and 1452 not [y for x in l[i:] if x != ',' 1453 for y in x if y not in "0123456789+-"]): 1454 # GMT0BST,3,0,30,3600,10,0,26,7200[,3600] 1455 for x in (res.start, res.end): 1456 x.month = int(l[i]) 1457 used_idxs.append(i) 1458 i += 2 1459 if l[i] == '-': 1460 value = int(l[i + 1]) * -1 1461 used_idxs.append(i) 1462 i += 1 1463 else: 1464 value = int(l[i]) 1465 used_idxs.append(i) 1466 i += 2 1467 if value: 1468 x.week = value 1469 x.weekday = (int(l[i]) - 1) % 7 1470 else: 1471 x.day = int(l[i]) 1472 used_idxs.append(i) 1473 i += 2 1474 x.time = int(l[i]) 1475 used_idxs.append(i) 1476 i += 2 1477 if i < len_l: 1478 if l[i] in ('-', '+'): 1479 signal = (-1, 1)[l[i] == "+"] 1480 used_idxs.append(i) 1481 i += 1 1482 else: 1483 signal = 1 1484 used_idxs.append(i) 1485 res.dstoffset = (res.stdoffset + int(l[i]) * signal) 1486 1487 # This was a made-up format that is not in normal use 1488 warn(('Parsed time zone "%s"' % tzstr) + 1489 'is in a non-standard dateutil-specific format, which ' + 1490 'is now deprecated; support for parsing this format ' + 1491 'will be removed in future versions. It is recommended ' + 1492 'that you switch to a standard format like the GNU ' + 1493 'TZ variable format.', tz.DeprecatedTzFormatWarning) 1494 elif (l.count(',') == 2 and l[i:].count('/') <= 2 and 1495 not [y for x in l[i:] if x not in (',', '/', 'J', 'M', 1496 '.', '-', ':') 1497 for y in x if y not in "0123456789"]): 1498 for x in (res.start, res.end): 1499 if l[i] == 'J': 1500 # non-leap year day (1 based) 1501 used_idxs.append(i) 1502 i += 1 1503 x.jyday = int(l[i]) 1504 elif l[i] == 'M': 1505 # month[-.]week[-.]weekday 1506 used_idxs.append(i) 1507 i += 1 1508 x.month = int(l[i]) 1509 used_idxs.append(i) 1510 i += 1 1511 assert l[i] in ('-', '.') 1512 used_idxs.append(i) 1513 i += 1 1514 x.week = int(l[i]) 1515 if x.week == 5: 1516 x.week = -1 1517 used_idxs.append(i) 1518 i += 1 1519 assert l[i] in ('-', '.') 1520 used_idxs.append(i) 1521 i += 1 1522 x.weekday = (int(l[i]) - 1) % 7 1523 else: 1524 # year day (zero based) 1525 x.yday = int(l[i]) + 1 1526 1527 used_idxs.append(i) 1528 i += 1 1529 1530 if i < len_l and l[i] == '/': 1531 used_idxs.append(i) 1532 i += 1 1533 # start time 1534 len_li = len(l[i]) 1535 if len_li == 4: 1536 # -0300 1537 x.time = (int(l[i][:2]) * 3600 + 1538 int(l[i][2:]) * 60) 1539 elif i + 1 < len_l and l[i + 1] == ':': 1540 # -03:00 1541 x.time = int(l[i]) * 3600 + int(l[i + 2]) * 60 1542 used_idxs.append(i) 1543 i += 2 1544 if i + 1 < len_l and l[i + 1] == ':': 1545 used_idxs.append(i) 1546 i += 2 1547 x.time += int(l[i]) 1548 elif len_li <= 2: 1549 # -[0]3 1550 x.time = (int(l[i][:2]) * 3600) 1551 else: 1552 return None 1553 used_idxs.append(i) 1554 i += 1 1555 1556 assert i == len_l or l[i] == ',' 1557 1558 i += 1 1559 1560 assert i >= len_l 1561 1562 except (IndexError, ValueError, AssertionError): 1563 return None 1564 1565 unused_idxs = set(range(len_l)).difference(used_idxs) 1566 res.any_unused_tokens = not {l[n] for n in unused_idxs}.issubset({",",":"}) 1567 return res 1568 1569 1570DEFAULTTZPARSER = _tzparser() 1571 1572 1573def _parsetz(tzstr): 1574 return DEFAULTTZPARSER.parse(tzstr) 1575 1576class UnknownTimezoneWarning(RuntimeWarning): 1577 """Raised when the parser finds a timezone it cannot parse into a tzinfo""" 1578# vim:ts=4:sw=4:et 1579