"""Provides the :class:`DateTimeParser <arrow.parser.DateTimeParser>` class, a better way to parse datetime strings."""

import re
import sys
from datetime import datetime, timedelta
from datetime import tzinfo as dt_tzinfo
from functools import lru_cache
from typing import (
    Any,
    ClassVar,
    Dict,
    Iterable,
    List,
    Match,
    Optional,
    Pattern,
    SupportsFloat,
    SupportsInt,
    Tuple,
    Union,
    cast,
    overload,
)

from dateutil import tz

from arrow import locales
from arrow.constants import DEFAULT_LOCALE
from arrow.util import next_weekday, normalize_timestamp

if sys.version_info < (3, 8):  # pragma: no cover
    from typing_extensions import Literal, TypedDict
else:
    from typing import Literal, TypedDict  # pragma: no cover


class ParserError(ValueError):
    """Raised when a datetime string or format cannot be parsed."""


# Allows for ParserErrors to be propagated from _build_datetime()
# when day_of_year errors occur.
# Before this, the ParserErrors were caught by the try/except in
# _parse_multiformat() and the appropriate error message was not
# transmitted to the user.
class ParserMatchError(ParserError):
    """Raised when a format does not match the input string.

    This is the only error that :meth:`DateTimeParser._parse_multiformat`
    swallows while trying candidate formats.
    """


_WEEKDATE_ELEMENT = Union[str, bytes, SupportsInt, bytearray]

_FORMAT_TYPE = Literal[
    "YYYY",
    "YY",
    "MM",
    "M",
    "DDDD",
    "DDD",
    "DD",
    "D",
    "HH",
    "H",
    "hh",
    "h",
    "mm",
    "m",
    "ss",
    "s",
    "X",
    "x",
    "ZZZ",
    "ZZ",
    "Z",
    "S",
    "W",
    "MMMM",
    "MMM",
    "Do",
    "dddd",
    "ddd",
    "d",
    "a",
    "A",
]


class _Parts(TypedDict, total=False):
    # Intermediate values collected by _parse_token() and consumed by
    # _build_datetime(). Every key is optional.
    year: int
    month: int
    day_of_year: int
    day: int
    hour: int
    minute: int
    second: int
    microsecond: int
    timestamp: float
    expanded_timestamp: int
    tzinfo: dt_tzinfo
    am_pm: Literal["am", "pm"]
    day_of_week: int
    weekdate: Tuple[_WEEKDATE_ELEMENT, _WEEKDATE_ELEMENT, Optional[_WEEKDATE_ELEMENT]]


class DateTimeParser:
    """Parses datetime strings, either ISO 8601-like or against format tokens."""

    _FORMAT_RE: ClassVar[Pattern[str]] = re.compile(
        r"(YYY?Y?|MM?M?M?|Do|DD?D?D?|d?d?d?d|HH?|hh?|mm?|ss?|S+|ZZ?Z?|a|A|x|X|W)"
    )
    _ESCAPE_RE: ClassVar[Pattern[str]] = re.compile(r"\[[^\[\]]*\]")

    _ONE_OR_TWO_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{1,2}")
    _ONE_OR_TWO_OR_THREE_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{1,3}")
    _ONE_OR_MORE_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d+")
    _TWO_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{2}")
    _THREE_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{3}")
    _FOUR_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{4}")
    _TZ_Z_RE: ClassVar[Pattern[str]] = re.compile(r"([\+\-])(\d{2})(?:(\d{2}))?|Z")
    _TZ_ZZ_RE: ClassVar[Pattern[str]] = re.compile(r"([\+\-])(\d{2})(?:\:(\d{2}))?|Z")
    _TZ_NAME_RE: ClassVar[Pattern[str]] = re.compile(r"\w[\w+\-/]+")
    # NOTE: timestamps cannot be parsed from natural language strings (by removing the ^...$) because it will
    # break cases like "15 Jul 2000" and a format list (see issue #447)
    _TIMESTAMP_RE: ClassVar[Pattern[str]] = re.compile(r"^\-?\d+\.?\d+$")
    _TIMESTAMP_EXPANDED_RE: ClassVar[Pattern[str]] = re.compile(r"^\-?\d+$")
    _TIME_RE: ClassVar[Pattern[str]] = re.compile(
        r"^(\d{2})(?:\:?(\d{2}))?(?:\:?(\d{2}))?(?:([\.\,])(\d+))?$"
    )
    _WEEK_DATE_RE: ClassVar[Pattern[str]] = re.compile(
        r"(?P<year>\d{4})[\-]?W(?P<week>\d{2})[\-]?(?P<day>\d)?"
    )

    _BASE_INPUT_RE_MAP: ClassVar[Dict[_FORMAT_TYPE, Pattern[str]]] = {
        "YYYY": _FOUR_DIGIT_RE,
        "YY": _TWO_DIGIT_RE,
        "MM": _TWO_DIGIT_RE,
        "M": _ONE_OR_TWO_DIGIT_RE,
        "DDDD": _THREE_DIGIT_RE,
        "DDD": _ONE_OR_TWO_OR_THREE_DIGIT_RE,
        "DD": _TWO_DIGIT_RE,
        "D": _ONE_OR_TWO_DIGIT_RE,
        "HH": _TWO_DIGIT_RE,
        "H": _ONE_OR_TWO_DIGIT_RE,
        "hh": _TWO_DIGIT_RE,
        "h": _ONE_OR_TWO_DIGIT_RE,
        "mm": _TWO_DIGIT_RE,
        "m": _ONE_OR_TWO_DIGIT_RE,
        "ss": _TWO_DIGIT_RE,
        "s": _ONE_OR_TWO_DIGIT_RE,
        "X": _TIMESTAMP_RE,
        "x": _TIMESTAMP_EXPANDED_RE,
        "ZZZ": _TZ_NAME_RE,
        "ZZ": _TZ_ZZ_RE,
        "Z": _TZ_Z_RE,
        "S": _ONE_OR_MORE_DIGIT_RE,
        "W": _WEEK_DATE_RE,
    }

    SEPARATORS: ClassVar[List[str]] = ["-", "/", "."]

    locale: locales.Locale
    _input_re_map: Dict[_FORMAT_TYPE, Pattern[str]]

    def __init__(self, locale: str = DEFAULT_LOCALE, cache_size: int = 0) -> None:
        """Build the token-to-regex map for the given locale.

        :param locale: locale name passed to ``locales.get_locale``.
        :param cache_size: when greater than zero, ``_generate_pattern_re`` is
            wrapped in an ``lru_cache`` of that size on this instance.
        """
        self.locale = locales.get_locale(locale)
        self._input_re_map = self._BASE_INPUT_RE_MAP.copy()
        self._input_re_map.update(
            {
                "MMMM": self._generate_choice_re(
                    self.locale.month_names[1:], re.IGNORECASE
                ),
                "MMM": self._generate_choice_re(
                    self.locale.month_abbreviations[1:], re.IGNORECASE
                ),
                "Do": re.compile(self.locale.ordinal_day_re),
                "dddd": self._generate_choice_re(
                    self.locale.day_names[1:], re.IGNORECASE
                ),
                "ddd": self._generate_choice_re(
                    self.locale.day_abbreviations[1:], re.IGNORECASE
                ),
                "d": re.compile(r"[1-7]"),
                "a": self._generate_choice_re(
                    (self.locale.meridians["am"], self.locale.meridians["pm"])
                ),
                # note: 'A' token accepts both 'am/pm' and 'AM/PM' formats to
                # ensure backwards compatibility of this token
                "A": self._generate_choice_re(self.locale.meridians.values()),
            }
        )
        if cache_size > 0:
            self._generate_pattern_re = lru_cache(maxsize=cache_size)(  # type: ignore
                self._generate_pattern_re
            )

    # TODO: since we support more than ISO 8601, we should rename this function
    # IDEA: break into multiple functions
    def parse_iso(
        self, datetime_string: str, normalize_whitespace: bool = False
    ) -> datetime:
        """Parse an ISO 8601-like string without an explicit format.

        A fixed list of date formats is extended with a time format (and a
        timezone token) derived from the input, then each candidate is tried
        in order via ``_parse_multiformat``.

        :param datetime_string: the string to parse.
        :param normalize_whitespace: strip the string and collapse runs of
            whitespace into single spaces before parsing.
        :raises ParserError: if the string has unexpected spaces, the time
            component is invalid, or no candidate format matches.
        """
        if normalize_whitespace:
            datetime_string = re.sub(r"\s+", " ", datetime_string.strip())

        has_space_divider = " " in datetime_string
        has_t_divider = "T" in datetime_string

        num_spaces = datetime_string.count(" ")
        if has_space_divider and num_spaces != 1 or has_t_divider and num_spaces > 0:
            raise ParserError(
                f"Expected an ISO 8601-like string, but was given {datetime_string!r}. "
                "Try passing in a format string to resolve this."
            )

        has_time = has_space_divider or has_t_divider
        has_tz = False

        # date formats (ISO 8601 and others) to test against
        # NOTE: YYYYMM is omitted to avoid confusion with YYMMDD (no longer part of ISO 8601, but is still often used)
        formats = [
            "YYYY-MM-DD",
            "YYYY-M-DD",
            "YYYY-M-D",
            "YYYY/MM/DD",
            "YYYY/M/DD",
            "YYYY/M/D",
            "YYYY.MM.DD",
            "YYYY.M.DD",
            "YYYY.M.D",
            "YYYYMMDD",
            "YYYY-DDDD",
            "YYYYDDDD",
            "YYYY-MM",
            "YYYY/MM",
            "YYYY.MM",
            "YYYY",
            "W",
        ]

        if has_time:

            if has_space_divider:
                date_string, time_string = datetime_string.split(" ", 1)
            else:
                date_string, time_string = datetime_string.split("T", 1)

            # Split the time from a trailing timezone designator. maxsplit and
            # flags are passed by keyword: positional use is deprecated since
            # Python 3.13.
            time_parts = re.split(
                r"[\+\-Z]", time_string, maxsplit=1, flags=re.IGNORECASE
            )

            time_components: Optional[Match[str]] = self._TIME_RE.match(time_parts[0])

            if time_components is None:
                raise ParserError(
                    "Invalid time component provided. "
                    "Please specify a format or provide a valid time component in the basic or extended ISO 8601 time format."
                )

            (
                hours,
                minutes,
                seconds,
                subseconds_sep,
                subseconds,
            ) = time_components.groups()

            has_tz = len(time_parts) == 2
            has_minutes = minutes is not None
            has_seconds = seconds is not None
            has_subseconds = subseconds is not None

            is_basic_time_format = ":" not in time_parts[0]
            tz_format = "Z"

            # use 'ZZ' token instead since tz offset is present in non-basic format
            if has_tz and ":" in time_parts[1]:
                tz_format = "ZZ"

            time_sep = "" if is_basic_time_format else ":"

            if has_subseconds:
                time_string = f"HH{time_sep}mm{time_sep}ss{subseconds_sep}S"
            elif has_seconds:
                time_string = f"HH{time_sep}mm{time_sep}ss"
            elif has_minutes:
                time_string = f"HH{time_sep}mm"
            else:
                time_string = "HH"

            if has_space_divider:
                formats = [f"{f} {time_string}" for f in formats]
            else:
                formats = [f"{f}T{time_string}" for f in formats]

        if has_time and has_tz:
            # Add "Z" or "ZZ" to the format strings to indicate to
            # _parse_token() that a timezone needs to be parsed
            formats = [f"{f}{tz_format}" for f in formats]

        return self._parse_multiformat(datetime_string, formats)

    def parse(
        self,
        datetime_string: str,
        fmt: Union[List[str], str],
        normalize_whitespace: bool = False,
    ) -> datetime:
        """Parse ``datetime_string`` against a format string or list of formats.

        :param datetime_string: the string to parse.
        :param fmt: a token format string, or a list of them to try in order.
        :param normalize_whitespace: collapse runs of whitespace into single
            spaces before parsing (the string is not stripped here).
        :raises ParserMatchError: if the pattern cannot be compiled, the format
            does not match, or a token's match group is empty.
        :raises ParserError: for an unrecognized token or invalid components.
        """
        if normalize_whitespace:
            datetime_string = re.sub(r"\s+", " ", datetime_string)

        if isinstance(fmt, list):
            return self._parse_multiformat(datetime_string, fmt)

        try:
            fmt_tokens: List[_FORMAT_TYPE]
            fmt_pattern_re: Pattern[str]
            fmt_tokens, fmt_pattern_re = self._generate_pattern_re(fmt)
        except re.error as e:
            raise ParserMatchError(
                f"Failed to generate regular expression pattern: {e}."
            ) from e

        match = fmt_pattern_re.search(datetime_string)

        if match is None:
            raise ParserMatchError(
                f"Failed to match {fmt!r} when parsing {datetime_string!r}."
            )

        parts: _Parts = {}
        for token in fmt_tokens:
            value: Union[Tuple[str, str, str], str]
            if token == "Do":
                # the "Do" pattern is read through its "value" group
                value = match.group("value")
            elif token == "W":
                value = (match.group("year"), match.group("week"), match.group("day"))
            else:
                value = match.group(token)

            if value is None:
                raise ParserMatchError(
                    f"Unable to find a match group for the specified token {token!r}."
                )

            self._parse_token(token, value, parts)  # type: ignore

        return self._build_datetime(parts)

    def _generate_pattern_re(self, fmt: str) -> Tuple[List[_FORMAT_TYPE], Pattern[str]]:
        """Translate a token format string into its tokens and a compiled regex.

        :param fmt: a string of tokens such as ``'YYYY-MM-DD'``; text inside
            square brackets is inserted into the pattern without token
            substitution.
        :returns: the tokens in order of appearance and the bounded,
            case-insensitive pattern.
        :raises ParserError: if a token has no entry in the input regex map.
        """
        # fmt is a string of tokens like 'YYYY-MM-DD'
        # we construct a new string by replacing each
        # token by its pattern:
        # 'YYYY-MM-DD' -> '(?P<YYYY>\d{4})-(?P<MM>\d{2})-(?P<DD>\d{2})'
        tokens: List[_FORMAT_TYPE] = []
        offset = 0

        # Escape all special RegEx chars
        escaped_fmt = re.escape(fmt)

        # Extract the bracketed expressions to be reinserted later.
        escaped_fmt = re.sub(self._ESCAPE_RE, "#", escaped_fmt)

        # Any number of S is the same as one.
        # TODO: allow users to specify the number of digits to parse
        escaped_fmt = re.sub(r"S+", "S", escaped_fmt)

        escaped_data = re.findall(self._ESCAPE_RE, fmt)

        fmt_pattern = escaped_fmt

        for m in self._FORMAT_RE.finditer(escaped_fmt):
            token: _FORMAT_TYPE = cast(_FORMAT_TYPE, m.group(0))
            try:
                input_re = self._input_re_map[token]
            except KeyError:
                raise ParserError(f"Unrecognized token {token!r}.") from None
            input_pattern = f"(?P<{token}>{input_re.pattern})"
            tokens.append(token)
            # a pattern doesn't have the same length as the token
            # it replaces! We keep the difference in the offset variable.
            # This works because the string is scanned left-to-right and matches
            # are returned in the order found by finditer.
            fmt_pattern = (
                fmt_pattern[: m.start() + offset]
                + input_pattern
                + fmt_pattern[m.end() + offset :]
            )
            offset += len(input_pattern) - (m.end() - m.start())

        split_fmt = fmt_pattern.split(r"\#")

        # Interleave the pattern chunks with the bracketed text (brackets
        # removed). Due to the way Python splits, 'split_fmt' will always be
        # longer than 'escaped_data'.
        pieces: List[str] = []
        for i, chunk in enumerate(split_fmt):
            pieces.append(chunk)
            if i < len(escaped_data):
                pieces.append(escaped_data[i][1:-1])
        final_fmt_pattern = "".join(pieces)

        # Wrap final_fmt_pattern in a custom word boundary to strictly
        # match the formatting pattern and filter out date and time formats
        # that include junk such as: blah1998-09-12 blah, blah 1998-09-12blah,
        # blah1998-09-12blah. The custom word boundary matches every character
        # that is not a whitespace character to allow for searching for a date
        # and time string in a natural language sentence. Therefore, searching
        # for a string of the form YYYY-MM-DD in "blah 1998-09-12 blah" will
        # work properly.
        # Certain punctuation before or after the target pattern such as
        # "1998-09-12," is permitted. For the full list of valid punctuation,
        # see the documentation.

        starting_word_boundary = (
            r"(?<!\S\S)"  # Don't have two consecutive non-whitespace characters. This ensures that we allow cases
            # like .11.25.2019 but not 1.11.25.2019 (for pattern MM.DD.YYYY)
            r"(?<![^\,\.\;\:\?\!\"\'\`\[\]\{\}\(\)<>\s])"  # This is the list of punctuation that is ok before the
            # pattern (i.e. "It can't not be these characters before the pattern")
            r"(\b|^)"
            # The \b is to block cases like 1201912 but allow 201912 for pattern YYYYMM. The ^ was necessary to allow a
            # negative number through i.e. before epoch numbers
        )
        ending_word_boundary = (
            r"(?=[\,\.\;\:\?\!\"\'\`\[\]\{\}\(\)\<\>]?"  # Positive lookahead stating that these punctuation marks
            # can appear after the pattern at most 1 time
            r"(?!\S))"  # Don't allow any non-whitespace character after the punctuation
        )
        bounded_fmt_pattern = (
            f"{starting_word_boundary}{final_fmt_pattern}{ending_word_boundary}"
        )

        return tokens, re.compile(bounded_fmt_pattern, flags=re.IGNORECASE)

    @overload
    def _parse_token(
        self,
        token: Literal[
            "YYYY",
            "YY",
            "MM",
            "M",
            "DDDD",
            "DDD",
            "DD",
            "D",
            "Do",
            "HH",
            "hh",
            "h",
            "H",
            "mm",
            "m",
            "ss",
            "s",
            "x",
        ],
        value: Union[str, bytes, SupportsInt, bytearray],
        parts: _Parts,
    ) -> None:
        ...  # pragma: no cover

    @overload
    def _parse_token(
        self,
        token: Literal["X"],
        value: Union[str, bytes, SupportsFloat, bytearray],
        parts: _Parts,
    ) -> None:
        ...  # pragma: no cover

    @overload
    def _parse_token(
        self,
        token: Literal["MMMM", "MMM", "dddd", "ddd", "S"],
        value: Union[str, bytes, bytearray],
        parts: _Parts,
    ) -> None:
        ...  # pragma: no cover

    @overload
    def _parse_token(
        self,
        token: Literal["a", "A", "ZZZ", "ZZ", "Z"],
        value: Union[str, bytes],
        parts: _Parts,
    ) -> None:
        ...  # pragma: no cover

    @overload
    def _parse_token(
        self,
        token: Literal["W"],
        value: Tuple[_WEEKDATE_ELEMENT, _WEEKDATE_ELEMENT, Optional[_WEEKDATE_ELEMENT]],
        parts: _Parts,
    ) -> None:
        ...  # pragma: no cover

    def _parse_token(
        self,
        token: Any,
        value: Any,
        parts: _Parts,
    ) -> None:
        """Convert one matched token value and store it in ``parts``.

        :param token: the format token that produced ``value``.
        :param value: the matched text (a 3-tuple for the ``W`` token).
        :param parts: mapping updated in place with the parsed component.
        :raises ParserMatchError: if an "am" meridian follows an already
            parsed hour outside 0-12.
        """
        if token == "YYYY":
            parts["year"] = int(value)

        elif token == "YY":
            value = int(value)
            # two-digit years above 68 map to 19xx, the rest to 20xx
            parts["year"] = 1900 + value if value > 68 else 2000 + value

        elif token in ["MMMM", "MMM"]:
            # FIXME: month_number() is nullable
            parts["month"] = self.locale.month_number(value.lower())  # type: ignore

        elif token in ["MM", "M"]:
            parts["month"] = int(value)

        elif token in ["DDDD", "DDD"]:
            parts["day_of_year"] = int(value)

        elif token in ["DD", "D"]:
            parts["day"] = int(value)

        elif token == "Do":
            parts["day"] = int(value)

        elif token == "dddd":
            # locale day names are 1-indexed
            day_of_week = [x.lower() for x in self.locale.day_names].index(
                value.lower()
            )
            parts["day_of_week"] = day_of_week - 1

        elif token == "ddd":
            # locale day abbreviations are 1-indexed
            day_of_week = [x.lower() for x in self.locale.day_abbreviations].index(
                value.lower()
            )
            parts["day_of_week"] = day_of_week - 1

        elif token.upper() in ["HH", "H"]:
            parts["hour"] = int(value)

        elif token in ["mm", "m"]:
            parts["minute"] = int(value)

        elif token in ["ss", "s"]:
            parts["second"] = int(value)

        elif token == "S":
            # We have the *most significant* digits of an arbitrary-precision integer.
            # We want the six most significant digits as an integer, rounded.
            # IDEA: add nanosecond support somehow? Need datetime support for it first.
            value = value.ljust(7, "0")

            # floating-point (IEEE-754) defaults to half-to-even rounding
            seventh_digit = int(value[6])
            if seventh_digit == 5:
                rounding = int(value[5]) % 2
            elif seventh_digit > 5:
                rounding = 1
            else:
                rounding = 0

            parts["microsecond"] = int(value[:6]) + rounding

        elif token == "X":
            parts["timestamp"] = float(value)

        elif token == "x":
            parts["expanded_timestamp"] = int(value)

        elif token in ["ZZZ", "ZZ", "Z"]:
            parts["tzinfo"] = TzinfoParser.parse(value)

        elif token in ["a", "A"]:
            if value in (self.locale.meridians["am"], self.locale.meridians["AM"]):
                parts["am_pm"] = "am"
                # only checked when the hour token was parsed before this one
                if "hour" in parts and not 0 <= parts["hour"] <= 12:
                    raise ParserMatchError(
                        f"Hour token value must be between 0 and 12 inclusive for token {token!r}."
                    )
            elif value in (self.locale.meridians["pm"], self.locale.meridians["PM"]):
                parts["am_pm"] = "pm"
        elif token == "W":
            parts["weekdate"] = value

    @staticmethod
    def _build_datetime(parts: _Parts) -> datetime:
        """Assemble a ``datetime`` from the parsed components.

        A week date is first resolved to year/month/day. A ``timestamp`` or
        ``expanded_timestamp`` then short-circuits and returns a UTC datetime.
        Otherwise day-of-year, day-of-week, am/pm, hour 24 and a microsecond
        value rounded up to 1000000 are resolved before building the result.

        :param parts: components collected by ``_parse_token``; may be updated
            in place with derived year/month/day values.
        :raises ParserError: for an invalid week date or day of year, a
            day-of-year without a year or with a month, or an hour of 24 with
            non-zero minutes, seconds or microseconds.
        """
        weekdate = parts.get("weekdate")

        if weekdate is not None:

            year, week = int(weekdate[0]), int(weekdate[1])

            if weekdate[2] is not None:
                _day = int(weekdate[2])
            else:
                # day not given, default to 1
                _day = 1

            date_string = f"{year}-{week}-{_day}"

            # tokens for ISO 8601 weekdates
            try:
                dt = datetime.strptime(date_string, "%G-%V-%u")
            except ValueError as e:
                # Report an out-of-range week/day as a ParserError, in line
                # with the day-of-year handling below, rather than leaking
                # strptime's bare ValueError.
                raise ParserError(
                    f"The provided week date {date_string!r} is invalid."
                ) from e

            parts["year"] = dt.year
            parts["month"] = dt.month
            parts["day"] = dt.day

        timestamp = parts.get("timestamp")

        if timestamp is not None:
            return datetime.fromtimestamp(timestamp, tz=tz.tzutc())

        expanded_timestamp = parts.get("expanded_timestamp")

        if expanded_timestamp is not None:
            return datetime.fromtimestamp(
                normalize_timestamp(expanded_timestamp),
                tz=tz.tzutc(),
            )

        day_of_year = parts.get("day_of_year")

        if day_of_year is not None:
            _year = parts.get("year")
            month = parts.get("month")
            if _year is None:
                raise ParserError(
                    "Year component is required with the DDD and DDDD tokens."
                )

            if month is not None:
                raise ParserError(
                    "Month component is not allowed with the DDD and DDDD tokens."
                )

            date_string = f"{_year}-{day_of_year}"
            try:
                dt = datetime.strptime(date_string, "%Y-%j")
            except ValueError as e:
                raise ParserError(
                    f"The provided day of year {day_of_year!r} is invalid."
                ) from e

            parts["year"] = dt.year
            parts["month"] = dt.month
            parts["day"] = dt.day

        day_of_week: Optional[int] = parts.get("day_of_week")
        day = parts.get("day")

        # If day is passed, ignore day of week
        if day_of_week is not None and day is None:
            year = parts.get("year", 1970)
            month = parts.get("month", 1)
            day = 1

            # dddd => first day of week after epoch
            # dddd YYYY => first day of week in specified year
            # dddd MM YYYY => first day of week in specified year and month
            # dddd MM => first day after epoch in specified month
            next_weekday_dt = next_weekday(datetime(year, month, day), day_of_week)
            parts["year"] = next_weekday_dt.year
            parts["month"] = next_weekday_dt.month
            parts["day"] = next_weekday_dt.day

        am_pm = parts.get("am_pm")
        hour = parts.get("hour", 0)

        if am_pm == "pm" and hour < 12:
            hour += 12
        elif am_pm == "am" and hour == 12:
            hour = 0

        # Support for midnight at the end of day
        if hour == 24:
            if parts.get("minute", 0) != 0:
                raise ParserError("Midnight at the end of day must not contain minutes")
            if parts.get("second", 0) != 0:
                raise ParserError("Midnight at the end of day must not contain seconds")
            if parts.get("microsecond", 0) != 0:
                raise ParserError(
                    "Midnight at the end of day must not contain microseconds"
                )
            hour = 0
            day_increment = 1
        else:
            day_increment = 0

        # account for rounding up to 1000000
        microsecond = parts.get("microsecond", 0)
        if microsecond == 1000000:
            microsecond = 0
            second_increment = 1
        else:
            second_increment = 0

        increment = timedelta(days=day_increment, seconds=second_increment)

        return (
            datetime(
                year=parts.get("year", 1),
                month=parts.get("month", 1),
                day=parts.get("day", 1),
                hour=hour,
                minute=parts.get("minute", 0),
                second=parts.get("second", 0),
                microsecond=microsecond,
                tzinfo=parts.get("tzinfo"),
            )
            + increment
        )

    def _parse_multiformat(self, string: str, formats: Iterable[str]) -> datetime:
        """Return the result of the first format in ``formats`` that matches.

        Only ``ParserMatchError`` is swallowed between attempts; any other
        error propagates immediately.

        :param string: the string to parse.
        :param formats: candidate format strings, tried in order.
        :raises ParserError: if no format matches.
        """
        # Materialise once so a one-shot iterable (e.g. a generator) can be
        # both iterated and listed in the error message below.
        candidate_formats = tuple(formats)

        _datetime: Optional[datetime] = None

        for fmt in candidate_formats:
            try:
                _datetime = self.parse(string, fmt)
                break
            except ParserMatchError:
                pass

        if _datetime is None:
            supported_formats = ", ".join(candidate_formats)
            raise ParserError(
                f"Could not match input {string!r} to any of the following formats: {supported_formats}."
            )

        return _datetime

    # generates a capture group of choices separated by an OR operator
    @staticmethod
    def _generate_choice_re(
        choices: Iterable[str], flags: Union[int, re.RegexFlag] = 0
    ) -> Pattern[str]:
        """Compile ``(a|b|...)`` from ``choices``; choices are not escaped."""
        return re.compile(r"({})".format("|".join(choices)), flags=flags)


class TzinfoParser:
    """Parses timezone expressions into ``tzinfo`` objects."""

    _TZINFO_RE: ClassVar[Pattern[str]] = re.compile(
        r"^([\+\-])?(\d{2})(?:\:?(\d{2}))?$"
    )

    @classmethod
    def parse(cls, tzinfo_string: str) -> dt_tzinfo:
        """Convert a timezone expression into a ``tzinfo``.

        ``"local"`` maps to ``tz.tzlocal()``; ``"utc"``, ``"UTC"`` and ``"Z"``
        map to ``tz.tzutc()``; an offset such as ``+07:00``, ``-0700`` or ``07``
        maps to ``tz.tzoffset``; anything else is looked up with ``tz.gettz``.

        :param tzinfo_string: the timezone expression.
        :raises ParserError: if the expression cannot be resolved.
        """
        tzinfo: Optional[dt_tzinfo] = None

        if tzinfo_string == "local":
            tzinfo = tz.tzlocal()

        elif tzinfo_string in ["utc", "UTC", "Z"]:
            tzinfo = tz.tzutc()

        else:

            iso_match = cls._TZINFO_RE.match(tzinfo_string)

            if iso_match:
                sign: Optional[str]
                hours: str
                minutes: Union[str, int, None]
                sign, hours, minutes = iso_match.groups()
                seconds = int(hours) * 3600 + int(minutes or 0) * 60

                if sign == "-":
                    seconds *= -1

                tzinfo = tz.tzoffset(None, seconds)

            else:
                tzinfo = tz.gettz(tzinfo_string)

        if tzinfo is None:
            raise ParserError(f"Could not parse timezone expression {tzinfo_string!r}.")

        return tzinfo