"""Provides the :class:`DateTimeParser <arrow.parser.DateTimeParser>` class, a better way to parse datetime strings."""
2
3import re
4import sys
5from datetime import datetime, timedelta
6from datetime import tzinfo as dt_tzinfo
7from functools import lru_cache
8from typing import (
9    Any,
10    ClassVar,
11    Dict,
12    Iterable,
13    List,
14    Match,
15    Optional,
16    Pattern,
17    SupportsFloat,
18    SupportsInt,
19    Tuple,
20    Union,
21    cast,
22    overload,
23)
24
25from dateutil import tz
26
27from arrow import locales
28from arrow.constants import DEFAULT_LOCALE
29from arrow.util import next_weekday, normalize_timestamp
30
31if sys.version_info < (3, 8):  # pragma: no cover
32    from typing_extensions import Literal, TypedDict
33else:
34    from typing import Literal, TypedDict  # pragma: no cover
35
36
class ParserError(ValueError):
    """Raised when a datetime or timezone string cannot be parsed."""
39
40
class ParserMatchError(ParserError):
    """Signals that an input string did not match one particular format.

    _parse_multiformat() catches only this subclass while it tries each
    candidate format in turn. Any other ParserError -- for example the
    day_of_year errors raised by _build_datetime() -- therefore propagates
    to the user with its own message instead of being swallowed by that
    try/except.
    """
48
49
# One element of the (year, week, day) tuple stored under the "weekdate" key
# of _Parts; _build_datetime() converts each element with int().
_WEEKDATE_ELEMENT = Union[str, bytes, SupportsInt, bytearray]

# The format tokens that DateTimeParser maps to an input regex: the entries of
# DateTimeParser._BASE_INPUT_RE_MAP plus the locale-dependent tokens that
# DateTimeParser.__init__() adds.
_FORMAT_TYPE = Literal[
    "YYYY",
    "YY",
    "MM",
    "M",
    "DDDD",
    "DDD",
    "DD",
    "D",
    "HH",
    "H",
    "hh",
    "h",
    "mm",
    "m",
    "ss",
    "s",
    "X",
    "x",
    "ZZZ",
    "ZZ",
    "Z",
    "S",
    "W",
    "MMMM",
    "MMM",
    "Do",
    "dddd",
    "ddd",
    "d",
    "a",
    "A",
]
85
86
class _Parts(TypedDict, total=False):
    """Intermediate values extracted from a datetime string, keyed by component.

    Every key is optional (``total=False``). DateTimeParser._parse_token()
    stores the value for each token it is given and
    DateTimeParser._build_datetime() reads the keys back to build the result.
    """

    year: int
    month: int
    day_of_year: int  # set by the DDDD / DDD tokens
    day: int
    hour: int
    minute: int
    second: int
    microsecond: int
    timestamp: float  # set by the X token
    expanded_timestamp: int  # set by the x token
    tzinfo: dt_tzinfo
    am_pm: Literal["am", "pm"]
    day_of_week: int  # index into the locale's day names, minus one
    # (year, week, day) as captured by the W token; day is None when absent
    weekdate: Tuple[_WEEKDATE_ELEMENT, _WEEKDATE_ELEMENT, Optional[_WEEKDATE_ELEMENT]]
102
103
class DateTimeParser:
    """Parse datetime strings into :class:`datetime.datetime` objects.

    Input is matched either against an explicit token format (:meth:`parse`)
    or against a built-in list of ISO 8601-like formats (:meth:`parse_iso`).
    Month names, day names, the ordinal-day pattern and the meridians are
    taken from the locale selected at construction time.
    """

    # Finds the format tokens (``YYYY``, ``MM``, ``Do`` ...) in a format string.
    _FORMAT_RE: ClassVar[Pattern[str]] = re.compile(
        r"(YYY?Y?|MM?M?M?|Do|DD?D?D?|d?d?d?d|HH?|hh?|mm?|ss?|S+|ZZ?Z?|a|A|x|X|W)"
    )
    # Finds the bracketed ``[...]`` sections of a format string.
    _ESCAPE_RE: ClassVar[Pattern[str]] = re.compile(r"\[[^\[\]]*\]")

    _ONE_OR_TWO_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{1,2}")
    _ONE_OR_TWO_OR_THREE_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{1,3}")
    _ONE_OR_MORE_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d+")
    _TWO_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{2}")
    _THREE_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{3}")
    _FOUR_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{4}")
    # UTC offset without a colon ("+0700"), or a literal "Z"
    _TZ_Z_RE: ClassVar[Pattern[str]] = re.compile(r"([\+\-])(\d{2})(?:(\d{2}))?|Z")
    # UTC offset with a colon ("+07:00"), or a literal "Z"
    _TZ_ZZ_RE: ClassVar[Pattern[str]] = re.compile(r"([\+\-])(\d{2})(?:\:(\d{2}))?|Z")
    _TZ_NAME_RE: ClassVar[Pattern[str]] = re.compile(r"\w[\w+\-/]+")
    # NOTE: timestamps cannot be parsed from natural language strings (by removing the ^...$) because it will
    # break cases like "15 Jul 2000" and a format list (see issue #447)
    _TIMESTAMP_RE: ClassVar[Pattern[str]] = re.compile(r"^\-?\d+\.?\d+$")
    _TIMESTAMP_EXPANDED_RE: ClassVar[Pattern[str]] = re.compile(r"^\-?\d+$")
    # Time of day used by parse_iso(): hours, optional minutes, optional
    # seconds, and optional sub-seconds after a "." or ","
    _TIME_RE: ClassVar[Pattern[str]] = re.compile(
        r"^(\d{2})(?:\:?(\d{2}))?(?:\:?(\d{2}))?(?:([\.\,])(\d+))?$"
    )
    _WEEK_DATE_RE: ClassVar[Pattern[str]] = re.compile(
        r"(?P<year>\d{4})[\-]?W(?P<week>\d{2})[\-]?(?P<day>\d)?"
    )

    # Locale-independent tokens and the regex each one accepts as input.
    _BASE_INPUT_RE_MAP: ClassVar[Dict[_FORMAT_TYPE, Pattern[str]]] = {
        "YYYY": _FOUR_DIGIT_RE,
        "YY": _TWO_DIGIT_RE,
        "MM": _TWO_DIGIT_RE,
        "M": _ONE_OR_TWO_DIGIT_RE,
        "DDDD": _THREE_DIGIT_RE,
        "DDD": _ONE_OR_TWO_OR_THREE_DIGIT_RE,
        "DD": _TWO_DIGIT_RE,
        "D": _ONE_OR_TWO_DIGIT_RE,
        "HH": _TWO_DIGIT_RE,
        "H": _ONE_OR_TWO_DIGIT_RE,
        "hh": _TWO_DIGIT_RE,
        "h": _ONE_OR_TWO_DIGIT_RE,
        "mm": _TWO_DIGIT_RE,
        "m": _ONE_OR_TWO_DIGIT_RE,
        "ss": _TWO_DIGIT_RE,
        "s": _ONE_OR_TWO_DIGIT_RE,
        "X": _TIMESTAMP_RE,
        "x": _TIMESTAMP_EXPANDED_RE,
        "ZZZ": _TZ_NAME_RE,
        "ZZ": _TZ_ZZ_RE,
        "Z": _TZ_Z_RE,
        "S": _ONE_OR_MORE_DIGIT_RE,
        "W": _WEEK_DATE_RE,
    }

    SEPARATORS: ClassVar[List[str]] = ["-", "/", "."]

    locale: locales.Locale
    _input_re_map: Dict[_FORMAT_TYPE, Pattern[str]]

    def __init__(self, locale: str = DEFAULT_LOCALE, cache_size: int = 0) -> None:
        """Build the token-to-regex map for the given locale.

        :param locale: locale name handed to ``locales.get_locale()``.
        :param cache_size: when greater than zero, ``_generate_pattern_re`` is
            wrapped on this instance in an ``lru_cache`` of that size.
        """

        self.locale = locales.get_locale(locale)
        self._input_re_map = self._BASE_INPUT_RE_MAP.copy()
        self._input_re_map.update(
            {
                "MMMM": self._generate_choice_re(
                    self.locale.month_names[1:], re.IGNORECASE
                ),
                "MMM": self._generate_choice_re(
                    self.locale.month_abbreviations[1:], re.IGNORECASE
                ),
                "Do": re.compile(self.locale.ordinal_day_re),
                "dddd": self._generate_choice_re(
                    self.locale.day_names[1:], re.IGNORECASE
                ),
                "ddd": self._generate_choice_re(
                    self.locale.day_abbreviations[1:], re.IGNORECASE
                ),
                "d": re.compile(r"[1-7]"),
                "a": self._generate_choice_re(
                    (self.locale.meridians["am"], self.locale.meridians["pm"])
                ),
                # note: 'A' token accepts both 'am/pm' and 'AM/PM' formats to
                # ensure backwards compatibility of this token
                "A": self._generate_choice_re(self.locale.meridians.values()),
            }
        )
        if cache_size > 0:
            self._generate_pattern_re = lru_cache(maxsize=cache_size)(  # type: ignore
                self._generate_pattern_re
            )

    # TODO: since we support more than ISO 8601, we should rename this function
    # IDEA: break into multiple functions
    def parse_iso(
        self, datetime_string: str, normalize_whitespace: bool = False
    ) -> datetime:
        """Parse an ISO 8601-like string without an explicit format.

        The date part is tried against a fixed list of date formats. When a
        time part follows (separated by exactly one space or by a ``T``), a
        time format derived from that part, plus a timezone token if an
        offset or ``Z`` is present, is appended to every candidate.

        :param datetime_string: the string to parse.
        :param normalize_whitespace: when true, strip the string and collapse
            every run of whitespace into a single space before parsing.
        :returns: the parsed datetime.
        :raises ParserError: if the string has an unexpected number of
            spaces, its time component is invalid, or no candidate matches.
        """

        if normalize_whitespace:
            datetime_string = re.sub(r"\s+", " ", datetime_string.strip())

        has_space_divider = " " in datetime_string
        has_t_divider = "T" in datetime_string

        num_spaces = datetime_string.count(" ")
        if has_space_divider and num_spaces != 1 or has_t_divider and num_spaces > 0:
            raise ParserError(
                f"Expected an ISO 8601-like string, but was given {datetime_string!r}. "
                "Try passing in a format string to resolve this."
            )

        has_time = has_space_divider or has_t_divider
        has_tz = False

        # date formats (ISO 8601 and others) to test against
        # NOTE: YYYYMM is omitted to avoid confusion with YYMMDD (no longer part of ISO 8601, but is still often used)
        formats = [
            "YYYY-MM-DD",
            "YYYY-M-DD",
            "YYYY-M-D",
            "YYYY/MM/DD",
            "YYYY/M/DD",
            "YYYY/M/D",
            "YYYY.MM.DD",
            "YYYY.M.DD",
            "YYYY.M.D",
            "YYYYMMDD",
            "YYYY-DDDD",
            "YYYYDDDD",
            "YYYY-MM",
            "YYYY/MM",
            "YYYY.MM",
            "YYYY",
            "W",
        ]

        if has_time:

            if has_space_divider:
                date_string, time_string = datetime_string.split(" ", 1)
            else:
                date_string, time_string = datetime_string.split("T", 1)

            # Separate the time of day from a trailing offset or "Z".
            # maxsplit/flags are passed by keyword: passing them positionally
            # is deprecated as of Python 3.13.
            time_parts = re.split(
                r"[\+\-Z]", time_string, maxsplit=1, flags=re.IGNORECASE
            )

            time_components: Optional[Match[str]] = self._TIME_RE.match(time_parts[0])

            if time_components is None:
                raise ParserError(
                    "Invalid time component provided. "
                    "Please specify a format or provide a valid time component in the basic or extended ISO 8601 time format."
                )

            (
                hours,
                minutes,
                seconds,
                subseconds_sep,
                subseconds,
            ) = time_components.groups()

            has_tz = len(time_parts) == 2
            has_minutes = minutes is not None
            has_seconds = seconds is not None
            has_subseconds = subseconds is not None

            is_basic_time_format = ":" not in time_parts[0]
            tz_format = "Z"

            # use 'ZZ' token instead since tz offset is present in non-basic format
            if has_tz and ":" in time_parts[1]:
                tz_format = "ZZ"

            time_sep = "" if is_basic_time_format else ":"

            if has_subseconds:
                time_string = f"HH{time_sep}mm{time_sep}ss{subseconds_sep}S"
            elif has_seconds:
                time_string = f"HH{time_sep}mm{time_sep}ss"
            elif has_minutes:
                time_string = f"HH{time_sep}mm"
            else:
                time_string = "HH"

            if has_space_divider:
                formats = [f"{f} {time_string}" for f in formats]
            else:
                formats = [f"{f}T{time_string}" for f in formats]

        if has_time and has_tz:
            # Add "Z" or "ZZ" to the format strings to indicate to
            # _parse_token() that a timezone needs to be parsed
            formats = [f"{f}{tz_format}" for f in formats]

        return self._parse_multiformat(datetime_string, formats)

    def parse(
        self,
        datetime_string: str,
        fmt: Union[List[str], str],
        normalize_whitespace: bool = False,
    ) -> datetime:
        """Parse ``datetime_string`` according to ``fmt``.

        :param datetime_string: the string to parse.
        :param fmt: a token format string, or a list of them to be tried in
            order via ``_parse_multiformat()``.
        :param normalize_whitespace: when true, collapse every run of
            whitespace in ``datetime_string`` into a single space first.
        :returns: the parsed datetime.
        :raises ParserMatchError: if the pattern for ``fmt`` cannot be
            compiled, does not match, or a token has no captured value.
        :raises ParserError: for the other failures raised while generating
            the pattern, parsing tokens or building the datetime.
        """

        if normalize_whitespace:
            datetime_string = re.sub(r"\s+", " ", datetime_string)

        if isinstance(fmt, list):
            return self._parse_multiformat(datetime_string, fmt)

        try:
            fmt_tokens: List[_FORMAT_TYPE]
            fmt_pattern_re: Pattern[str]
            fmt_tokens, fmt_pattern_re = self._generate_pattern_re(fmt)
        except re.error as e:
            raise ParserMatchError(
                f"Failed to generate regular expression pattern: {e}."
            )

        match = fmt_pattern_re.search(datetime_string)

        if match is None:
            raise ParserMatchError(
                f"Failed to match {fmt!r} when parsing {datetime_string!r}."
            )

        parts: _Parts = {}
        for token in fmt_tokens:
            value: Union[Tuple[str, str, str], str]
            if token == "Do":
                # the number is captured by the "value" group of the locale's
                # ordinal-day pattern
                value = match.group("value")
            elif token == "W":
                value = (match.group("year"), match.group("week"), match.group("day"))
            else:
                value = match.group(token)

            if value is None:
                raise ParserMatchError(
                    f"Unable to find a match group for the specified token {token!r}."
                )

            self._parse_token(token, value, parts)  # type: ignore

        return self._build_datetime(parts)

    def _generate_pattern_re(self, fmt: str) -> Tuple[List[_FORMAT_TYPE], Pattern[str]]:
        """Turn a token format string into its tokens and a compiled regex.

        :param fmt: a format string of tokens such as ``YYYY-MM-DD``; text in
            square brackets is inserted into the pattern as written.
        :returns: the tokens in order of appearance and a case-insensitive
            pattern with one named group per token.
        :raises ParserError: if ``fmt`` contains a token with no input regex.
        """

        # fmt is a string of tokens like 'YYYY-MM-DD'
        # we construct a new string by replacing each
        # token by its pattern:
        # 'YYYY-MM-DD' -> '(?P<YYYY>\d{4})-(?P<MM>\d{2})-(?P<DD>\d{2})'
        tokens: List[_FORMAT_TYPE] = []
        offset = 0

        # Escape all special RegEx chars
        escaped_fmt = re.escape(fmt)

        # Extract the bracketed expressions to be reinserted later.
        escaped_fmt = re.sub(self._ESCAPE_RE, "#", escaped_fmt)

        # Any number of S is the same as one.
        # TODO: allow users to specify the number of digits to parse
        escaped_fmt = re.sub(r"S+", "S", escaped_fmt)

        escaped_data = re.findall(self._ESCAPE_RE, fmt)

        fmt_pattern = escaped_fmt

        for m in self._FORMAT_RE.finditer(escaped_fmt):
            token: _FORMAT_TYPE = cast(_FORMAT_TYPE, m.group(0))
            try:
                input_re = self._input_re_map[token]
            except KeyError:
                raise ParserError(f"Unrecognized token {token!r}.")
            input_pattern = f"(?P<{token}>{input_re.pattern})"
            tokens.append(token)
            # a pattern doesn't have the same length as the token
            # it replaces! We keep the difference in the offset variable.
            # This works because the string is scanned left-to-right and matches
            # are returned in the order found by finditer.
            fmt_pattern = (
                fmt_pattern[: m.start() + offset]
                + input_pattern
                + fmt_pattern[m.end() + offset :]
            )
            offset += len(input_pattern) - (m.end() - m.start())

        final_fmt_pattern = ""
        split_fmt = fmt_pattern.split(r"\#")

        # Due to the way Python splits, 'split_fmt' will always be longer
        for i, fragment in enumerate(split_fmt):
            final_fmt_pattern += fragment
            if i < len(escaped_data):
                # reinsert the bracketed text without its surrounding brackets
                final_fmt_pattern += escaped_data[i][1:-1]

        # Wrap final_fmt_pattern in a custom word boundary to strictly
        # match the formatting pattern and filter out date and time formats
        # that include junk such as: blah1998-09-12 blah, blah 1998-09-12blah,
        # blah1998-09-12blah. The custom word boundary matches every character
        # that is not a whitespace character to allow for searching for a date
        # and time string in a natural language sentence. Therefore, searching
        # for a string of the form YYYY-MM-DD in "blah 1998-09-12 blah" will
        # work properly.
        # Certain punctuation before or after the target pattern such as
        # "1998-09-12," is permitted. For the full list of valid punctuation,
        # see the documentation.

        starting_word_boundary = (
            r"(?<!\S\S)"  # Don't have two consecutive non-whitespace characters. This ensures that we allow cases
            # like .11.25.2019 but not 1.11.25.2019 (for pattern MM.DD.YYYY)
            r"(?<![^\,\.\;\:\?\!\"\'\`\[\]\{\}\(\)<>\s])"  # This is the list of punctuation that is ok before the
            # pattern (i.e. "It can't not be these characters before the pattern")
            r"(\b|^)"
            # The \b is to block cases like 1201912 but allow 201912 for pattern YYYYMM. The ^ was necessary to allow a
            # negative number through i.e. before epoch numbers
        )
        ending_word_boundary = (
            r"(?=[\,\.\;\:\?\!\"\'\`\[\]\{\}\(\)\<\>]?"  # Positive lookahead stating that these punctuation marks
            # can appear after the pattern at most 1 time
            r"(?!\S))"  # Don't allow any non-whitespace character after the punctuation
        )
        bounded_fmt_pattern = r"{}{}{}".format(
            starting_word_boundary, final_fmt_pattern, ending_word_boundary
        )

        return tokens, re.compile(bounded_fmt_pattern, flags=re.IGNORECASE)

    @overload
    def _parse_token(
        self,
        token: Literal[
            "YYYY",
            "YY",
            "MM",
            "M",
            "DDDD",
            "DDD",
            "DD",
            "D",
            "Do",
            "HH",
            "hh",
            "h",
            "H",
            "mm",
            "m",
            "ss",
            "s",
            "x",
        ],
        value: Union[str, bytes, SupportsInt, bytearray],
        parts: _Parts,
    ) -> None:
        ...  # pragma: no cover

    @overload
    def _parse_token(
        self,
        token: Literal["X"],
        value: Union[str, bytes, SupportsFloat, bytearray],
        parts: _Parts,
    ) -> None:
        ...  # pragma: no cover

    @overload
    def _parse_token(
        self,
        token: Literal["MMMM", "MMM", "dddd", "ddd", "S"],
        value: Union[str, bytes, bytearray],
        parts: _Parts,
    ) -> None:
        ...  # pragma: no cover

    @overload
    def _parse_token(
        self,
        token: Literal["a", "A", "ZZZ", "ZZ", "Z"],
        value: Union[str, bytes],
        parts: _Parts,
    ) -> None:
        ...  # pragma: no cover

    @overload
    def _parse_token(
        self,
        token: Literal["W"],
        value: Tuple[_WEEKDATE_ELEMENT, _WEEKDATE_ELEMENT, Optional[_WEEKDATE_ELEMENT]],
        parts: _Parts,
    ) -> None:
        ...  # pragma: no cover

    def _parse_token(
        self,
        token: Any,
        value: Any,
        parts: _Parts,
    ) -> None:
        """Convert the text captured for one token and store it in ``parts``.

        :param token: the format token the value was captured for.
        :param value: the captured text (a ``(year, week, day)`` tuple for
            the ``W`` token).
        :param parts: the dictionary that is updated in place.
        :raises ParserMatchError: if an "am" meridian is combined with an
            already-parsed hour outside 0..12.
        :raises ParserError: if a timezone expression cannot be parsed.
        """

        if token == "YYYY":
            parts["year"] = int(value)

        elif token == "YY":
            value = int(value)
            # two-digit years above 68 belong to the 1900s, the rest to the 2000s
            parts["year"] = 1900 + value if value > 68 else 2000 + value

        elif token in ["MMMM", "MMM"]:
            # FIXME: month_number() is nullable
            parts["month"] = self.locale.month_number(value.lower())  # type: ignore

        elif token in ["MM", "M"]:
            parts["month"] = int(value)

        elif token in ["DDDD", "DDD"]:
            parts["day_of_year"] = int(value)

        elif token in ["DD", "D"]:
            parts["day"] = int(value)

        elif token == "Do":
            parts["day"] = int(value)

        elif token == "dddd":
            # locale day names are 1-indexed
            day_of_week = [x.lower() for x in self.locale.day_names].index(
                value.lower()
            )
            parts["day_of_week"] = day_of_week - 1

        elif token == "ddd":
            # locale day abbreviations are 1-indexed
            day_of_week = [x.lower() for x in self.locale.day_abbreviations].index(
                value.lower()
            )
            parts["day_of_week"] = day_of_week - 1

        elif token.upper() in ["HH", "H"]:
            parts["hour"] = int(value)

        elif token in ["mm", "m"]:
            parts["minute"] = int(value)

        elif token in ["ss", "s"]:
            parts["second"] = int(value)

        elif token == "S":
            # We have the *most significant* digits of an arbitrary-precision integer.
            # We want the six most significant digits as an integer, rounded.
            # IDEA: add nanosecond support somehow? Need datetime support for it first.
            value = value.ljust(7, "0")

            # floating-point (IEEE-754) defaults to half-to-even rounding
            seventh_digit = int(value[6])
            if seventh_digit == 5:
                rounding = int(value[5]) % 2
            elif seventh_digit > 5:
                rounding = 1
            else:
                rounding = 0

            parts["microsecond"] = int(value[:6]) + rounding

        elif token == "X":
            parts["timestamp"] = float(value)

        elif token == "x":
            parts["expanded_timestamp"] = int(value)

        elif token in ["ZZZ", "ZZ", "Z"]:
            parts["tzinfo"] = TzinfoParser.parse(value)

        elif token in ["a", "A"]:
            meridians = self.locale.meridians
            # The format pattern is compiled with re.IGNORECASE, so mixed-case
            # input such as "Pm" can be captured here. Compare
            # case-insensitively so such a meridian is not silently ignored.
            meridian = value.lower()
            if meridian in (meridians["am"].lower(), meridians["AM"].lower()):
                parts["am_pm"] = "am"
                if "hour" in parts and not 0 <= parts["hour"] <= 12:
                    raise ParserMatchError(
                        f"Hour token value must be between 0 and 12 inclusive for token {token!r}."
                    )
            elif meridian in (meridians["pm"].lower(), meridians["PM"].lower()):
                parts["am_pm"] = "pm"
        elif token == "W":
            parts["weekdate"] = value

    @staticmethod
    def _build_datetime(parts: _Parts) -> datetime:
        """Assemble a datetime from the parsed components.

        A timestamp (``timestamp`` or ``expanded_timestamp``) takes
        precedence over every other component and yields a UTC datetime.
        Otherwise week dates, day-of-year and day-of-week values are resolved
        to year/month/day, the meridian is applied to the hour, and an hour
        of 24 or a microsecond value rounded up to 1000000 rolls over into
        the next day or second. ``parts`` is updated in place with any
        resolved year/month/day.

        :param parts: the components collected by ``_parse_token()``.
        :returns: the resulting datetime.
        :raises ParserError: for an invalid week date or day of year, a day
            of year without a year or with a month, or an hour of 24
            combined with non-zero minutes, seconds or microseconds.
        """
        weekdate = parts.get("weekdate")

        if weekdate is not None:

            year, week = int(weekdate[0]), int(weekdate[1])

            if weekdate[2] is not None:
                _day = int(weekdate[2])
            else:
                # day not given, default to 1
                _day = 1

            # int() drops leading zeros; pad the year back to four digits,
            # which is what strptime's %G directive expects.
            date_string = f"{year:04d}-{week}-{_day}"

            try:
                #  tokens for ISO 8601 weekdates
                dt = datetime.strptime(date_string, "%G-%V-%u")
            except ValueError as e:
                # Report invalid week dates the same way as invalid days of
                # year below instead of leaking a bare ValueError.
                raise ParserError(
                    f"The provided week date {date_string!r} is invalid."
                ) from e

            parts["year"] = dt.year
            parts["month"] = dt.month
            parts["day"] = dt.day

        timestamp = parts.get("timestamp")

        if timestamp is not None:
            return datetime.fromtimestamp(timestamp, tz=tz.tzutc())

        expanded_timestamp = parts.get("expanded_timestamp")

        if expanded_timestamp is not None:
            return datetime.fromtimestamp(
                normalize_timestamp(expanded_timestamp),
                tz=tz.tzutc(),
            )

        day_of_year = parts.get("day_of_year")

        if day_of_year is not None:
            _year = parts.get("year")
            month = parts.get("month")
            if _year is None:
                raise ParserError(
                    "Year component is required with the DDD and DDDD tokens."
                )

            if month is not None:
                raise ParserError(
                    "Month component is not allowed with the DDD and DDDD tokens."
                )

            date_string = f"{_year}-{day_of_year}"
            try:
                dt = datetime.strptime(date_string, "%Y-%j")
            except ValueError as e:
                raise ParserError(
                    f"The provided day of year {day_of_year!r} is invalid."
                ) from e

            parts["year"] = dt.year
            parts["month"] = dt.month
            parts["day"] = dt.day

        day_of_week: Optional[int] = parts.get("day_of_week")
        day = parts.get("day")

        # If day is passed, ignore day of week
        if day_of_week is not None and day is None:
            year = parts.get("year", 1970)
            month = parts.get("month", 1)
            day = 1

            # dddd => first day of week after epoch
            # dddd YYYY => first day of week in specified year
            # dddd MM YYYY => first day of week in specified year and month
            # dddd MM => first day after epoch in specified month
            next_weekday_dt = next_weekday(datetime(year, month, day), day_of_week)
            parts["year"] = next_weekday_dt.year
            parts["month"] = next_weekday_dt.month
            parts["day"] = next_weekday_dt.day

        am_pm = parts.get("am_pm")
        hour = parts.get("hour", 0)

        if am_pm == "pm" and hour < 12:
            hour += 12
        elif am_pm == "am" and hour == 12:
            hour = 0

        # Support for midnight at the end of day
        if hour == 24:
            if parts.get("minute", 0) != 0:
                raise ParserError("Midnight at the end of day must not contain minutes")
            if parts.get("second", 0) != 0:
                raise ParserError("Midnight at the end of day must not contain seconds")
            if parts.get("microsecond", 0) != 0:
                raise ParserError(
                    "Midnight at the end of day must not contain microseconds"
                )
            hour = 0
            day_increment = 1
        else:
            day_increment = 0

        # account for rounding up to 1000000
        microsecond = parts.get("microsecond", 0)
        if microsecond == 1000000:
            microsecond = 0
            second_increment = 1
        else:
            second_increment = 0

        increment = timedelta(days=day_increment, seconds=second_increment)

        return (
            datetime(
                year=parts.get("year", 1),
                month=parts.get("month", 1),
                day=parts.get("day", 1),
                hour=hour,
                minute=parts.get("minute", 0),
                second=parts.get("second", 0),
                microsecond=microsecond,
                tzinfo=parts.get("tzinfo"),
            )
            + increment
        )

    def _parse_multiformat(self, string: str, formats: Iterable[str]) -> datetime:
        """Parse ``string`` with the first of ``formats`` that matches.

        :param string: the string to parse.
        :param formats: candidate format strings, tried in order.
        :returns: the datetime produced by the first matching format.
        :raises ParserError: if none of the formats matches; any
            non-match ParserError raised by ``parse()`` propagates unchanged.
        """

        # Record each format as it is tried: ``formats`` may be a one-shot
        # iterator that cannot be traversed again for the error message.
        tried: List[str] = []

        for fmt in formats:
            tried.append(fmt)
            try:
                return self.parse(string, fmt)
            except ParserMatchError:
                continue

        supported_formats = ", ".join(tried)
        raise ParserError(
            f"Could not match input {string!r} to any of the following formats: {supported_formats}."
        )

    @staticmethod
    def _generate_choice_re(
        choices: Iterable[str], flags: Union[int, re.RegexFlag] = 0
    ) -> Pattern[str]:
        """Compile a single capture group of ``choices`` separated by ``|``.

        The choices are joined as given, without escaping.

        :param choices: the alternatives to accept.
        :param flags: regex flags used when compiling the pattern.
        :returns: the compiled pattern.
        """
        return re.compile(r"({})".format("|".join(choices)), flags=flags)
739
740
class TzinfoParser:
    """Convert a timezone expression into a ``tzinfo`` object."""

    # optional sign, two-digit hours, optional two-digit minutes (with or
    # without a colon between them)
    _TZINFO_RE: ClassVar[Pattern[str]] = re.compile(
        r"^([\+\-])?(\d{2})(?:\:?(\d{2}))?$"
    )

    @classmethod
    def parse(cls, tzinfo_string: str) -> dt_tzinfo:
        """Return the ``tzinfo`` described by ``tzinfo_string``.

        Accepts ``"local"``, the UTC spellings ``"utc"``, ``"UTC"`` and
        ``"Z"``, a numeric offset such as ``+07:00`` or ``-0330``, or any
        name that ``dateutil.tz.gettz`` can resolve.

        :raises ParserError: if the expression cannot be resolved.
        """
        if tzinfo_string == "local":
            return tz.tzlocal()

        if tzinfo_string in ("utc", "UTC", "Z"):
            return tz.tzutc()

        offset_match = cls._TZINFO_RE.match(tzinfo_string)
        if offset_match is not None:
            sign, hours, minutes = offset_match.groups()
            offset_seconds = int(hours) * 3600 + int(minutes or 0) * 60
            if sign == "-":
                offset_seconds = -offset_seconds
            return tz.tzoffset(None, offset_seconds)

        # not a keyword or numeric offset: treat it as a timezone name
        named_tz: Optional[dt_tzinfo] = tz.gettz(tzinfo_string)
        if named_tz is None:
            raise ParserError(f"Could not parse timezone expression {tzinfo_string!r}.")

        return named_tz
780