1# -*- coding: utf-8 -*-
2"""
3    babel.dates
4    ~~~~~~~~~~~
5
6    Locale dependent formatting and parsing of dates and times.
7
8    The default locale for the functions in this module is determined by the
9    following environment variables, in that order:
10
11     * ``LC_TIME``,
12     * ``LC_ALL``, and
13     * ``LANG``
14
15    :copyright: (c) 2013-2021 by the Babel Team.
16    :license: BSD, see LICENSE for more details.
17"""
18
19from __future__ import division
20
21import re
22import warnings
23import pytz as _pytz
24
25from datetime import date, datetime, time, timedelta
26from bisect import bisect_right
27
28from babel.core import default_locale, get_global, Locale
29from babel.util import UTC, LOCALTZ
30from babel._compat import string_types, integer_types, number_types, PY2
31
32# "If a given short metazone form is known NOT to be understood in a given
33#  locale and the parent locale has this value such that it would normally
34#  be inherited, the inheritance of this value can be explicitly disabled by
35#  use of the 'no inheritance marker' as the value, which is 3 simultaneous [sic]
36#  empty set characters ( U+2205 )."
37#  - https://www.unicode.org/reports/tr35/tr35-dates.html#Metazone_Names
38
# The CLDR "no inheritance marker" described in the quote above: three
# U+2205 (EMPTY SET) characters used as the value of a short metazone name.
NO_INHERITANCE_MARKER = u'\u2205\u2205\u2205'


# Default locale of this module, resolved once at import time.  It is the
# default value of the ``locale`` parameter of the functions below.
LC_TIME = default_locale('LC_TIME')

# Aliases for use in scopes where the classes are shadowed by local variables
# (several functions below take parameters named `date`, `datetime` or `time`)
date_ = date
datetime_ = datetime
time_ = time
48
49
def _get_dt_and_tzinfo(dt_or_tzinfo):
    """
    Split a `dt_or_tzinfo` value into a ``(datetime, tzinfo)`` pair.

    The datetime half is ``None`` whenever the argument only identifies a
    timezone:

    * ``None``: the current naive local time, paired with ``LOCALTZ``
    * a string: no datetime; the zone is looked up with :func:`get_timezone`
    * an integer: no datetime; ``UTC``
    * a ``datetime`` or ``time``: the value as converted by
      :func:`_get_datetime`, paired with its own tzinfo (``UTC`` if naive)
    * anything else: no datetime; the value itself is used as the tzinfo

    See the docs for this function's callers for semantics.

    :rtype: tuple[datetime, tzinfo]
    """
    if dt_or_tzinfo is None:
        return datetime.now(), LOCALTZ
    if isinstance(dt_or_tzinfo, string_types):
        return None, get_timezone(dt_or_tzinfo)
    if isinstance(dt_or_tzinfo, integer_types):
        return None, UTC
    if isinstance(dt_or_tzinfo, (datetime, time)):
        moment = _get_datetime(dt_or_tzinfo)
        zone = UTC if moment.tzinfo is None else moment.tzinfo
        return moment, zone
    # Whatever is left is taken to be a tzinfo object itself.
    return None, dt_or_tzinfo
77
78
def _get_tz_name(dt_or_tzinfo):
    """
    Return the timezone name for a time, datetime, or tzinfo object.

    A ``zone`` attribute (pytz) wins, then a non-``None`` ``key`` attribute
    (ZoneInfo).  Otherwise ``tzinfo.tzname()`` is asked, using the current
    UTC time when the argument does not carry a datetime.

    :rtype: str
    """
    moment, zone = _get_dt_and_tzinfo(dt_or_tzinfo)
    if hasattr(zone, 'zone'):  # pytz object
        return zone.zone
    if hasattr(zone, 'key') and zone.key is not None:  # ZoneInfo object
        return zone.key
    reference = moment or datetime.utcnow()
    return zone.tzname(reference)
92
93
def _get_datetime(instant):
    """
    Turn an "instant" (date, time, datetime, number or None) into a datetime.

    .. warning:: The return values of this function may depend on the system clock.

    ``None`` yields the current moment (naive, in UTC).
    A time is combined with today's date.

    Dates become naive datetimes at midnight.

    >>> _get_datetime(date(2015, 1, 1))
    datetime.datetime(2015, 1, 1, 0, 0)

    Integers and floats are interpreted as UNIX timestamps.

    >>> _get_datetime(1400000000)
    datetime.datetime(2014, 5, 13, 16, 53, 20)

    Anything else is handed back untouched.

    >>> x = datetime(2015, 1, 1)
    >>> _get_datetime(x) is x
    True

    :param instant: date, time, datetime, integer, float or None
    :type instant: date|time|datetime|int|float|None
    :return: a datetime
    :rtype: datetime
    """
    if instant is None:
        return datetime_.utcnow()
    if isinstance(instant, (integer_types, float)):
        return datetime_.utcfromtimestamp(instant)
    if isinstance(instant, time):
        return datetime_.combine(date.today(), instant)
    # `datetime` subclasses `date`, so datetimes must be excluded explicitly.
    is_plain_date = isinstance(instant, date) and not isinstance(instant, datetime)
    if is_plain_date:
        return datetime_.combine(instant, time())
    # TODO (3.x): Add an assertion/type check for this fallthrough branch:
    return instant
134
135
def _ensure_datetime_tzinfo(datetime, tzinfo=None):
    """
    Return the given datetime with a tzinfo attached.

    A tz-naive datetime gets UTC attached.

    When a tzinfo is given, the datetime is additionally converted to that
    timezone.

    >>> _ensure_datetime_tzinfo(datetime(2015, 1, 1)).tzinfo.zone
    'UTC'

    >>> tz = get_timezone("Europe/Stockholm")
    >>> _ensure_datetime_tzinfo(datetime(2015, 1, 1, 13, 15, tzinfo=UTC), tzinfo=tz).hour
    14

    :param datetime: Datetime to augment.
    :param tzinfo: Optional tzinfo.
    :return: datetime with tzinfo
    :rtype: datetime
    """
    if datetime.tzinfo is None:
        datetime = datetime.replace(tzinfo=UTC)
    if tzinfo is None:
        return datetime
    converted = datetime.astimezone(get_timezone(tzinfo))
    if hasattr(tzinfo, 'normalize'):  # pytz
        converted = tzinfo.normalize(converted)
    return converted
163
164
def _get_time(time, tzinfo=None):
    """
    Build a timezone-aware time from the given instant.

    .. warning:: The return values of this function may depend on the system clock.

    ``None`` means the current UTC time and numbers are read as UNIX
    timestamps.  Naive values get UTC attached.  A datetime is converted to
    `tzinfo` (if given) before its time part is taken; a plain time merely
    has its tzinfo replaced.

    :param time: time, datetime or None
    :rtype: time
    """
    if time is None:
        value = datetime.utcnow()
    elif isinstance(time, number_types):
        value = datetime.utcfromtimestamp(time)
    else:
        value = time

    if value.tzinfo is None:
        value = value.replace(tzinfo=UTC)

    if not isinstance(value, datetime):
        # Plain time: no conversion is done, only the tzinfo is swapped.
        if tzinfo is None:
            return value
        return value.replace(tzinfo=tzinfo)

    if tzinfo is not None:
        value = value.astimezone(tzinfo)
        if hasattr(tzinfo, 'normalize'):  # pytz
            value = tzinfo.normalize(value)
    return value.timetz()
189
190
def get_timezone(zone=None):
    """Look up a timezone by name and return it.  Names are resolved through
    ``pytz``; the resulting object follows the `tzinfo` interface and can be
    used with all of the functions of Babel that operate with dates.

    An unknown timezone name raises a :exc:`LookupError`.  If `zone` is
    ``None`` the local zone object is returned.

    :param zone: the name of the timezone to look up.  If a timezone object
                 itself is passed in, it's returned unchanged.
    """
    if zone is None:
        return LOCALTZ
    if isinstance(zone, string_types):
        try:
            return _pytz.timezone(zone)
        except _pytz.UnknownTimeZoneError:
            raise LookupError('Unknown timezone %s' % zone)
    # Not a name: assume it already is a timezone object.
    return zone
210
211
def get_next_timezone_transition(zone=None, dt=None):
    """Return a :class:`TimezoneTransition` describing the next timezone
    transition of the given zone.  This can be used, for instance, to find
    out when the next DST change happens and what it looks like.

    The transition is looked up relative to the given datetime: the first
    transition after it is used.  If no transition can be found the return
    value is `None`.

    Transition information can only be provided for timezones returned by
    the :func:`get_timezone` function.

    :param zone: the timezone for which the transition should be looked up.
                 If not provided the local timezone is used.
    :param dt: the date after which the next transition should be found.
               If not given the current time is assumed.
    :raise TypeError: if the timezone object has no ``_utc_transition_times``
    """
    tz = get_timezone(zone)
    # The transition table is searched with a naive datetime.
    reference = _get_datetime(dt).replace(tzinfo=None)

    if not hasattr(tz, '_utc_transition_times'):
        raise TypeError('Given timezone does not have UTC transition '
                        'times.  This can happen because the operating '
                        'system fallback local timezone is used or a '
                        'custom timezone object')

    try:
        # Index of the first transition strictly after the reference date.
        idx = bisect_right(tz._utc_transition_times, reference)
        # NOTE(review): for idx == 0, ``idx - 1`` wraps around to the last
        # table entry -- confirm whether that can happen in practice.
        before_key = tz._transition_info[idx - 1]
        after_key = tz._transition_info[idx]
        before_tz = tz._tzinfos[before_key]
        after_tz = tz._tzinfos[after_key]
    except (LookupError, ValueError):
        return None

    return TimezoneTransition(
        activates=tz._utc_transition_times[idx],
        from_tzinfo=before_tz,
        to_tzinfo=after_tz,
        reference_date=reference
    )
254
255
class TimezoneTransition(object):
    """Value object describing one timezone transition; this is what
    :func:`get_next_timezone_transition` returns.
    """

    def __init__(self, activates, from_tzinfo, to_tzinfo, reference_date=None):
        #: the time of the activation of the timezone transition in UTC.
        self.activates = activates
        #: the timezone from where the transition starts.
        self.from_tzinfo = from_tzinfo
        #: the timezone for after the transition.
        self.to_tzinfo = to_tzinfo
        #: the reference date that was provided.  This is the `dt` parameter
        #: to the :func:`get_next_timezone_transition`.
        self.reference_date = reference_date

    @property
    def from_tz(self):
        """Name of the timezone in effect before the transition."""
        source = self.from_tzinfo
        return source._tzname

    @property
    def to_tz(self):
        """Name of the timezone in effect after the transition."""
        target = self.to_tzinfo
        return target._tzname

    @property
    def from_offset(self):
        """UTC offset, in whole seconds, before the transition."""
        delta = self.from_tzinfo._utcoffset
        return int(delta.total_seconds())

    @property
    def to_offset(self):
        """UTC offset, in whole seconds, after the transition."""
        delta = self.to_tzinfo._utcoffset
        return int(delta.total_seconds())

    def __repr__(self):
        parts = (self.from_tz, self.to_tz, self.activates)
        return '<TimezoneTransition %s -> %s (%s)>' % parts
298
299
def get_period_names(width='wide', context='stand-alone', locale=LC_TIME):
    """Return the day period names (AM/PM) of the locale.

    >>> get_period_names(locale='en_US')['am']
    u'AM'

    :param width: the width to use, one of "abbreviated", "narrow", or "wide"
    :param context: the context, either "format" or "stand-alone"
    :param locale: the `Locale` object, or a locale string
    """
    day_periods = Locale.parse(locale).day_periods
    return day_periods[context][width]
311
312
def get_day_names(width='wide', context='format', locale=LC_TIME):
    """Return the names of the days of the week for the locale.

    >>> get_day_names('wide', locale='en_US')[1]
    u'Tuesday'
    >>> get_day_names('short', locale='en_US')[1]
    u'Tu'
    >>> get_day_names('abbreviated', locale='es')[1]
    u'mar.'
    >>> get_day_names('narrow', context='stand-alone', locale='de_DE')[1]
    u'D'

    :param width: the width to use, one of "wide", "abbreviated", "short" or "narrow"
    :param context: the context, either "format" or "stand-alone"
    :param locale: the `Locale` object, or a locale string
    """
    days = Locale.parse(locale).days
    return days[context][width]
330
331
def get_month_names(width='wide', context='format', locale=LC_TIME):
    """Return the names of the months for the locale.

    >>> get_month_names('wide', locale='en_US')[1]
    u'January'
    >>> get_month_names('abbreviated', locale='es')[1]
    u'ene.'
    >>> get_month_names('narrow', context='stand-alone', locale='de_DE')[1]
    u'J'

    :param width: the width to use, one of "wide", "abbreviated", or "narrow"
    :param context: the context, either "format" or "stand-alone"
    :param locale: the `Locale` object, or a locale string
    """
    months = Locale.parse(locale).months
    return months[context][width]
347
348
def get_quarter_names(width='wide', context='format', locale=LC_TIME):
    """Return the names of the quarters for the locale.

    >>> get_quarter_names('wide', locale='en_US')[1]
    u'1st quarter'
    >>> get_quarter_names('abbreviated', locale='de_DE')[1]
    u'Q1'
    >>> get_quarter_names('narrow', locale='de_DE')[1]
    u'1'

    :param width: the width to use, one of "wide", "abbreviated", or "narrow"
    :param context: the context, either "format" or "stand-alone"
    :param locale: the `Locale` object, or a locale string
    """
    quarters = Locale.parse(locale).quarters
    return quarters[context][width]
364
365
def get_era_names(width='wide', locale=LC_TIME):
    """Return the names of the eras for the locale.

    >>> get_era_names('wide', locale='en_US')[1]
    u'Anno Domini'
    >>> get_era_names('abbreviated', locale='de_DE')[1]
    u'n. Chr.'

    :param width: the width to use, either "wide", "abbreviated", or "narrow"
    :param locale: the `Locale` object, or a locale string
    """
    eras = Locale.parse(locale).eras
    return eras[width]
378
379
def get_date_format(format='medium', locale=LC_TIME):
    """Return the locale's date formatting pattern for the given format
    name.

    >>> get_date_format(locale='en_US')
    <DateTimePattern u'MMM d, y'>
    >>> get_date_format('full', locale='de_DE')
    <DateTimePattern u'EEEE, d. MMMM y'>

    :param format: the format to use, one of "full", "long", "medium", or
                   "short"
    :param locale: the `Locale` object, or a locale string
    """
    date_formats = Locale.parse(locale).date_formats
    return date_formats[format]
394
395
def get_datetime_format(format='medium', locale=LC_TIME):
    """Return the locale's datetime formatting pattern for the given format
    name.

    A format name the locale has no pattern for is looked up under the
    ``None`` key instead.

    >>> get_datetime_format(locale='en_US')
    u'{1}, {0}'

    :param format: the format to use, one of "full", "long", "medium", or
                   "short"
    :param locale: the `Locale` object, or a locale string
    """
    patterns = Locale.parse(locale).datetime_formats
    key = format if format in patterns else None
    return patterns[key]
411
412
def get_time_format(format='medium', locale=LC_TIME):
    """Return the locale's time formatting pattern for the given format
    name.

    >>> get_time_format(locale='en_US')
    <DateTimePattern u'h:mm:ss a'>
    >>> get_time_format('full', locale='de_DE')
    <DateTimePattern u'HH:mm:ss zzzz'>

    :param format: the format to use, one of "full", "long", "medium", or
                   "short"
    :param locale: the `Locale` object, or a locale string
    """
    time_formats = Locale.parse(locale).time_formats
    return time_formats[format]
427
428
def get_timezone_gmt(datetime=None, width='long', locale=LC_TIME, return_z=False):
    """Return the timezone associated with the given `datetime` object formatted
    as string indicating the offset from GMT.

    >>> dt = datetime(2007, 4, 1, 15, 30)
    >>> get_timezone_gmt(dt, locale='en')
    u'GMT+00:00'
    >>> get_timezone_gmt(dt, locale='en', return_z=True)
    'Z'
    >>> get_timezone_gmt(dt, locale='en', width='iso8601_short')
    u'+00'
    >>> tz = get_timezone('America/Los_Angeles')
    >>> dt = tz.localize(datetime(2007, 4, 1, 15, 30))
    >>> get_timezone_gmt(dt, locale='en')
    u'GMT-07:00'
    >>> get_timezone_gmt(dt, 'short', locale='en')
    u'-0700'
    >>> get_timezone_gmt(dt, locale='en', width='iso8601_short')
    u'-07'

    Negative offsets that are not a whole number of hours keep their hour
    value (the sign applies to the offset as a whole):

    >>> tz = get_timezone('America/St_Johns')
    >>> dt = tz.localize(datetime(2007, 1, 1, 15, 30))
    >>> get_timezone_gmt(dt, locale='en')
    u'GMT-03:30'

    The long format depends on the locale, for example in France the acronym
    UTC string is used instead of GMT:

    >>> dt = get_timezone('America/Los_Angeles').localize(datetime(2007, 4, 1, 15, 30))
    >>> get_timezone_gmt(dt, 'long', locale='fr_FR')
    u'UTC-07:00'

    .. versionadded:: 0.9

    :param datetime: the ``datetime`` object; if `None`, the current date and
                     time in UTC is used
    :param width: either "long" or "short" or "iso8601" or "iso8601_short"
    :param locale: the `Locale` object, or a locale string
    :param return_z: True or False; Function returns indicator "Z"
                     when local time offset is 0
    :return: the formatted offset
    """
    datetime = _ensure_datetime_tzinfo(_get_datetime(datetime))
    locale = Locale.parse(locale)

    offset = datetime.tzinfo.utcoffset(datetime)
    total_seconds = offset.days * 24 * 60 * 60 + offset.seconds

    # Split the offset into sign and magnitude *before* dividing.  divmod()
    # floors towards negative infinity, so dividing a negative total directly
    # turns -03:30 (-12600s) into (-4, 1800), i.e. "-04:30".
    sign = u'-' if total_seconds < 0 else u'+'
    hours, seconds = divmod(abs(total_seconds), 3600)
    minutes = seconds // 60

    if return_z and total_seconds == 0:
        return 'Z'
    if seconds == 0 and width == 'iso8601_short':
        # Whole-hour offsets drop the minutes in the short ISO 8601 form.
        return u'%s%02d' % (sign, hours)
    if width == 'short' or width == 'iso8601_short':
        return u'%s%02d%02d' % (sign, hours, minutes)
    if width == 'iso8601':
        return u'%s%02d:%02d' % (sign, hours, minutes)
    # Any other width: wrap the offset in the locale's GMT pattern.
    return locale.zone_formats['gmt'] % (u'%s%02d:%02d' % (sign, hours, minutes))
481
482
def get_timezone_location(dt_or_tzinfo=None, locale=LC_TIME, return_city=False):
    u"""Return a representation of the given timezone using "location format".

    The result is built from the localized display name of the country and
    the city associated with the time zone:

    >>> tz = get_timezone('America/St_Johns')
    >>> print(get_timezone_location(tz, locale='de_DE'))
    Kanada (St. John’s) Zeit
    >>> print(get_timezone_location(tz, locale='en'))
    Canada (St. John’s) Time
    >>> print(get_timezone_location(tz, locale='en', return_city=True))
    St. John’s
    >>> tz = get_timezone('America/Mexico_City')
    >>> get_timezone_location(tz, locale='de_DE')
    u'Mexiko (Mexiko-Stadt) Zeit'

    If the timezone is associated with a country that uses only a single
    timezone, just the localized country name is returned:

    >>> tz = get_timezone('Europe/Berlin')
    >>> get_timezone_name(tz, locale='de_DE')
    u'Mitteleurop\\xe4ische Zeit'

    .. versionadded:: 0.9

    :param dt_or_tzinfo: the ``datetime`` or ``tzinfo`` object that determines
                         the timezone; if `None`, the local timezone is used
    :param locale: the `Locale` object, or a locale string
    :param return_city: True or False, if True then return exemplar city (location)
                        for the time zone
    :return: the localized timezone name using location format

    """
    locale = Locale.parse(locale)

    # Resolve the zone name and map aliases to the canonical time-zone code
    tz_name = _get_tz_name(dt_or_tzinfo)
    zone = get_global('zone_aliases').get(tz_name, tz_name)

    info = locale.time_zones.get(zone, {})

    region_format = locale.zone_formats['region']
    territory = get_global('zone_territories').get(zone)
    if territory not in locale.territories:
        territory = 'ZZ'  # invalid/unknown
    territory_name = locale.territories[territory]

    # A territory with exactly one timezone is represented by its localized
    # name alone (unless only the city was asked for)
    if not return_city and territory:
        zones_in_territory = get_global('territory_zones').get(territory, [])
        if len(zones_in_territory) == 1:
            return region_format % territory_name

    # Otherwise the city is needed: prefer the zone's own translation, then
    # the metazone's, and finally derive it from the zone identifier
    fallback_format = locale.zone_formats['fallback']
    if 'city' in info:
        city_name = info['city']
    else:
        metazone = get_global('meta_zones').get(zone)
        metazone_info = locale.meta_zones.get(metazone, {})
        if 'city' in metazone_info:
            city_name = metazone_info['city']
        else:
            # Everything after the first "/" (or the whole identifier if
            # there is none), with underscores shown as spaces
            city_name = zone.split('/', 1)[-1].replace('_', ' ')

    if return_city:
        return city_name
    location = fallback_format % {
        '0': city_name,
        '1': territory_name
    }
    return region_format % location
557
558
def get_timezone_name(dt_or_tzinfo=None, width='long', uncommon=False,
                      locale=LC_TIME, zone_variant=None, return_zone=False):
    r"""Return the localized display name for the given timezone. The timezone
    may be specified using a ``datetime`` or `tzinfo` object.

    >>> dt = time(15, 30, tzinfo=get_timezone('America/Los_Angeles'))
    >>> get_timezone_name(dt, locale='en_US')
    u'Pacific Standard Time'
    >>> get_timezone_name(dt, locale='en_US', return_zone=True)
    'America/Los_Angeles'
    >>> get_timezone_name(dt, width='short', locale='en_US')
    u'PST'

    If this function gets passed only a `tzinfo` object and no concrete
    `datetime`, the returned display name is independent of daylight savings
    time. This can be used for example for selecting timezones, or to set the
    time of events that recur across DST changes:

    >>> tz = get_timezone('America/Los_Angeles')
    >>> get_timezone_name(tz, locale='en_US')
    u'Pacific Time'
    >>> get_timezone_name(tz, 'short', locale='en_US')
    u'PT'

    If no localized display name for the timezone is available, and the timezone
    is associated with a country that uses only a single timezone, the name of
    that country is returned, formatted according to the locale:

    >>> tz = get_timezone('Europe/Berlin')
    >>> get_timezone_name(tz, locale='de_DE')
    u'Mitteleurop\xe4ische Zeit'
    >>> get_timezone_name(tz, locale='pt_BR')
    u'Hor\xe1rio da Europa Central'

    On the other hand, if the country uses multiple timezones, the city is also
    included in the representation:

    >>> tz = get_timezone('America/St_Johns')
    >>> get_timezone_name(tz, locale='de_DE')
    u'Neufundland-Zeit'

    Note that short format is currently not supported for all timezones and
    all locales.  This is partially because not every timezone has a short
    code in every locale.  In that case it currently falls back to the long
    format.

    For more information see `LDML Appendix J: Time Zone Display Names
    <https://www.unicode.org/reports/tr35/#Time_Zone_Fallback>`_

    .. versionadded:: 0.9

    .. versionchanged:: 1.0
       Added `zone_variant` support.

    :param dt_or_tzinfo: the ``datetime`` or ``tzinfo`` object that determines
                         the timezone; if a ``tzinfo`` object is used, the
                         resulting display name will be generic, i.e.
                         independent of daylight savings time; if `None`, the
                         current local time in the local timezone is used
    :param width: either "long" or "short"
    :param uncommon: deprecated and ignored
    :param zone_variant: defines the zone variation to return.  By default the
                           variation is defined from the datetime object
                           passed in.  If no datetime object is passed in, the
                           ``'generic'`` variation is assumed.  The following
                           values are valid: ``'generic'``, ``'daylight'`` and
                           ``'standard'``.
    :param locale: the `Locale` object, or a locale string
    :param return_zone: True or False. If true then function
                        returns long time zone ID
    :raise ValueError: if `zone_variant` is given but is not a valid value
    """
    dt, tzinfo = _get_dt_and_tzinfo(dt_or_tzinfo)
    locale = Locale.parse(locale)

    zone = _get_tz_name(dt_or_tzinfo)

    if zone_variant is None:
        # Derive the variant from the datetime, if there is one
        if dt is None:
            zone_variant = 'generic'
        elif tzinfo.dst(dt):
            zone_variant = 'daylight'
        else:
            zone_variant = 'standard'
    elif zone_variant not in ('generic', 'standard', 'daylight'):
        raise ValueError('Invalid zone variation')

    # Get the canonical time-zone code
    zone = get_global('zone_aliases').get(zone, zone)
    if return_zone:
        return zone

    # Explicitly translated zone names take precedence
    info = locale.time_zones.get(zone, {})
    if width in info and zone_variant in info[width]:
        return info[width][zone_variant]

    # Next, try the names of the metazone the zone belongs to
    metazone = get_global('meta_zones').get(zone)
    metazone_info = locale.meta_zones.get(metazone, {}) if metazone else {}
    if width in metazone_info:
        name = metazone_info[width].get(zone_variant)
        if width == 'short' and name == NO_INHERITANCE_MARKER:
            # If the short form is marked no-inheritance,
            # try to fall back to the long name instead.
            name = metazone_info.get('long', {}).get(zone_variant)
        if name:
            return name

    # If we have a concrete datetime, we assume that the result can't be
    # independent of daylight savings time, so we return the GMT offset
    if dt is not None:
        return get_timezone_gmt(dt, width=width, locale=locale)

    return get_timezone_location(dt_or_tzinfo, locale=locale)
676
677
def format_date(date=None, format='medium', locale=LC_TIME):
    """Return a date formatted according to the given pattern.

    >>> d = date(2007, 4, 1)
    >>> format_date(d, locale='en_US')
    u'Apr 1, 2007'
    >>> format_date(d, format='full', locale='de_DE')
    u'Sonntag, 1. April 2007'

    Instead of one of the locale's default formats, a custom date pattern
    can be given:

    >>> format_date(d, "EEE, MMM d, ''yy", locale='en')
    u"Sun, Apr 1, '07"

    :param date: the ``date`` or ``datetime`` object; if `None`, the current
                 date is used
    :param format: one of "full", "long", "medium", or "short", or a custom
                   date/time pattern
    :param locale: a `Locale` object or a locale identifier
    """
    if date is None:
        value = date_.today()
    elif isinstance(date, datetime):
        # Only the date part of a datetime is formatted.
        value = date.date()
    else:
        value = date

    locale = Locale.parse(locale)
    is_named_format = format in ('full', 'long', 'medium', 'short')
    pattern_source = get_date_format(format, locale=locale) if is_named_format else format
    return parse_pattern(pattern_source).apply(value, locale)
709
710
def format_datetime(datetime=None, format='medium', tzinfo=None,
                    locale=LC_TIME):
    r"""Return a date and time formatted according to the given pattern.

    >>> dt = datetime(2007, 4, 1, 15, 30)
    >>> format_datetime(dt, locale='en_US')
    u'Apr 1, 2007, 3:30:00 PM'

    For any pattern requiring the display of the time-zone, the third-party
    ``pytz`` package is needed to explicitly specify the time-zone:

    >>> format_datetime(dt, 'full', tzinfo=get_timezone('Europe/Paris'),
    ...                 locale='fr_FR')
    u'dimanche 1 avril 2007 \xe0 17:30:00 heure d\u2019\xe9t\xe9 d\u2019Europe centrale'
    >>> format_datetime(dt, "yyyy.MM.dd G 'at' HH:mm:ss zzz",
    ...                 tzinfo=get_timezone('US/Eastern'), locale='en')
    u'2007.04.01 AD at 11:30:00 EDT'

    :param datetime: the `datetime` object; if `None`, the current date and
                     time is used
    :param format: one of "full", "long", "medium", or "short", or a custom
                   date/time pattern
    :param tzinfo: the timezone to apply to the time for display
    :param locale: a `Locale` object or a locale identifier
    """
    datetime = _ensure_datetime_tzinfo(_get_datetime(datetime), tzinfo)

    locale = Locale.parse(locale)
    if format not in ('full', 'long', 'medium', 'short'):
        # Custom pattern: apply it to the datetime directly.
        return parse_pattern(format).apply(datetime, locale)

    # Named format: the locale's template joins a time ({0}) and a date ({1}).
    template = get_datetime_format(format, locale=locale).replace("'", "")
    # The datetime is already in the target zone, hence tzinfo=None here.
    time_text = format_time(datetime, format, tzinfo=None, locale=locale)
    with_time = template.replace('{0}', time_text)
    date_text = format_date(datetime, format, locale=locale)
    return with_time.replace('{1}', date_text)
747
748
def format_time(time=None, format='medium', tzinfo=None, locale=LC_TIME):
    r"""Return a time formatted according to the given pattern.

    >>> t = time(15, 30)
    >>> format_time(t, locale='en_US')
    u'3:30:00 PM'
    >>> format_time(t, format='short', locale='de_DE')
    u'15:30'

    Instead of one of the locale's default formats, a custom time pattern
    can be given:

    >>> format_time(t, "hh 'o''clock' a", locale='en')
    u"03 o'clock PM"

    For any pattern requiring the display of the time-zone a
    timezone has to be specified explicitly:

    >>> t = datetime(2007, 4, 1, 15, 30)
    >>> tzinfo = get_timezone('Europe/Paris')
    >>> t = tzinfo.localize(t)
    >>> format_time(t, format='full', tzinfo=tzinfo, locale='fr_FR')
    u'15:30:00 heure d\u2019\xe9t\xe9 d\u2019Europe centrale'
    >>> format_time(t, "hh 'o''clock' a, zzzz", tzinfo=get_timezone('US/Eastern'),
    ...             locale='en')
    u"09 o'clock AM, Eastern Daylight Time"

    As that example shows, when this function gets passed a
    ``datetime.datetime`` value, the actual time in the formatted string is
    adjusted to the timezone specified by the `tzinfo` parameter. If the
    ``datetime`` is "naive" (i.e. it has no associated timezone information),
    it is assumed to be in UTC.

    These timezone calculations are **not** performed if the value is of type
    ``datetime.time``, as without date information there's no way to determine
    what a given time would translate to in a different timezone without
    information about whether daylight savings time is in effect or not. This
    means that time values are left as-is, and the value of the `tzinfo`
    parameter is only used to display the timezone name if needed:

    >>> t = time(15, 30)
    >>> format_time(t, format='full', tzinfo=get_timezone('Europe/Paris'),
    ...             locale='fr_FR')
    u'15:30:00 heure normale d\u2019Europe centrale'
    >>> format_time(t, format='full', tzinfo=get_timezone('US/Eastern'),
    ...             locale='en_US')
    u'3:30:00 PM Eastern Standard Time'

    :param time: the ``time`` or ``datetime`` object; if `None`, the current
                 time in UTC is used
    :param format: one of "full", "long", "medium", or "short", or a custom
                   date/time pattern
    :param tzinfo: the time-zone to apply to the time for display
    :param locale: a `Locale` object or a locale identifier
    """
    value = _get_time(time, tzinfo)

    locale = Locale.parse(locale)
    is_named_format = format in ('full', 'long', 'medium', 'short')
    pattern_source = get_time_format(format, locale=locale) if is_named_format else format
    return parse_pattern(pattern_source).apply(value, locale)
810
811
def format_skeleton(skeleton, datetime=None, tzinfo=None, fuzzy=True, locale=LC_TIME):
    r"""Return a time and/or date formatted according to the given skeleton.

    The skeletons are defined in the CLDR data and provide more flexibility
    than the simple short/long/medium formats, but are a bit harder to use.
    They are defined using the date/time symbols without order or punctuation
    and map to a suitable format for the given locale.

    >>> t = datetime(2007, 4, 1, 15, 30)
    >>> format_skeleton('MMMEd', t, locale='fr')
    u'dim. 1 avr.'
    >>> format_skeleton('MMMEd', t, locale='en')
    u'Sun, Apr 1'
    >>> format_skeleton('yMMd', t, locale='fi')  # yMMd is not in the Finnish locale; yMd gets used
    u'1.4.2007'
    >>> format_skeleton('yMMd', t, fuzzy=False, locale='fi')  # yMMd is not in the Finnish locale, an error is thrown
    Traceback (most recent call last):
        ...
    KeyError: yMMd

    After the skeleton is resolved to a pattern `format_datetime` is called so
    all timezone processing etc is the same as for that.

    :param skeleton: A date time skeleton as defined in the cldr data.
    :param datetime: the ``time`` or ``datetime`` object; if `None`, the current
                 time in UTC is used
    :param tzinfo: the time-zone to apply to the time for display
    :param fuzzy: If the skeleton is not found, allow choosing a skeleton that's
                  close enough to it.
    :param locale: a `Locale` object or a locale identifier
    """
    locale = Locale.parse(locale)
    skeletons = locale.datetime_skeletons
    if fuzzy and skeleton not in skeletons:
        # Unknown skeleton: substitute the closest one the locale offers.
        skeleton = match_skeleton(skeleton, skeletons)
    pattern = skeletons[skeleton]
    return format_datetime(datetime, pattern, tzinfo, locale)
848
849
#: ``(unit name, length in seconds)`` pairs, ordered from the largest unit to
#: the smallest.  A year is approximated as 365 days and a month as 30 days.
TIMEDELTA_UNITS = (
    ('year', 365 * 24 * 60 * 60),
    ('month', 30 * 24 * 60 * 60),
    ('week', 7 * 24 * 60 * 60),
    ('day', 24 * 60 * 60),
    ('hour', 60 * 60),
    ('minute', 60),
    ('second', 1),
)
859
860
def format_timedelta(delta, granularity='second', threshold=.85,
                     add_direction=False, format='long',
                     locale=LC_TIME):
    """Return a time delta according to the rules of the given locale.

    >>> format_timedelta(timedelta(weeks=12), locale='en_US')
    u'3 months'
    >>> format_timedelta(timedelta(seconds=1), locale='es')
    u'1 segundo'

    The granularity parameter can be provided to alter the lowest unit
    presented, which defaults to a second.

    >>> format_timedelta(timedelta(hours=3), granularity='day',
    ...                  locale='en_US')
    u'1 day'

    The threshold parameter can be used to determine at which value the
    presentation switches to the next higher unit. A higher threshold factor
    means the presentation will switch later. For example:

    >>> format_timedelta(timedelta(hours=23), threshold=0.9, locale='en_US')
    u'1 day'
    >>> format_timedelta(timedelta(hours=23), threshold=1.1, locale='en_US')
    u'23 hours'

    In addition directional information can be provided that informs
    the user if the date is in the past or in the future:

    >>> format_timedelta(timedelta(hours=1), add_direction=True, locale='en')
    u'in 1 hour'
    >>> format_timedelta(timedelta(hours=-1), add_direction=True, locale='en')
    u'1 hour ago'

    The format parameter controls how compact or wide the presentation is:

    >>> format_timedelta(timedelta(hours=3), format='short', locale='en')
    u'3 hr'
    >>> format_timedelta(timedelta(hours=3), format='narrow', locale='en')
    u'3h'

    :param delta: a ``timedelta`` object representing the time difference to
                  format, or the delta in seconds as an `int` value
    :param granularity: determines the smallest unit that should be displayed,
                        the value can be one of "year", "month", "week", "day",
                        "hour", "minute" or "second"
    :param threshold: factor that determines at which point the presentation
                      switches to the next higher unit
    :param add_direction: if this flag is set to `True` the return value will
                          include directional information.  For instance a
                          positive timedelta will include the information about
                          it being in the future, a negative will be information
                          about the value being in the past.
    :param format: the format, can be "narrow", "short" or "long". (
                   "medium" is deprecated, currently converted to "long" to
                   maintain compatibility)
    :param locale: a `Locale` object or a locale identifier
    :return: the formatted delta, or an empty string if no unit (or no
             pattern for the selected unit) could be found
    :raises TypeError: if `format` is not one of the accepted values
    """
    if format not in ('narrow', 'short', 'medium', 'long'):
        raise TypeError('Format must be one of "narrow", "short" or "long"')
    if format == 'medium':
        # stacklevel=2 attributes the warning to the caller's line rather
        # than to this module.
        warnings.warn('"medium" value for format param of format_timedelta'
                      ' is deprecated. Use "long" instead',
                      category=DeprecationWarning, stacklevel=2)
        format = 'long'
    if isinstance(delta, timedelta):
        # Microseconds are deliberately ignored; only whole seconds count.
        seconds = int((delta.days * 86400) + delta.seconds)
    else:
        seconds = delta
    locale = Locale.parse(locale)

    def _iter_patterns(a_unit):
        """Yield candidate plural-pattern mappings for `a_unit`, best first."""
        if add_direction:
            unit_rel_patterns = locale._data['date_fields'][a_unit]
            if seconds >= 0:
                yield unit_rel_patterns['future']
            else:
                yield unit_rel_patterns['past']
        a_unit = 'duration-' + a_unit
        yield locale._data['unit_patterns'].get(a_unit, {}).get(format)

    for unit, secs_per_unit in TIMEDELTA_UNITS:
        value = abs(seconds) / secs_per_unit
        if value >= threshold or unit == granularity:
            if unit == granularity and value > 0:
                # Never report "0 <unit>" for a non-zero delta at the
                # smallest unit the caller asked for.
                value = max(1, value)
            value = int(round(value))
            plural_form = locale.plural_form(value)
            pattern = None
            for patterns in _iter_patterns(unit):
                if patterns is not None:
                    # Not every pattern set defines every plural category;
                    # fall back to the "other" category instead of raising
                    # KeyError.
                    pattern = patterns.get(plural_form) or patterns.get('other')
                    if pattern:
                        break
            # This really should not happen
            if not pattern:
                return u''
            return pattern.replace('{0}', str(value))

    return u''
960
961
def _format_fallback_interval(start, end, skeleton, tzinfo, locale):
    """Format `start` and `end` separately and glue them with the fallback pattern.

    The formatter for the individual endpoints is chosen as follows: the
    given skeleton if the locale knows it as a datetime skeleton, otherwise
    the default date, time or datetime format depending on the types of both
    endpoints.  If both endpoints render to the same text, that text alone is
    returned.

    :param start: first instant (datetime/date/time)
    :param end: second instant (datetime/date/time)
    :param skeleton: the skeleton to try, may be `None`
    :param tzinfo: time-zone passed on to the time-aware formatters
    :param locale: a `Locale` object
    """
    endpoints = (start, end)

    if skeleton in locale.datetime_skeletons:  # Use the given skeleton
        def render(instant):
            return format_skeleton(skeleton, instant, tzinfo, locale=locale)
    elif all(isinstance(p, date) and not isinstance(p, datetime) for p in endpoints):
        # Both are just dates
        def render(instant):
            return format_date(instant, locale=locale)
    elif all(isinstance(p, time) and not isinstance(p, date) for p in endpoints):
        # Both are times
        def render(instant):
            return format_time(instant, tzinfo=tzinfo, locale=locale)
    else:
        def render(instant):
            return format_datetime(instant, tzinfo=tzinfo, locale=locale)

    start_text = render(start)
    end_text = render(end)

    if start_text == end_text:
        return render(start)

    # The fallback pattern is stored under the `None` key of the interval
    # formats; "{0}-{1}" is used when the locale has none.
    template = locale.interval_formats.get(None, "{0}-{1}")
    return template.replace("{0}", start_text).replace("{1}", end_text)
983
984
def format_interval(start, end, skeleton=None, tzinfo=None, fuzzy=True, locale=LC_TIME):
    """
    Render the span between two instants following the locale's interval rules.

    >>> format_interval(date(2016, 1, 15), date(2016, 1, 17), "yMd", locale="fi")
    u'15.\u201317.1.2016'

    >>> format_interval(time(12, 12), time(16, 16), "Hm", locale="en_GB")
    '12:12\u201316:16'

    >>> format_interval(time(5, 12), time(16, 16), "hm", locale="en_US")
    '5:12 AM \u2013 4:16 PM'

    >>> format_interval(time(16, 18), time(16, 24), "Hm", locale="it")
    '16:18\u201316:24'

    If the start instant equals the end instant, the interval is formatted like the instant.

    >>> format_interval(time(16, 18), time(16, 18), "Hm", locale="it")
    '16:18'

    Unknown skeletons fall back to "default" formatting.

    >>> format_interval(date(2015, 1, 1), date(2017, 1, 1), "wzq", locale="ja")
    '2015/01/01\uff5e2017/01/01'

    >>> format_interval(time(16, 18), time(16, 24), "xxx", locale="ja")
    '16:18:00\uff5e16:24:00'

    >>> format_interval(date(2016, 1, 15), date(2016, 1, 17), "xxx", locale="de")
    '15.01.2016 \u2013 17.01.2016'

    :param start: First instant (datetime/date/time)
    :param end: Second instant (datetime/date/time)
    :param skeleton: The "skeleton format" to use for formatting.
    :param tzinfo: tzinfo to use (if none is already attached)
    :param fuzzy: If the skeleton is not found, allow choosing a skeleton that's
                  close enough to it.
    :param locale: A locale object or identifier.
    :return: Formatted interval
    """
    locale = Locale.parse(locale)

    # NB: The quote comments below are from the algorithm description in
    #     https://www.unicode.org/reports/tr35/tr35-dates.html#intervalFormats

    # > Look for the intervalFormatItem element that matches the "skeleton",
    # > starting in the current locale and then following the locale fallback
    # > chain up to, but not including root.
    interval_formats = locale.interval_formats

    if skeleton not in interval_formats or not skeleton:
        # > If no match was found from the previous step, check what the closest
        # > match is in the fallback locale chain, as in availableFormats. That
        # > is, this allows for adjusting the string value field's width,
        # > including adjusting between "MMM" and "MMMM", and using different
        # > variants of the same field, such as 'v' and 'z'.
        skeleton = match_skeleton(skeleton, interval_formats) if (skeleton and fuzzy) else None
        if not skeleton:  # Still no match whatsoever?
            # > Otherwise, format the start and end datetime using the fallback pattern.
            return _format_fallback_interval(start, end, skeleton, tzinfo, locale)

    skel_formats = interval_formats[skeleton]

    if start == end:
        return format_skeleton(skeleton, start, tzinfo, fuzzy=fuzzy, locale=locale)

    start = _ensure_datetime_tzinfo(_get_datetime(start), tzinfo=tzinfo)
    end = _ensure_datetime_tzinfo(_get_datetime(end), tzinfo=tzinfo)
    start_fields = DateTimeFormat(start, locale=locale)
    end_fields = DateTimeFormat(end, locale=locale)

    # > If a match is found from previous steps, compute the calendar field
    # > with the greatest difference between start and end datetime. If there
    # > is no difference among any of the fields in the pattern, format as a
    # > single date using availableFormats, and return.
    for field in PATTERN_CHAR_ORDER:  # These are in largest-to-smallest order
        if field not in skel_formats:
            continue
        if start_fields.extract(field) == end_fields.extract(field):
            continue
        # > If there is a match, use the pieces of the corresponding pattern to
        # > format the start and end datetime, as above.
        pieces = [
            parse_pattern(part).apply(instant, locale)
            for part, instant in zip(skel_formats[field], (start, end))
        ]
        return "".join(pieces)

    # > Otherwise, format the start and end datetime using the fallback pattern.
    return _format_fallback_interval(start, end, skeleton, tzinfo, locale)
1081
1082
def get_period_id(time, tzinfo=None, type=None, locale=LC_TIME):
    """
    Get the day period ID for a given time.

    This ID can be used as a key for the period name dictionary.

    >>> get_period_names(locale="de")[get_period_id(time(7, 42), locale="de")]
    u'Morgen'

    Exact ("at") rules take precedence over range rules.  Range rules whose
    start lies after their end (for example from 21:00 until before 06:00)
    are treated as spanning midnight.

    :param time: The time to inspect.
    :param tzinfo: The timezone for the time. See ``format_time``.
    :param type: The period type to use. Either "selection" or None.
                 The selection type is used for selecting among phrases such as
                 “Your email arrived yesterday evening” or “Your email arrived last night”.
    :param locale: the `Locale` object, or a locale string
    :return: period ID. Something is always returned -- even if it's just "am" or "pm".
    :raises NotImplementedError: if a consulted rule uses the deprecated
                                 "after" attribute
    """
    time = _get_time(time, tzinfo)
    seconds_past_midnight = int(time.hour * 60 * 60 + time.minute * 60 + time.second)
    locale = Locale.parse(locale)

    # The LDML rules state that the rules may not overlap, so iterating in arbitrary
    # order should be alright, though `at` periods should be preferred.
    rulesets = locale.day_period_rules.get(type, {}).items()

    for rule_id, rules in rulesets:
        for rule in rules:
            if "at" in rule and rule["at"] == seconds_past_midnight:
                return rule_id

    for rule_id, rules in rulesets:
        for rule in rules:
            if "from" in rule and "before" in rule:
                if rule["from"] < rule["before"]:
                    if rule["from"] <= seconds_past_midnight < rule["before"]:
                        return rule_id
                else:
                    # The period wraps around midnight (e.g. from 21:00 until
                    # before 06:00): the time matches if it lies in either the
                    # late-evening or the early-morning part.
                    if (seconds_past_midnight >= rule["from"] or
                            seconds_past_midnight < rule["before"]):
                        return rule_id

            start_ok = end_ok = False

            if "from" in rule and seconds_past_midnight >= rule["from"]:
                start_ok = True
            if "to" in rule and seconds_past_midnight <= rule["to"]:
                # This rule type does not exist in the present CLDR data;
                # excuse the lack of test coverage.
                end_ok = True
            if "before" in rule and seconds_past_midnight < rule["before"]:
                end_ok = True
            if "after" in rule:
                raise NotImplementedError("'after' is deprecated as of CLDR 29.")

            if start_ok and end_ok:
                return rule_id

    # No locale rule applied: fall back to the plain am/pm split at noon.
    if seconds_past_midnight < 43200:
        return "am"
    else:
        return "pm"
1135
1136
def parse_date(string, locale=LC_TIME):
    """Parse a date from a string.

    This function uses the date format for the locale as a hint to determine
    the order in which the date fields appear in the string.

    >>> parse_date('4/1/04', locale='en_US')
    datetime.date(2004, 4, 1)
    >>> parse_date('01.04.2004', locale='de_DE')
    datetime.date(2004, 4, 1)

    Only the runs of digits in `string` are considered.  A two-digit year is
    interpreted as belonging to the 2000s, and if the value found in the month
    position is greater than 12, month and day are swapped.

    :param string: the string containing the date
    :param locale: a `Locale` object or a locale identifier
    :return: the parsed date
    :rtype: `date`
    :raises IndexError: if `string` contains fewer than three numbers
    :raises ValueError: if the locale's date pattern lacks a year, month or
                        day field, or the numbers do not form a valid date
    """
    # TODO: try ISO format first?
    pattern = get_date_format(locale=locale).pattern.lower()
    year_idx = pattern.index('y')
    # `find` (unlike `index`) returns -1 when the character is missing, which
    # is what makes the fallback to the stand-alone month field ('L')
    # reachable.
    month_idx = pattern.find('m')
    if month_idx < 0:
        month_idx = pattern.index('l')
    day_idx = pattern.index('d')

    # Map each field to its rank (0, 1 or 2) within the locale's pattern.
    field_order = sorted([(year_idx, 'Y'), (month_idx, 'M'), (day_idx, 'D')])
    indexes = {code: rank for rank, (_, code) in enumerate(field_order)}

    # FIXME: this currently only supports numbers, but should also support month
    #        names, both in the requested locale, and english

    numbers = re.findall(r'(\d+)', string)
    year = numbers[indexes['Y']]
    if len(year) == 2:
        year = 2000 + int(year)
    else:
        year = int(year)
    month = int(numbers[indexes['M']])
    day = int(numbers[indexes['D']])
    if month > 12:
        month, day = day, month
    return date(year, month, day)
1177
1178
def parse_time(string, locale=LC_TIME):
    """Parse a time from a string.

    This function uses the time format for the locale as a hint to determine
    the order in which the time fields appear in the string.

    >>> parse_time('15:30:00', locale='en_US')
    datetime.time(15, 30)

    Only the runs of digits in `string` are considered; hours, minutes and
    seconds must all be present.

    :param string: the string containing the time
    :param locale: a `Locale` object or a locale identifier
    :return: the parsed time
    :rtype: `time`
    :raises IndexError: if `string` contains fewer than three numbers
    :raises ValueError: if the locale's time pattern lacks an hour, minute or
                        second field, or the numbers do not form a valid time
    """
    # TODO: try ISO format first?
    pattern = get_time_format(locale=locale).pattern.lower()
    # `find` (unlike `index`) returns -1 when the character is missing, which
    # is what makes the fallback to the 'k'/'K' hour fields reachable.
    hour_idx = pattern.find('h')
    if hour_idx < 0:
        hour_idx = pattern.index('k')
    min_idx = pattern.index('m')
    sec_idx = pattern.index('s')

    # Map each field to its rank (0, 1 or 2) within the locale's pattern.
    field_order = sorted([(hour_idx, 'H'), (min_idx, 'M'), (sec_idx, 'S')])
    indexes = {code: rank for rank, (_, code) in enumerate(field_order)}

    # FIXME: support 12 hour clock, and 0-based hour specification
    #        and seconds should be optional, maybe minutes too
    #        oh, and time-zones, of course

    numbers = re.findall(r'(\d+)', string)
    hour = int(numbers[indexes['H']])
    minute = int(numbers[indexes['M']])
    second = int(numbers[indexes['S']])
    return time(hour, minute, second)
1214
1215
class DateTimePattern(object):
    """A date/time pattern paired with its ``%``-style format string.

    `pattern` is the pattern text the object was created with; `format` is
    the matching format string that gets interpolated with a
    `DateTimeFormat` instance.
    """

    def __init__(self, pattern, format):
        self.pattern = pattern
        self.format = format

    def __repr__(self):
        return '<{0} {1!r}>'.format(type(self).__name__, self.pattern)

    def __unicode__(self):
        return self.pattern

    def __str__(self):
        # On Python 2 ``str`` has to be a byte string.
        if PY2:
            return self.pattern.encode('utf-8')
        return self.pattern

    def __mod__(self, other):
        # Only exact `DateTimeFormat` instances are accepted.
        if type(other) is DateTimeFormat:
            return self.format % other
        return NotImplemented

    def apply(self, datetime, locale):
        """Format `datetime` with this pattern for the given `locale`."""
        fields = DateTimeFormat(datetime, locale)
        return self % fields
1241
1242
class DateTimeFormat(object):
    """Mapping-like adapter that renders the individual fields of a date/time.

    Instances are meant to be used as the right-hand operand of ``%`` with a
    format string made of ``%(field)s`` placeholders (see `DateTimePattern`).
    Each placeholder name is a run of one pattern character, such as
    ``"yyyy"`` or ``"MMM"``, and is resolved by `__getitem__`.
    """

    def __init__(self, value, locale):
        """
        :param value: the ``date``, ``datetime`` or ``time`` to format
        :param locale: a `Locale` object or a locale identifier
        """
        assert isinstance(value, (date, datetime, time))
        # Naive datetimes and times are treated as being in UTC.
        if isinstance(value, (datetime, time)) and value.tzinfo is None:
            value = value.replace(tzinfo=UTC)
        self.value = value
        self.locale = Locale.parse(locale)

    def __getitem__(self, name):
        """Return the formatted text for the field called `name`.

        The first character of `name` selects the field and the length of
        `name` selects its width.

        :raises KeyError: if the pattern character is not supported
        """
        char = name[0]
        num = len(name)
        if char == 'G':
            return self.format_era(char, num)
        elif char in ('y', 'Y', 'u'):
            return self.format_year(char, num)
        elif char in ('Q', 'q'):
            return self.format_quarter(char, num)
        elif char in ('M', 'L'):
            return self.format_month(char, num)
        elif char in ('w', 'W'):
            return self.format_week(char, num)
        elif char == 'd':
            return self.format(self.value.day, num)
        elif char == 'D':
            return self.format_day_of_year(num)
        elif char == 'F':
            return self.format_day_of_week_in_month()
        elif char in ('E', 'e', 'c'):
            return self.format_weekday(char, num)
        elif char == 'a':
            # TODO: Add support for the rest of the period formats (a*, b*, B*)
            return self.format_period(char)
        elif char == 'h':
            # 12-hour clock counted 1-12.
            if self.value.hour % 12 == 0:
                return self.format(12, num)
            else:
                return self.format(self.value.hour % 12, num)
        elif char == 'H':
            return self.format(self.value.hour, num)
        elif char == 'K':
            # 12-hour clock counted 0-11.
            return self.format(self.value.hour % 12, num)
        elif char == 'k':
            # 24-hour clock counted 1-24.
            if self.value.hour == 0:
                return self.format(24, num)
            else:
                return self.format(self.value.hour, num)
        elif char == 'm':
            return self.format(self.value.minute, num)
        elif char == 's':
            return self.format(self.value.second, num)
        elif char == 'S':
            return self.format_frac_seconds(num)
        elif char == 'A':
            return self.format_milliseconds_in_day(num)
        elif char in ('z', 'Z', 'v', 'V', 'x', 'X', 'O'):
            return self.format_timezone(char, num)
        else:
            raise KeyError('Unsupported date/time field %r' % char)

    def extract(self, char):
        """Return the raw numeric value of the field identified by `char`.

        Only the first character of `char` is looked at.

        :raises NotImplementedError: for fields other than ``y``, ``M``,
                                     ``d``, ``H``, ``h``, ``m`` and ``a``
        """
        char = str(char)[0]
        if char == 'y':
            return self.value.year
        elif char == 'M':
            return self.value.month
        elif char == 'd':
            return self.value.day
        elif char == 'H':
            return self.value.hour
        elif char == 'h':
            return self.value.hour % 12 or 12
        elif char == 'm':
            return self.value.minute
        elif char == 'a':
            return int(self.value.hour >= 12)  # 0 for am, 1 for pm
        else:
            raise NotImplementedError("Not implemented: extracting %r from %r" % (char, self.value))

    def format_era(self, char, num):
        """Return the era name; widths below 3 use the abbreviated form."""
        width = {3: 'abbreviated', 4: 'wide', 5: 'narrow'}[max(3, num)]
        era = int(self.value.year >= 0)
        return get_era_names(width, self.locale)[era]

    def format_year(self, char, num):
        """Return the year, zero-padded to `num` digits.

        An upper-case `char` selects the ISO week-numbering year.  A width of
        exactly 2 keeps only the last two digits.
        """
        value = self.value.year
        if char.isupper():
            value = self.value.isocalendar()[0]
        year = self.format(value, num)
        if num == 2:
            year = year[-2:]
        return year

    def format_quarter(self, char, num):
        """Return the quarter as a number (width 1-2) or as a name (3-5)."""
        quarter = (self.value.month - 1) // 3 + 1
        if num <= 2:
            return '%0*d' % (num, quarter)
        width = {3: 'abbreviated', 4: 'wide', 5: 'narrow'}[num]
        context = {'Q': 'format', 'q': 'stand-alone'}[char]
        return get_quarter_names(width, context, self.locale)[quarter]

    def format_month(self, char, num):
        """Return the month as a number (width 1-2) or as a name (3-5)."""
        if num <= 2:
            return '%0*d' % (num, self.value.month)
        width = {3: 'abbreviated', 4: 'wide', 5: 'narrow'}[num]
        context = {'M': 'format', 'L': 'stand-alone'}[char]
        return get_month_names(width, context, self.locale)[self.value.month]

    def format_week(self, char, num):
        """Return the week of the year (lower-case `char`) or of the month.

        When the first week of the period is too short to count, the week
        number of the last day of the previous period is used instead.
        """
        if char.islower():  # week of year
            day_of_year = self.get_day_of_year()
            week = self.get_week_number(day_of_year)
            if week == 0:
                # Step back to the last day of the previous year.
                last_day = self.value - timedelta(days=day_of_year)
                week = self.get_week_number(self.get_day_of_year(last_day),
                                            last_day.weekday())
            return self.format(week, num)
        else:  # week of month
            week = self.get_week_number(self.value.day)
            if week == 0:
                # Step back to the last day of the previous month.
                last_day = self.value - timedelta(days=self.value.day)
                week = self.get_week_number(last_day.day, last_day.weekday())
            return '%d' % week

    def format_weekday(self, char='E', num=4):
        """
        Return weekday from parsed datetime according to format pattern.

        >>> format = DateTimeFormat(date(2016, 2, 28), Locale.parse('en_US'))
        >>> format.format_weekday()
        u'Sunday'

        'E': Day of week - Use one through three letters for the abbreviated day name, four for the full (wide) name,
             five for the narrow name, or six for the short name.
        >>> format.format_weekday('E',2)
        u'Sun'

        'e': Local day of week. Same as E except adds a numeric value that will depend on the local starting day of the
             week, using one or two letters. In this example (en_US) the week starts on Sunday, so Sunday is day 1.
        >>> format.format_weekday('e',2)
        '01'

        'c': Stand-Alone local day of week - Use one letter for the local numeric value (same as 'e'), three for the
             abbreviated day name, four for the full (wide) name, five for the narrow name, or six for the short name.
        >>> format.format_weekday('c',1)
        '1'

        :param char: pattern format character ('e','E','c')
        :param num: count of format character

        """
        if num < 3:
            if char.islower():
                # Numeric form: 1 is the locale's first day of the week.
                value = 7 - self.locale.first_week_day + self.value.weekday()
                return self.format(value % 7 + 1, num)
            # 'E' with fewer than three letters means the abbreviated name.
            num = 3
        weekday = self.value.weekday()
        width = {3: 'abbreviated', 4: 'wide', 5: 'narrow', 6: 'short'}[num]
        if char == 'c':
            context = 'stand-alone'
        else:
            context = 'format'
        return get_day_names(width, context, self.locale)[weekday]

    def format_day_of_year(self, num):
        """Return the day of the year, zero-padded to `num` digits."""
        return self.format(self.get_day_of_year(), num)

    def format_day_of_week_in_month(self):
        """Return which occurrence of its weekday the day is in the month."""
        return '%d' % ((self.value.day - 1) // 7 + 1)

    def format_period(self, char):
        """Return the locale's name for the am/pm period of the value.

        :raises ValueError: if the locale has no name for the period in any
                            of the widths tried
        """
        period = {0: 'am', 1: 'pm'}[int(self.value.hour >= 12)]
        for width in ('wide', 'narrow', 'abbreviated'):
            period_names = get_period_names(context='format', width=width, locale=self.locale)
            if period in period_names:
                return period_names[period]
        raise ValueError('Could not format period %s in %s' % (period, self.locale))

    def format_frac_seconds(self, num):
        """Return the fractional second rendered with exactly `num` digits.

        The time's microseconds are rounded to the precision given by the
        number of digits passed in.  The result never exceeds `num` digits:
        a value that would round up to a full second is capped at all nines.
        """
        # Scale before rounding so no rounded float gets multiplied
        # afterwards; ``0.29 * 100`` is ``28.999...`` and would be truncated
        # to 28 by the integer formatting.
        scaled = self.value.microsecond * 10 ** num / 1000000
        digits = int(round(scaled))
        # 999999 microseconds at three digits would otherwise give "1000".
        return self.format(min(digits, 10 ** num - 1), num)

    def format_milliseconds_in_day(self, num):
        """Return the milliseconds elapsed since midnight, zero-padded."""
        msecs = self.value.microsecond // 1000 + self.value.second * 1000 + \
            self.value.minute * 60000 + self.value.hour * 3600000
        return self.format(msecs, num)

    def format_timezone(self, char, num):
        """Return the time-zone text for the zone field `char` of width `num`.

        NOTE: combinations not handled explicitly below ('O' with a width
        other than 4, 'X'/'x' with a width outside 1-5) fall through and
        return `None`.
        """
        width = {3: 'short', 4: 'long', 5: 'iso8601'}[max(3, num)]
        if char == 'z':
            return get_timezone_name(self.value, width, locale=self.locale)
        elif char == 'Z':
            if num == 5:
                return get_timezone_gmt(self.value, width, locale=self.locale, return_z=True)
            return get_timezone_gmt(self.value, width, locale=self.locale)
        elif char == 'O':
            if num == 4:
                return get_timezone_gmt(self.value, width, locale=self.locale)
            # TODO: To add support for O:1
        elif char == 'v':
            return get_timezone_name(self.value.tzinfo, width,
                                     locale=self.locale)
        elif char == 'V':
            if num == 1:
                return get_timezone_name(self.value.tzinfo, width,
                                         uncommon=True, locale=self.locale)
            elif num == 2:
                return get_timezone_name(self.value.tzinfo, locale=self.locale, return_zone=True)
            elif num == 3:
                return get_timezone_location(self.value.tzinfo, locale=self.locale, return_city=True)
            return get_timezone_location(self.value.tzinfo, locale=self.locale)
        # Included additional elif condition to add support for 'Xx' in timezone format
        elif char == 'X':
            if num == 1:
                return get_timezone_gmt(self.value, width='iso8601_short', locale=self.locale,
                                        return_z=True)
            elif num in (2, 4):
                return get_timezone_gmt(self.value, width='short', locale=self.locale,
                                        return_z=True)
            elif num in (3, 5):
                return get_timezone_gmt(self.value, width='iso8601', locale=self.locale,
                                        return_z=True)
        elif char == 'x':
            if num == 1:
                return get_timezone_gmt(self.value, width='iso8601_short', locale=self.locale)
            elif num in (2, 4):
                return get_timezone_gmt(self.value, width='short', locale=self.locale)
            elif num in (3, 5):
                return get_timezone_gmt(self.value, width='iso8601', locale=self.locale)

    def format(self, value, length):
        """Return `value` as an integer zero-padded to at least `length` digits."""
        return '%0*d' % (length, value)

    def get_day_of_year(self, date=None):
        """Return the 1-based day of the year of `date` (default: the value)."""
        if date is None:
            date = self.value
        return (date - date.replace(month=1, day=1)).days + 1

    def get_week_number(self, day_of_period, day_of_week=None):
        """Return the number of the week of a day within a period. This may be
        the week number in a year or the week number in a month.

        Usually this will return a value equal to or greater than 1, but if the
        first week of the period is so short that it actually counts as the last
        week of the previous period, this function will return 0.

        >>> format = DateTimeFormat(date(2006, 1, 8), Locale.parse('de_DE'))
        >>> format.get_week_number(6)
        1

        >>> format = DateTimeFormat(date(2006, 1, 8), Locale.parse('en_US'))
        >>> format.get_week_number(6)
        2

        :param day_of_period: the number of the day in the period (usually
                              either the day of month or the day of year)
        :param day_of_week: the week day; if omitted, the week day of the
                            current date is assumed
        """
        if day_of_week is None:
            day_of_week = self.value.weekday()
        # Python's ``%`` never yields a negative result for a positive
        # divisor, so no sign correction is needed here.
        first_day = (day_of_week - self.locale.first_week_day -
                     day_of_period + 1) % 7
        week_number = (day_of_period + first_day - 1) // 7

        # The partial first week only counts if it is long enough.
        if 7 - first_day >= self.locale.min_week_days:
            week_number += 1

        if self.locale.first_week_day == 0:
            # Correct the weeknumber in case of iso-calendar usage (first_week_day=0).
            # If the weeknumber exceeds the maximum number of weeks for the given year
            # we must count from zero. For example the above calculation gives week 53
            # for 2018-12-31. By iso-calendar definition 2018 has a max of 52
            # weeks, thus the weeknumber must be 53-52=1.
            max_weeks = date(year=self.value.year, day=28, month=12).isocalendar()[1]
            if week_number > max_weeks:
                week_number -= max_weeks

        return week_number
1529
1530
#: The pattern characters known to this module, each mapped to a list of
#: field lengths or to `None`.
#: NOTE(review): the lists presumably enumerate the permitted repeat counts,
#: with `None` meaning "any length" -- confirm against `parse_pattern`.
PATTERN_CHARS = {
    # era
    'G': [1, 2, 3, 4, 5],
    # year
    'y': None,
    'Y': None,
    'u': None,
    # quarter
    'Q': [1, 2, 3, 4, 5],
    'q': [1, 2, 3, 4, 5],
    # month
    'M': [1, 2, 3, 4, 5],
    'L': [1, 2, 3, 4, 5],
    # week
    'w': [1, 2],
    'W': [1],
    # day
    'd': [1, 2],
    'D': [1, 2, 3],
    'F': [1],
    'g': None,
    # week day
    'E': [1, 2, 3, 4, 5, 6],
    'e': [1, 2, 3, 4, 5, 6],
    'c': [1, 3, 4, 5, 6],
    # period
    'a': [1],
    # hour
    'h': [1, 2],
    'H': [1, 2],
    'K': [1, 2],
    'k': [1, 2],
    # minute
    'm': [1, 2],
    # second
    's': [1, 2],
    'S': None,
    'A': None,
    # zone
    'z': [1, 2, 3, 4],
    'Z': [1, 2, 3, 4, 5],
    'O': [1, 4],
    'v': [1, 4],
    'V': [1, 2, 3, 4],
    'x': [1, 2, 3, 4, 5],
    'X': [1, 2, 3, 4, 5],
}

#: The pattern characters declared in the Date Field Symbol Table
#: (https://www.unicode.org/reports/tr35/tr35-dates.html#Date_Field_Symbol_Table)
#: in order of decreasing magnitude.
PATTERN_CHAR_ORDER = "GyYuUQqMLlwWdDFgEecabBChHKkjJmsSAzZOvVXx"

# Parsed patterns, keyed by the pattern they were parsed from.
_pattern_cache = {}
1553
1554
def parse_pattern(pattern):
    """Parse date, time, and datetime format patterns.

    >>> parse_pattern("MMMMd").format
    u'%(MMMM)s%(d)s'
    >>> parse_pattern("MMM d, yyyy").format
    u'%(MMM)s %(d)s, %(yyyy)s'

    Pattern can contain literal strings in single quotes:

    >>> parse_pattern("H:mm' Uhr 'z").format
    u'%(H)s:%(mm)s Uhr %(z)s'

    An actual single quote can be used by using two adjacent single quote
    characters:

    >>> parse_pattern("hh' o''clock'").format
    u"%(hh)s o'clock"

    Results are memoized in the module-level ``_pattern_cache``, so parsing
    the same pattern again returns the same ``DateTimePattern`` object.

    :param pattern: the formatting pattern to parse; a ``DateTimePattern``
                    instance (including instances of subclasses) is
                    returned unchanged
    :return: the parsed pattern
    :rtype: DateTimePattern
    :raise ValueError: if a field is repeated a number of times that
                       ``PATTERN_CHARS`` does not allow for it
    """
    # ``isinstance`` (rather than an exact type comparison) so that instances
    # of DateTimePattern subclasses are passed through as well instead of
    # being handed to the string tokenizer below.
    if isinstance(pattern, DateTimePattern):
        return pattern

    if pattern in _pattern_cache:
        return _pattern_cache[pattern]

    result = []

    for tok_type, tok_value in tokenize_pattern(pattern):
        if tok_type == "chars":
            # The result is used as a %-format string, so literal percent
            # signs have to be doubled.
            result.append(tok_value.replace('%', '%%'))
        elif tok_type == "field":
            fieldchar, fieldnum = tok_value
            limit = PATTERN_CHARS[fieldchar]
            # A falsy limit (None) means any width is acceptable.
            if limit and fieldnum not in limit:
                raise ValueError('Invalid length for field: %r'
                                 % (fieldchar * fieldnum))
            result.append('%%(%s)s' % (fieldchar * fieldnum))
        else:
            raise NotImplementedError("Unknown token type: %s" % tok_type)

    _pattern_cache[pattern] = pat = DateTimePattern(pattern, u''.join(result))
    return pat
1599
1600
def tokenize_pattern(pattern):
    """
    Tokenize date format patterns.

    Returns a list of (token_type, token_value) tuples.

    ``token_type`` may be either "chars" or "field".

    For "chars" tokens, the value is the literal value.

    For "field" tokens, the value is a tuple of (field character, repetition count).

    Text enclosed in single quotes is taken literally, and two adjacent
    single quotes stand for one literal single quote.  A quote that is never
    closed extends to the end of the pattern; its text is kept as a "chars"
    token rather than being dropped.

    :param pattern: Pattern string
    :type pattern: str
    :rtype: list[tuple]
    """
    tokens = []
    literal = []   # literal characters collected since the last token
    quoted = None  # characters of the quote being read; None outside quotes
    # Current field as [character, repetition count].  It lives in a list so
    # that the nested helper can reset it without ``nonlocal`` (this file
    # imports Python 2 compatibility helpers).
    field = ['', 0]

    def flush_literal():
        if literal:
            # NUL stands in for an escaped quote (see the loop below).
            tokens.append(('chars', ''.join(literal).replace('\0', "'")))
            del literal[:]

    def flush_field():
        if field[0]:
            tokens.append(('field', (field[0], field[1])))
            field[:] = ['', 0]

    # Replace the '' escape by NUL up front so that every single quote that
    # is left is a real quote delimiter.
    for char in pattern.replace("''", '\0'):
        if quoted is not None:
            if char == "'":  # end of quote
                literal.extend(quoted)
                quoted = None
            else:  # inside quote
                quoted.append(char)
        elif char == "'":  # quote started
            # At most one of these holds pending data at any time.
            flush_field()
            flush_literal()
            quoted = []
        elif char in PATTERN_CHARS:
            flush_literal()
            if char == field[0]:
                field[1] += 1
            else:
                flush_field()
                field[:] = [char, 1]
        else:
            flush_field()
            literal.append(char)

    if quoted is not None:
        # Unterminated quote: treat what was read as literal text instead of
        # silently discarding it.
        literal.extend(quoted)

    flush_field()
    flush_literal()

    return tokens
1668
1669
def untokenize_pattern(tokens):
    """
    Turn a date format pattern token stream back into a string.

    This is the reverse operation of ``tokenize_pattern``.

    "field" tokens are written as the field character repeated by its count.
    "chars" tokens are wrapped in single quotes when they contain a character
    listed in ``PATTERN_CHARS``; literal single quotes are always doubled so
    that they are not read back as quote delimiters.  Tokens of any other
    type are ignored.

    :type tokens: Iterable[tuple]
    :rtype: str
    """
    output = []
    for tok_type, tok_value in tokens:
        if tok_type == "field":
            fieldchar, fieldnum = tok_value
            output.append(fieldchar * fieldnum)
        elif tok_type == "chars":
            # '' is the escape for a literal quote both inside and outside
            # of a quoted section.
            escaped = tok_value.replace("'", "''")
            if any(ch in PATTERN_CHARS for ch in tok_value):
                # Would otherwise be read back as fields, so quote it.
                output.append("'%s'" % escaped)
            else:  # No need to quote
                output.append(escaped)
    return "".join(output)
1689
1690
def split_interval_pattern(pattern):
    """
    Break an interval datetime pattern into its consecutive sub-patterns.

    A new sub-pattern starts at every field whose character has already been
    seen in the sub-pattern currently being collected:

    > The pattern is then designed to be broken up into two pieces by determining the first repeating field.
    - https://www.unicode.org/reports/tr35/tr35-dates.html#intervalFormats

    >>> split_interval_pattern(u'E d.M. \u2013 E d.M.')
    [u'E d.M. \u2013 ', 'E d.M.']
    >>> split_interval_pattern("Y 'text' Y 'more text'")
    ["Y 'text '", "Y 'more text'"]
    >>> split_interval_pattern(u"E, MMM d \u2013 E")
    [u'E, MMM d \u2013 ', u'E']
    >>> split_interval_pattern("MMM d")
    ['MMM d']
    >>> split_interval_pattern("y G")
    ['y G']
    >>> split_interval_pattern(u"MMM d \u2013 d")
    [u'MMM d \u2013 ', u'd']

    :param pattern: Interval pattern string
    :return: list of "subpatterns"
    """
    groups = [[]]
    fields_in_group = set()

    for token in tokenize_pattern(pattern):
        kind, value = token
        if kind == "field":
            symbol = value[0]
            if symbol in fields_in_group:
                # Repeated field: everything from here on belongs to a new
                # sub-pattern, which starts with a clean slate.
                groups.append([])
                fields_in_group = set()
            fields_in_group.add(symbol)
        groups[-1].append((kind, value))

    return list(map(untokenize_pattern, groups))
1727
1728
def match_skeleton(skeleton, options, allow_different_fields=False):
    """
    Pick the option that is closest to the given datetime skeleton.

    This uses the rules outlined in the TR35 document.

    >>> match_skeleton('yMMd', ('yMd', 'yMMMd'))
    'yMd'

    >>> match_skeleton('yMMd', ('jyMMd',), allow_different_fields=True)
    'jyMMd'

    >>> match_skeleton('yMMd', ('qyMMd',), allow_different_fields=False)

    >>> match_skeleton('hmz', ('hmv',))
    'hmv'

    :param skeleton: The skeleton to match
    :type skeleton: str
    :param options: An iterable of other skeletons to match against
    :type options: Iterable[str]
    :param allow_different_fields: If false, an option is rejected as soon as
                                   it has a field the skeleton lacks or vice
                                   versa; if true, such a difference only adds
                                   a large penalty to the option's distance.
    :type allow_different_fields: bool
    :return: The closest skeleton match, or if no match was found, None.
    :rtype: str|None
    """

    # TODO: maybe implement pattern expansion?

    # Based on the implementation in
    # http://source.icu-project.org/repos/icu/icu4j/trunk/main/classes/core/src/com/ibm/icu/text/DateIntervalInfo.java

    # Drop falsy entries (there may be a None key when `interval_formats` is
    # passed in) and sort so that the result is stable.
    candidates = sorted(candidate for candidate in options if candidate)

    # Fall back from 'z' to 'v' when none of the candidates offers a 'z'.
    if 'z' in skeleton and not any('z' in candidate for candidate in candidates):
        skeleton = skeleton.replace('z', 'v')

    def field_widths(pattern):
        # Map every field character of `pattern` to its repetition count.
        return dict(value for kind, value in tokenize_pattern(pattern) if kind == "field")

    wanted_widths = field_widths(skeleton)

    def distance_to(candidate):
        # Distance between the skeleton and `candidate`, or None when the
        # candidate has to be rejected because of differing fields.
        offered_widths = field_widths(candidate)
        total = 0
        for field in PATTERN_CHARS:
            wanted = wanted_widths.get(field, 0)
            offered = offered_widths.get(field, 0)
            if wanted == offered:
                continue
            if wanted == 0 or offered == 0:
                if not allow_different_fields:  # This one is not okay
                    return None
                total += 0x1000  # Magic weight constant for "entirely different fields"
            elif field == 'M' and (wanted > 2) != (offered > 2):
                total += 0x100  # Magic weight for "text turns into a number"
            else:
                total += abs(wanted - offered)
        return total

    best_skeleton = None
    best_distance = None
    for candidate in candidates:
        distance = distance_to(candidate)
        if distance is None:
            continue

        if best_skeleton is None or distance < best_distance:
            best_skeleton, best_distance = candidate, distance

        if distance == 0:  # Found a perfect match!
            break

    return best_skeleton
1797