1from datetime import timedelta, timezone
2
3from cpython.datetime cimport datetime, timedelta, tzinfo
4
5# dateutil compat
6
7from dateutil.tz import (
8    gettz as dateutil_gettz,
9    tzfile as _dateutil_tzfile,
10    tzlocal as _dateutil_tzlocal,
11    tzutc as _dateutil_tzutc,
12)
13import pytz
14from pytz.tzinfo import BaseTzInfo as _pytz_BaseTzInfo
15
16UTC = pytz.utc
17
18
19import numpy as np
20
21cimport numpy as cnp
22from numpy cimport int64_t
23
24cnp.import_array()
25
26# ----------------------------------------------------------------------
27from pandas._libs.tslibs.util cimport get_nat, is_integer_object
28
29
# Value returned by get_nat(); `NPY_NAT + 1` is used in get_dst_info as the
# placeholder for the first transition time.
cdef int64_t NPY_NAT = get_nat()
# The stdlib and pytz UTC objects; is_utc compares against them by identity.
cdef tzinfo utc_stdlib = timezone.utc
cdef tzinfo utc_pytz = UTC
33
34# ----------------------------------------------------------------------
35
cpdef inline bint is_utc(tzinfo tz):
    """
    Check whether ``tz`` is one of the recognized UTC timezone objects.

    Parameters
    ----------
    tz : tzinfo

    Returns
    -------
    bool
        True if ``tz`` is the module's pytz UTC object, the stdlib
        ``timezone.utc`` object, or an instance of dateutil's ``tzutc``.
    """
    # The pytz and stdlib UTC objects are matched by identity.
    if tz is utc_pytz or tz is utc_stdlib:
        return True
    # dateutil's tzutc is matched by type.
    return isinstance(tz, _dateutil_tzutc)
38
39
cdef inline bint is_tzlocal(tzinfo tz):
    """
    Return True if ``tz`` is an instance of dateutil's ``tzlocal``.
    """
    cdef bint matches = isinstance(tz, _dateutil_tzlocal)
    return matches
42
43
cdef inline bint treat_tz_as_pytz(tzinfo tz):
    """
    Return True if ``tz`` exposes both the ``_utc_transition_times`` and
    ``_transition_info`` attributes, which this module reads when handling
    a timezone as a pytz zone with transitions.
    """
    if not hasattr(tz, '_utc_transition_times'):
        return False
    return hasattr(tz, '_transition_info')
47
48
cdef inline bint treat_tz_as_dateutil(tzinfo tz):
    """
    Return True if ``tz`` exposes both the ``_trans_list`` and ``_trans_idx``
    attributes, which this module reads when handling a timezone as a
    dateutil zone.
    """
    if not hasattr(tz, '_trans_list'):
        return False
    return hasattr(tz, '_trans_idx')
51
52
cpdef inline object get_timezone(tzinfo tz):
    """
    Reduce a tzinfo object to a value identifying its *zone*.

    The result has to:
    1) tell pytz and dateutil timezones apart,
    2) not be over-specific (e.g. US/Eastern with/without DST is the same
       *zone* but a different tz object),
    3) be serializable where possible (UJSON/pytables), hence a string.

    maybe_get_tz is the inverse of this process.

    Parameters
    ----------
    tz : tzinfo

    Returns
    -------
    str or tzinfo
        - ``tz`` itself if it is a UTC timezone (see is_utc),
        - ``'dateutil/' + tz._filename`` for dateutil timezones,
        - ``tz.zone`` if that attribute exists and is not None,
        - otherwise ``tz`` unchanged.

    Raises
    ------
    ValueError
        If a dateutil timezone has '.tar.gz' in its ``_filename``.
    """
    if is_utc(tz):
        return tz

    if treat_tz_as_dateutil(tz):
        if '.tar.gz' in tz._filename:
            raise ValueError(
                'Bad tz filename. Dateutil on python 3 on windows has a '
                'bug which causes tzfile._filename to be the same for all '
                'timezone files. Please construct dateutil timezones '
                'implicitly by passing a string like "dateutil/Europe'
                '/London" when you construct your pandas objects instead '
                'of passing a timezone object. See '
                'https://github.com/pandas-dev/pandas/pull/7362')
        return 'dateutil/' + tz._filename

    # tz is a pytz timezone or unknown: use its zone name when it has one.
    try:
        zone = tz.zone
    except AttributeError:
        return tz
    return tz if zone is None else zone
89
90
cpdef inline tzinfo maybe_get_tz(object tz):
    """
    (Maybe) Construct a timezone object from a string or integer.

    Parameters
    ----------
    tz : str, int, tzinfo or None
        - ``'tzlocal()'`` gives a dateutil ``tzlocal`` instance.
        - ``'dateutil/<zone>'`` is looked up with dateutil's ``gettz``.
        - Strings starting with ``'+'``/``'-'`` (e.g. ``'+01:00'``) or with
          ``'UTC+'``/``'UTC-'`` (e.g. ``'UTC+01:00'``) give a fixed-offset
          ``datetime.timezone``; hours and minutes are read from fixed
          character positions.
        - Any other string, including the empty string, is passed to
          ``pytz.timezone``.
        - An integer is divided by 60 and passed to ``pytz.FixedOffset``
          (presumably an offset in seconds -- confirm against callers).
        - A tzinfo object or None is returned unchanged.

    Returns
    -------
    tzinfo or None

    Raises
    ------
    TypeError
        If ``tz`` is none of the accepted types.
    """
    if isinstance(tz, str):
        if tz == 'tzlocal()':
            tz = _dateutil_tzlocal()
        elif tz.startswith('dateutil/'):
            zone = tz[9:]
            tz = dateutil_gettz(zone)
            # On Python 3 on Windows, the filename is not always set correctly.
            if isinstance(tz, _dateutil_tzfile) and '.tar.gz' in tz._filename:
                tz._filename = zone
        elif tz.startswith(('-', '+')):
            # startswith (rather than tz[0]) so that an empty string does not
            # raise IndexError but reaches the pytz lookup below.
            hours = int(tz[0:3])
            # Re-attach the sign so the minutes carry it as well.
            minutes = int(tz[0] + tz[4:6])
            tz = timezone(timedelta(hours=hours, minutes=minutes))
        elif tz.startswith(('UTC-', 'UTC+')):
            hours = int(tz[3:6])
            minutes = int(tz[3] + tz[7:9])
            tz = timezone(timedelta(hours=hours, minutes=minutes))
        else:
            tz = pytz.timezone(tz)
    elif is_integer_object(tz):
        tz = pytz.FixedOffset(tz / 60)
    elif isinstance(tz, tzinfo):
        pass
    elif tz is None:
        pass
    else:
        raise TypeError(type(tz))
    return tz
124
125
def _p_tz_cache_key(tz):
    """
    Python-callable wrapper around the cdef function tz_cache_key, so that
    it can be exercised from tests.
    """
    key = tz_cache_key(tz)
    return key
131
132
# Timezone data cache, filled and read by get_dst_info. Keys are the values
# returned by tz_cache_key (the pytz zone string, or 'dateutil' followed by
# the dateutil file name); values are (trans, deltas, typ) tuples.
dst_cache = {}
135
136
cdef inline object tz_cache_key(tzinfo tz):
    """
    Return the cache key for a timezone object, or None if it is unknown.

    Parameters
    ----------
    tz : tzinfo

    Returns
    -------
    str or None
        ``tz.zone`` for pytz ``BaseTzInfo`` instances, ``'dateutil'``
        followed by ``tz._filename`` for dateutil ``tzfile`` instances,
        and None for anything else.

    Raises
    ------
    ValueError
        If a dateutil ``tzfile`` has '.tar.gz' in its ``_filename``.

    Notes
    -----
    This cannot just be the hash of a timezone object. Unfortunately, the
    hashes of two dateutil tz objects which represent the same timezone are
    not equal (even though the tz objects will compare equal and represent
    the same tz file). Also, pytz objects are not always hashable.
    """
    if isinstance(tz, _pytz_BaseTzInfo):
        return tz.zone

    if not isinstance(tz, _dateutil_tzfile):
        # Neither pytz nor a dateutil tzfile: no key available.
        return None

    if '.tar.gz' in tz._filename:
        raise ValueError('Bad tz filename. Dateutil on python 3 on '
                         'windows has a bug which causes tzfile._filename '
                         'to be the same for all timezone files. Please '
                         'construct dateutil timezones implicitly by '
                         'passing a string like "dateutil/Europe/London" '
                         'when you construct your pandas objects instead '
                         'of passing a timezone object. See '
                         'https://github.com/pandas-dev/pandas/pull/7362')
    return 'dateutil' + tz._filename
168
169
170# ----------------------------------------------------------------------
171# UTC Offsets
172
173
cdef timedelta get_utcoffset(tzinfo tz, datetime obj):
    """
    Return the UTC offset of ``tz``.

    The ``_utcoffset`` attribute is used when ``tz`` has one; otherwise the
    result of ``tz.utcoffset(obj)`` is returned.
    """
    try:
        offset = tz._utcoffset
    except AttributeError:
        # No precomputed offset attribute; ask the tzinfo itself.
        offset = tz.utcoffset(obj)
    return offset
179
180
cdef inline bint is_fixed_offset(tzinfo tz):
    """
    Return True if ``tz`` has no recorded transitions.

    dateutil-like and pytz-like objects count as fixed only when both of
    their transition containers are empty. Any other tzinfo, which includes
    datetime.timezone objects, is considered fixed.
    """
    if treat_tz_as_dateutil(tz):
        return len(tz._trans_idx) == 0 and len(tz._trans_list) == 0

    if treat_tz_as_pytz(tz):
        return (len(tz._transition_info) == 0
                and len(tz._utc_transition_times) == 0)

    # This also implicitly accepts datetime.timezone objects.
    return True
196
197
cdef object _get_utc_trans_times_from_dateutil_tz(tzinfo tz):
    """
    Convert the transition times of a dateutil timezone to UTC.

    dateutil stores transition times in local non-dst time; this undoes
    that, reversing the code in dateutil.tz.tzfile.__init__.

    Returns
    -------
    list
        A copy of ``tz._trans_list`` in which each entry paired with an
        entry of ``tz._trans_idx`` has had the most recent non-dst offset
        subtracted (0 until a non-dst entry has been seen).
    """
    local_trans = tz._trans_list
    utc_trans = list(local_trans)
    std_offset = 0
    for pos, (stamp, ttinfo) in enumerate(zip(local_trans, tz._trans_idx)):
        # DST entries keep using the offset of the last non-dst entry.
        if not ttinfo.isdst:
            std_offset = ttinfo.offset
        utc_trans[pos] = stamp - std_offset
    return utc_trans
211
212
cdef int64_t[:] unbox_utcoffsets(object transinfo):
    """
    Build an int64 array of nanosecond offsets from ``transinfo``.

    For each entry, the total seconds of its first element are truncated
    to an integer and multiplied by 1_000_000_000.
    """
    cdef:
        Py_ssize_t idx
        Py_ssize_t count = len(transinfo)
        int64_t[:] offsets = np.empty(count, dtype='i8')

    for idx in range(count):
        utcoffset = transinfo[idx][0]
        offsets[idx] = int(utcoffset.total_seconds()) * 1_000_000_000

    return offsets
225
226
227# ----------------------------------------------------------------------
228# Daylight Savings
229
230
cdef object get_dst_info(tzinfo tz):
    """
    Return the transition times and UTC offsets for a timezone, caching the
    result in ``dst_cache`` when ``tz`` has a cache key.

    Parameters
    ----------
    tz : tzinfo

    Returns
    -------
    ndarray[int64_t]
        Nanosecond UTC times of DST transitions.
    ndarray[int64_t]
        Nanosecond UTC offsets corresponding to DST transitions.
    str
        Describing the type of tzinfo object. One of "unknown", "pytz",
        "dateutil", "fixed" or "static".

    Raises
    ------
    AssertionError
        If a dateutil-like ``tz`` has an empty ``_trans_list`` but is not
        considered a fixed offset by is_fixed_offset.
    """
    cache_key = tz_cache_key(tz)
    if cache_key is None:
        # No cache key, so the result is computed on every call and never
        # stored in dst_cache.
        # e.g. pytz.FixedOffset, matplotlib.dates._UTC,
        # psycopg2.tz.FixedOffsetTimezone
        num = int(get_utcoffset(tz, None).total_seconds()) * 1_000_000_000
        return (np.array([NPY_NAT + 1], dtype=np.int64),
                np.array([num], dtype=np.int64),
                "unknown")

    if cache_key not in dst_cache:
        if treat_tz_as_pytz(tz):
            trans = np.array(tz._utc_transition_times, dtype='M8[ns]')
            trans = trans.view('i8')
            # A first transition in year 1 is replaced by the NPY_NAT + 1
            # placeholder.
            if tz._utc_transition_times[0].year == 1:
                trans[0] = NPY_NAT + 1
            # NOTE: unbox_utcoffsets is declared to return an int64_t[:]
            # memoryview rather than an ndarray.
            deltas = unbox_utcoffsets(tz._transition_info)
            typ = 'pytz'

        elif treat_tz_as_dateutil(tz):
            if len(tz._trans_list):
                # get utc trans times
                trans_list = _get_utc_trans_times_from_dateutil_tz(tz)
                trans = np.hstack([
                    np.array([0], dtype='M8[s]'),  # place holder for 1st item
                    np.array(trans_list, dtype='M8[s]')]).astype(
                    'M8[ns]')  # all trans listed
                trans = trans.view('i8')
                trans[0] = NPY_NAT + 1

                # deltas: the offset before the first transition followed by
                # the offset of each transition, converted from seconds to
                # nanoseconds.
                deltas = np.array([v.offset for v in (
                    tz._ttinfo_before,) + tz._trans_idx], dtype='i8')
                deltas *= 1000000000
                typ = 'dateutil'

            elif is_fixed_offset(tz):
                # No transitions: a single placeholder transition with the
                # standard offset in nanoseconds.
                trans = np.array([NPY_NAT + 1], dtype=np.int64)
                deltas = np.array([tz._ttinfo_std.offset],
                                  dtype='i8') * 1000000000
                typ = 'fixed'
            else:
                # 2018-07-12 this is not reached in the tests, and this case
                # is not handled in any of the functions that call
                # get_dst_info.  If this case _were_ hit the calling
                # functions would then hit an IndexError because they assume
                # `deltas` is non-empty.
                # (under the just-deleted code that returned empty arrays)
                raise AssertionError("dateutil tzinfo is not a FixedOffset "
                                     "and has an empty `_trans_list`.", tz)
        else:
            # static tzinfo, we can get here with pytz.StaticTZInfo
            #  which are not caught by treat_tz_as_pytz
            trans = np.array([NPY_NAT + 1], dtype=np.int64)
            num = int(get_utcoffset(tz, None).total_seconds()) * 1_000_000_000
            deltas = np.array([num], dtype=np.int64)
            typ = "static"

        dst_cache[cache_key] = (trans, deltas, typ)

    return dst_cache[cache_key]
302
303
def infer_tzinfo(datetime start, datetime end):
    """
    Infer a single tzinfo from up to two datetimes.

    Parameters
    ----------
    start : datetime or None
    end : datetime or None

    Returns
    -------
    tzinfo or None
        The tzinfo of ``start`` when it is given, else the tzinfo of
        ``end``, else None.

    Raises
    ------
    AssertionError
        If both are given and their timezones do not match according to
        tz_compare.
    """
    if start is None:
        # Only `end` can supply a timezone (None when both are missing).
        return None if end is None else end.tzinfo

    tz = start.tzinfo
    if end is not None and not tz_compare(tz, end.tzinfo):
        raise AssertionError(f'Inputs must both have the same timezone, '
                             f'{tz} != {end.tzinfo}')
    return tz
317
318
cpdef bint tz_compare(tzinfo start, tzinfo end):
    """
    Compare two timezones through the values returned by get_timezone.

    The same timezone can be represented by different tzinfo instances.
    For example
    `<DstTzInfo 'Europe/Paris' LMT+0:09:00 STD>` and
    `<DstTzInfo 'Europe/Paris' CET+1:00:00 STD>` are essentially the same
    timezone but do not evaluate as such, whereas the string representation
    of both is `'Europe/Paris'`.

    This exists only to add a notion of equality to pytz-style zones
    that is compatible with the notion of equality expected of tzinfo
    subclasses.

    Parameters
    ----------
    start : tzinfo
    end : tzinfo

    Returns
    -------
    bool
    """
    # GH 18523
    start_zone = get_timezone(start)
    end_zone = get_timezone(end)
    return start_zone == end_zone
345
346
def tz_standardize(tz: tzinfo):
    """
    If the passed tz is a pytz timezone object, "normalize" it to a
    consistent version by looking it up again with ``pytz.timezone`` under
    its string representation. Any other tz is returned unchanged.

    Parameters
    ----------
    tz : tz object

    Returns
    -------
    tz object

    Examples
    --------
    >>> tz
    <DstTzInfo 'US/Pacific' PST-1 day, 16:00:00 STD>

    >>> tz_standardize(tz)
    <DstTzInfo 'US/Pacific' LMT-1 day, 16:07:00 STD>

    >>> tz
    <DstTzInfo 'US/Pacific' LMT-1 day, 16:07:00 STD>

    >>> tz_standardize(tz)
    <DstTzInfo 'US/Pacific' LMT-1 day, 16:07:00 STD>

    >>> tz
    dateutil.tz.tz.tzutc

    >>> tz_standardize(tz)
    dateutil.tz.tz.tzutc
    """
    if not treat_tz_as_pytz(tz):
        # Not a pytz zone with transitions: nothing to normalize.
        return tz
    return pytz.timezone(str(tz))
383