from datetime import timedelta, timezone

from cpython.datetime cimport datetime, timedelta, tzinfo

# dateutil compat

from dateutil.tz import (
    gettz as dateutil_gettz,
    tzfile as _dateutil_tzfile,
    tzlocal as _dateutil_tzlocal,
    tzutc as _dateutil_tzutc,
)
import pytz
from pytz.tzinfo import BaseTzInfo as _pytz_BaseTzInfo

UTC = pytz.utc


import numpy as np

cimport numpy as cnp
from numpy cimport int64_t

cnp.import_array()

# ----------------------------------------------------------------------
from pandas._libs.tslibs.util cimport get_nat, is_integer_object


cdef int64_t NPY_NAT = get_nat()
cdef tzinfo utc_stdlib = timezone.utc
cdef tzinfo utc_pytz = UTC

# ----------------------------------------------------------------------

cpdef inline bint is_utc(tzinfo tz):
    """
    Return True if `tz` is the pytz UTC singleton, the stdlib
    ``timezone.utc`` singleton, or an instance of dateutil's ``tzutc``.
    """
    return tz is utc_pytz or tz is utc_stdlib or isinstance(tz, _dateutil_tzutc)


cdef inline bint is_tzlocal(tzinfo tz):
    """
    Return True if `tz` is an instance of dateutil's ``tzlocal``.
    """
    return isinstance(tz, _dateutil_tzlocal)


cdef inline bint treat_tz_as_pytz(tzinfo tz):
    """
    Duck-typed check: True if `tz` exposes both ``_utc_transition_times``
    and ``_transition_info``.
    """
    return (hasattr(tz, '_utc_transition_times') and
            hasattr(tz, '_transition_info'))


cdef inline bint treat_tz_as_dateutil(tzinfo tz):
    """
    Duck-typed check: True if `tz` exposes both ``_trans_list`` and
    ``_trans_idx``.
    """
    return hasattr(tz, '_trans_list') and hasattr(tz, '_trans_idx')


cpdef inline object get_timezone(tzinfo tz):
    """
    We need to do several things here:
    1) Distinguish between pytz and dateutil timezones
    2) Not be over-specific (e.g. US/Eastern with/without DST is same *zone*
       but a different tz object)
    3) Provide something to serialize when we're storing a datetime object
       in pytables.

    For a dateutil tz we return a string prefaced with ``dateutil/``; for a
    tz with a non-None ``zone`` attribute (e.g. pytz) we return that zone
    name. A string is preferred so that it can be serialized with
    UJSON/pytables. UTC timezones and timezones without a usable ``zone``
    are returned unchanged as tz objects. maybe_get_tz (below) is the
    inverse of this process.

    Parameters
    ----------
    tz : tzinfo

    Returns
    -------
    str or tzinfo

    Raises
    ------
    ValueError
        If `tz` is treated as a dateutil tz and its ``_filename`` contains
        '.tar.gz'.
    """
    if is_utc(tz):
        return tz
    else:
        if treat_tz_as_dateutil(tz):
            if '.tar.gz' in tz._filename:
                raise ValueError(
                    'Bad tz filename. Dateutil on python 3 on windows has a '
                    'bug which causes tzfile._filename to be the same for all '
                    'timezone files. Please construct dateutil timezones '
                    'implicitly by passing a string like "dateutil/Europe'
                    '/London" when you construct your pandas objects instead '
                    'of passing a timezone object. See '
                    'https://github.com/pandas-dev/pandas/pull/7362')
            return 'dateutil/' + tz._filename
        else:
            # tz is a pytz timezone or unknown.
            try:
                zone = tz.zone
                if zone is None:
                    return tz
                return zone
            except AttributeError:
                return tz


cpdef inline tzinfo maybe_get_tz(object tz):
    """
    (Maybe) Construct a timezone object from a string. If tz is a string, use
    it to construct a timezone object. Otherwise, just return tz.

    Accepted inputs:

    - ``'tzlocal()'``: a new dateutil ``tzlocal``.
    - ``'dateutil/<zone>'``: looked up with ``dateutil.tz.gettz``.
    - a string starting with ``'+'`` or ``'-'`` (e.g. ``'+05:30'``), or with
      ``'UTC+'`` / ``'UTC-'``: a fixed-offset stdlib ``timezone``.
    - any other string: passed to ``pytz.timezone``.
    - an integer: passed, divided by 60, to ``pytz.FixedOffset``.
    - a tzinfo or None: returned unchanged.

    Raises
    ------
    TypeError
        If `tz` is none of the above.
    """
    if isinstance(tz, str):
        if tz == 'tzlocal()':
            tz = _dateutil_tzlocal()
        elif tz.startswith('dateutil/'):
            zone = tz[9:]
            tz = dateutil_gettz(zone)
            # On Python 3 on Windows, the filename is not always set correctly.
            if isinstance(tz, _dateutil_tzfile) and '.tar.gz' in tz._filename:
                tz._filename = zone
        elif tz.startswith(('-', '+')):
            # startswith (rather than tz[0]) so that an empty string does not
            # raise a bare IndexError here; it falls through to pytz.timezone,
            # which reports the unknown zone itself.
            hours = int(tz[0:3])
            # Re-attach the sign so the minutes point the same way as hours.
            minutes = int(tz[0] + tz[4:6])
            tz = timezone(timedelta(hours=hours, minutes=minutes))
        elif tz[0:4] in {'UTC-', 'UTC+'}:
            hours = int(tz[3:6])
            minutes = int(tz[3] + tz[7:9])
            tz = timezone(timedelta(hours=hours, minutes=minutes))
        else:
            tz = pytz.timezone(tz)
    elif is_integer_object(tz):
        # NOTE(review): the integer is presumably an offset in seconds,
        # converted to the minutes pytz.FixedOffset expects -- confirm.
        tz = pytz.FixedOffset(tz / 60)
    elif isinstance(tz, tzinfo):
        pass
    elif tz is None:
        pass
    else:
        raise TypeError(type(tz))
    return tz


def _p_tz_cache_key(tz):
    """
    Python interface for cache function to facilitate testing.
    """
    return tz_cache_key(tz)


# Timezone data caches, key is the pytz string or dateutil file name.
dst_cache = {}


cdef inline object tz_cache_key(tzinfo tz):
    """
    Return the key in the cache for the timezone info object or None
    if unknown.

    The key is currently the tz string for pytz timezones, the filename for
    dateutil timezones.

    Notes
    -----
    This cannot just be the hash of a timezone object. Unfortunately, the
    hashes of two dateutil tz objects which represent the same timezone are
    not equal (even though the tz objects will compare equal and represent
    the same tz file). Also, pytz objects are not always hashable so we use
    the zone name (``tz.zone``) instead.

    Raises
    ------
    ValueError
        If `tz` is a dateutil ``tzfile`` whose ``_filename`` contains
        '.tar.gz'.
    """
    if isinstance(tz, _pytz_BaseTzInfo):
        return tz.zone
    elif isinstance(tz, _dateutil_tzfile):
        if '.tar.gz' in tz._filename:
            raise ValueError('Bad tz filename. Dateutil on python 3 on '
                             'windows has a bug which causes tzfile._filename '
                             'to be the same for all timezone files. Please '
                             'construct dateutil timezones implicitly by '
                             'passing a string like "dateutil/Europe/London" '
                             'when you construct your pandas objects instead '
                             'of passing a timezone object. See '
                             'https://github.com/pandas-dev/pandas/pull/7362')
        return 'dateutil' + tz._filename
    else:
        return None


# ----------------------------------------------------------------------
# UTC Offsets


cdef timedelta get_utcoffset(tzinfo tz, datetime obj):
    """
    Return ``tz._utcoffset`` when that attribute exists, otherwise
    ``tz.utcoffset(obj)``.
    """
    try:
        return tz._utcoffset
    except AttributeError:
        return tz.utcoffset(obj)


cdef inline bint is_fixed_offset(tzinfo tz):
    """
    Return True if `tz` has no recorded transitions.

    For dateutil-like and pytz-like timezones this means both transition
    containers are empty. Anything else is considered fixed.
    """
    if treat_tz_as_dateutil(tz):
        return len(tz._trans_idx) == 0 and len(tz._trans_list) == 0
    elif treat_tz_as_pytz(tz):
        return (len(tz._transition_info) == 0
                and len(tz._utc_transition_times) == 0)
    # This also implicitly accepts datetime.timezone objects which are
    # considered fixed
    return True


cdef object _get_utc_trans_times_from_dateutil_tz(tzinfo tz):
    """
    Transition times in dateutil timezones are stored in local non-dst
    time.  This code converts them to UTC. It's the reverse of the code
    in dateutil.tz.tzfile.__init__.

    Returns
    -------
    list
        One entry per element of ``tz._trans_list``.
    """
    new_trans = list(tz._trans_list)
    last_std_offset = 0
    for i, (trans, tti) in enumerate(zip(tz._trans_list, tz._trans_idx)):
        # Only non-DST entries update the offset; a DST entry reuses the
        # most recent standard offset seen so far.
        if not tti.isdst:
            last_std_offset = tti.offset
        new_trans[i] = trans - last_std_offset
    return new_trans


cdef int64_t[:] unbox_utcoffsets(object transinfo):
    """
    Convert the first element of each entry of `transinfo` (an object with
    ``total_seconds()``) to integer nanoseconds.

    Fractional seconds are truncated by ``int()`` before scaling.
    """
    cdef:
        Py_ssize_t i, sz
        int64_t[:] arr

    sz = len(transinfo)
    arr = np.empty(sz, dtype='i8')

    for i in range(sz):
        arr[i] = int(transinfo[i][0].total_seconds()) * 1_000_000_000

    return arr


# ----------------------------------------------------------------------
# Daylight Savings


cdef object get_dst_info(tzinfo tz):
    """
    Return the transition times, UTC offsets and a type tag for `tz`.

    Results for timezones with a cache key (see tz_cache_key) are stored in
    the module-level ``dst_cache`` and reused on later calls.

    Returns
    -------
    ndarray[int64_t]
        Nanosecond UTC times of DST transitions.
    ndarray[int64_t]
        Nanosecond UTC offsets corresponding to DST transitions.
    str
        Describing the type of tzinfo object.

    Raises
    ------
    AssertionError
        If `tz` is treated as a dateutil tz, has an empty ``_trans_list``
        and is not a fixed offset.
    """
    cache_key = tz_cache_key(tz)
    if cache_key is None:
        # e.g. pytz.FixedOffset, matplotlib.dates._UTC,
        # psycopg2.tz.FixedOffsetTimezone
        num = int(get_utcoffset(tz, None).total_seconds()) * 1_000_000_000
        return (np.array([NPY_NAT + 1], dtype=np.int64),
                np.array([num], dtype=np.int64),
                "unknown")

    if cache_key not in dst_cache:
        if treat_tz_as_pytz(tz):
            trans = np.array(tz._utc_transition_times, dtype='M8[ns]')
            trans = trans.view('i8')
            # A first transition in year 1 is replaced by the NPY_NAT + 1
            # placeholder.
            if tz._utc_transition_times[0].year == 1:
                trans[0] = NPY_NAT + 1
            deltas = unbox_utcoffsets(tz._transition_info)
            typ = 'pytz'

        elif treat_tz_as_dateutil(tz):
            if len(tz._trans_list):
                # get utc trans times
                trans_list = _get_utc_trans_times_from_dateutil_tz(tz)
                trans = np.hstack([
                    np.array([0], dtype='M8[s]'),  # place holder for 1st item
                    np.array(trans_list, dtype='M8[s]')]).astype(
                    'M8[ns]')  # all trans listed
                trans = trans.view('i8')
                trans[0] = NPY_NAT + 1

                # deltas
                deltas = np.array([v.offset for v in (
                    tz._ttinfo_before,) + tz._trans_idx], dtype='i8')
                deltas *= 1000000000
                typ = 'dateutil'

            elif is_fixed_offset(tz):
                trans = np.array([NPY_NAT + 1], dtype=np.int64)
                deltas = np.array([tz._ttinfo_std.offset],
                                  dtype='i8') * 1000000000
                typ = 'fixed'
            else:
                # 2018-07-12 this is not reached in the tests, and this case
                # is not handled in any of the functions that call
                # get_dst_info.  If this case _were_ hit the calling
                # functions would then hit an IndexError because they assume
                # `deltas` is non-empty.
                # (under the just-deleted code that returned empty arrays)
                raise AssertionError("dateutil tzinfo is not a FixedOffset "
                                     "and has an empty `_trans_list`.", tz)
        else:
            # static tzinfo, we can get here with pytz.StaticTZInfo
            # which are not caught by treat_tz_as_pytz
            trans = np.array([NPY_NAT + 1], dtype=np.int64)
            num = int(get_utcoffset(tz, None).total_seconds()) * 1_000_000_000
            deltas = np.array([num], dtype=np.int64)
            typ = "static"

        dst_cache[cache_key] = (trans, deltas, typ)

    return dst_cache[cache_key]


def infer_tzinfo(datetime start, datetime end):
    """
    Return the tzinfo shared by `start` and `end`.

    Parameters
    ----------
    start : datetime or None
    end : datetime or None

    Returns
    -------
    tzinfo or None
        The tzinfo of whichever argument is not None (that of `start` when
        both are given); None if both arguments are None.

    Raises
    ------
    AssertionError
        If both arguments are given and their timezones do not compare
        equal under tz_compare.
    """
    if start is not None and end is not None:
        tz = start.tzinfo
        if not tz_compare(tz, end.tzinfo):
            raise AssertionError(f'Inputs must both have the same timezone, '
                                 f'{tz} != {end.tzinfo}')
    elif start is not None:
        tz = start.tzinfo
    elif end is not None:
        tz = end.tzinfo
    else:
        tz = None
    return tz


cpdef bint tz_compare(tzinfo start, tzinfo end):
    """
    Compare string representations of timezones

    The same timezone can be represented as different instances of
    timezones. For example
    `<DstTzInfo 'Europe/Paris' LMT+0:09:00 STD>` and
    `<DstTzInfo 'Europe/Paris' CET+1:00:00 STD>` are essentially same
    timezones but aren't evaluated such, but the string representation
    for both of these is `'Europe/Paris'`.

    This exists only to add a notion of equality to pytz-style zones
    that is compatible with the notion of equality expected of tzinfo
    subclasses.

    Parameters
    ----------
    start : tzinfo
    end : tzinfo

    Returns
    -------
    bool
    """
    # GH 18523
    return get_timezone(start) == get_timezone(end)


def tz_standardize(tz: tzinfo):
    """
    If the passed tz is a pytz timezone object, "normalize" it to a
    consistent version

    Parameters
    ----------
    tz : tz object

    Returns
    -------
    tz object

    Examples
    --------
    >>> tz
    <DstTzInfo 'US/Pacific' PST-1 day, 16:00:00 STD>

    >>> tz_standardize(tz)
    <DstTzInfo 'US/Pacific' LMT-1 day, 16:07:00 STD>

    >>> tz
    <DstTzInfo 'US/Pacific' LMT-1 day, 16:07:00 STD>

    >>> tz_standardize(tz)
    <DstTzInfo 'US/Pacific' LMT-1 day, 16:07:00 STD>

    >>> tz
    dateutil.tz.tz.tzutc

    >>> tz_standardize(tz)
    dateutil.tz.tz.tzutc
    """
    if treat_tz_as_pytz(tz):
        return pytz.timezone(str(tz))
    return tz