import functools

import numpy as np

from . import utils

# Use as a sentinel value to indicate a dtype appropriate NA value.
NA = utils.ReprObject("<NA>")


@functools.total_ordering
class AlwaysGreaterThan:
    """Object that reports itself as greater than anything it is compared to.

    Two instances of this class compare equal to each other; the remaining
    rich comparisons are derived by ``functools.total_ordering``.
    """

    def __gt__(self, other):
        return True

    def __eq__(self, other):
        return isinstance(other, type(self))


@functools.total_ordering
class AlwaysLessThan:
    """Object that reports itself as less than anything it is compared to.

    Two instances of this class compare equal to each other; the remaining
    rich comparisons are derived by ``functools.total_ordering``.
    """

    def __lt__(self, other):
        return True

    def __eq__(self, other):
        return isinstance(other, type(self))


# Equivalence to np.inf (-np.inf) for object-type
INF = AlwaysGreaterThan()
NINF = AlwaysLessThan()


# Pairs of types that, if both found, should be promoted to object dtype
# instead of following NumPy's own type-promotion rules. These type promotion
# rules match pandas instead. For reference, see the NumPy type hierarchy:
# https://docs.scipy.org/doc/numpy-1.13.0/reference/arrays.scalars.html
#
# ``np.str_`` is used instead of ``np.unicode_``: the two names refer to the
# same class on NumPy 1.x (Python 3), and ``np.unicode_`` was removed in
# NumPy 2.0.
PROMOTE_TO_OBJECT = [
    {np.number, np.character},  # numpy promotes to character
    {np.bool_, np.character},  # numpy promotes to character
    {np.bytes_, np.str_},  # numpy promotes to unicode
]


def maybe_promote(dtype):
    """Simpler equivalent of pandas.core.common._maybe_promote

    Parameters
    ----------
    dtype : np.dtype

    Returns
    -------
    dtype : Promoted dtype that can hold missing values.
    fill_value : Valid missing value for the promoted dtype.
    """
    # N.B. these casting rules should match pandas
    if np.issubdtype(dtype, np.floating):
        fill_value = np.nan
    elif np.issubdtype(dtype, np.timedelta64):
        # See https://github.com/numpy/numpy/issues/10685
        # np.timedelta64 is a subclass of np.integer
        # Check np.timedelta64 before np.integer
        fill_value = np.timedelta64("NaT")
    elif np.issubdtype(dtype, np.integer):
        # Integers cannot hold NaN: promote to a float type, using float32
        # for integers of at most 2 bytes and float64 otherwise.
        dtype = np.float32 if dtype.itemsize <= 2 else np.float64
        fill_value = np.nan
    elif np.issubdtype(dtype, np.complexfloating):
        fill_value = np.nan + np.nan * 1j
    elif np.issubdtype(dtype, np.datetime64):
        fill_value = np.datetime64("NaT")
    else:
        # Everything else falls back to object dtype with NaN as the marker.
        dtype = object
        fill_value = np.nan
    return np.dtype(dtype), fill_value


# dtypes of the NaT scalars for datetime64 and timedelta64.
NAT_TYPES = {np.datetime64("NaT").dtype, np.timedelta64("NaT").dtype}


def get_fill_value(dtype):
    """Return an appropriate fill value for this dtype.

    Parameters
    ----------
    dtype : np.dtype

    Returns
    -------
    fill_value : Missing value corresponding to this dtype.
    """
    _, fill_value = maybe_promote(dtype)
    return fill_value


def get_pos_infinity(dtype, max_for_int=False):
    """Return an appropriate positive infinity for this dtype.

    Parameters
    ----------
    dtype : np.dtype
    max_for_int : bool
        Return np.iinfo(dtype).max instead of np.inf

    Returns
    -------
    fill_value : positive infinity value corresponding to this dtype.
    """
    if issubclass(dtype.type, np.floating):
        return np.inf

    if issubclass(dtype.type, np.integer):
        if max_for_int:
            return np.iinfo(dtype).max
        else:
            return np.inf

    if issubclass(dtype.type, np.complexfloating):
        return np.inf + 1j * np.inf

    # No numeric infinity applies: use the object-level sentinel.
    return INF


def get_neg_infinity(dtype, min_for_int=False):
    """Return an appropriate negative infinity for this dtype.

    Parameters
    ----------
    dtype : np.dtype
    min_for_int : bool
        Return np.iinfo(dtype).min instead of -np.inf

    Returns
    -------
    fill_value : negative infinity value corresponding to this dtype.
    """
    if issubclass(dtype.type, np.floating):
        return -np.inf

    if issubclass(dtype.type, np.integer):
        if min_for_int:
            return np.iinfo(dtype).min
        else:
            return -np.inf

    if issubclass(dtype.type, np.complexfloating):
        return -np.inf - 1j * np.inf

    # No numeric infinity applies: use the object-level sentinel.
    return NINF


def is_datetime_like(dtype):
    """Check if a dtype is a subclass of the numpy datetime types"""
    return np.issubdtype(dtype, np.datetime64) or np.issubdtype(dtype, np.timedelta64)


def result_type(*arrays_and_dtypes):
    """Like np.result_type, but with type promotion rules matching pandas.

    Examples of changed behavior:
    number + string -> object (not string)
    bytes + unicode -> object (not unicode)

    Parameters
    ----------
    *arrays_and_dtypes : list of arrays and dtypes
        The dtype is extracted from both numpy and dask arrays.

    Returns
    -------
    numpy.dtype for the result.
    """
    types = {np.result_type(t).type for t in arrays_and_dtypes}

    # If both members of any listed pair are represented among the inputs,
    # override NumPy's promotion and return object dtype.
    for left, right in PROMOTE_TO_OBJECT:
        if any(issubclass(t, left) for t in types) and any(
            issubclass(t, right) for t in types
        ):
            return np.dtype(object)

    return np.result_type(*arrays_and_dtypes)