from typing import Any, Optional
from collections.abc import Mapping

import numpy as np
import pandas as pd


def _right_squeeze(arr, stop_dim=0):
    """
    Remove trailing singleton dimensions

    Parameters
    ----------
    arr : ndarray
        Input array
    stop_dim : int
        Dimension where checking should stop so that shape[i] is not checked
        for i < stop_dim

    Returns
    -------
    squeezed : ndarray
        Array with all trailing dimensions of size 0 or 1 dropped from the
        shape. Dimensions with index < stop_dim are always retained.
    """
    last = arr.ndim
    # Walk the shape from the right and stop at the first axis with more
    # than one element.
    for s in reversed(arr.shape):
        if s > 1:
            break
        last -= 1
    # Never drop below the requested number of leading dimensions.
    last = max(last, stop_dim)

    return arr.reshape(arr.shape[:last])


def array_like(
    obj,
    name,
    dtype=np.double,
    ndim=1,
    maxdim=None,
    shape=None,
    order=None,
    contiguous=False,
    optional=False,
):
    """
    Convert array-like to a ndarray and check conditions

    Parameters
    ----------
    obj : array_like
         An array, any object exposing the array interface, an object whose
        __array__ method returns an array, or any (nested) sequence.
    name : str
        Name of the variable to use in exceptions
    dtype : {None, numpy.dtype, str}
        Required dtype. Default is double. If None, does not change the dtype
        of obj (if present) or uses NumPy to automatically detect the dtype
    ndim : {int, None}
        Required number of dimensions of obj. If None, no check is performed.
        If the number of dimensions of obj is less than ndim, additional axes
        are inserted on the right. See examples. Ignored when ``maxdim`` is
        not None.
    maxdim : {int, None}
        Maximum allowed dimension.  Use ``maxdim`` instead of ``ndim`` when
        inputs are allowed to have ndim 1, 2, ..., or maxdim.
    shape : {tuple[int], None}
        Required shape obj.  If None, no check is performed. Partially
        restricted shapes can be checked using None. Only the leading
        ``min(len(shape), arr.ndim)`` dimensions are compared. See examples.
    order : {'C', 'F', None}
        Order of the array
    contiguous : bool
        Ensure that the array's data is contiguous (the result is passed
        through ``np.ascontiguousarray``)
    optional : bool
        Flag indicating whether None is allowed

    Returns
    -------
    ndarray
        The converted input, or None if ``optional`` is True and ``obj`` is
        None.

    Raises
    ------
    ValueError
        If the ndim, maxdim or shape requirements are not satisfied.

    Examples
    --------
    Convert a list or pandas series to an array

    >>> import pandas as pd
    >>> x = [0, 1, 2, 3]
    >>> a = array_like(x, 'x', ndim=1)
    >>> a.shape
    (4,)

    >>> a = array_like(pd.Series(x), 'x', ndim=1)
    >>> a.shape
    (4,)

    Squeezes singleton dimensions when required

    >>> x = np.array(x).reshape((4, 1))
    >>> a = array_like(x, 'x', ndim=1)
    >>> a.shape
    (4,)

    Right-appends when required size is larger than actual

    >>> x = [0, 1, 2, 3]
    >>> a = array_like(x, 'x', ndim=2)
    >>> a.shape
    (4, 1)

    Check only the first and last dimension of the input

    >>> x = np.arange(4*10*4).reshape((4, 10, 4))
    >>> y = array_like(x, 'x', ndim=3, shape=(4, None, 4))

    Check only the first two dimensions

    >>> z = array_like(x, 'x', ndim=3, shape=(4, 10))

    Raises ValueError if constraints are not satisfied

    >>> z = array_like(x, 'x', ndim=2)
    Traceback (most recent call last):
     ...
    ValueError: x is required to have ndim 2 but has ndim 3

    >>> z = array_like(x, 'x', ndim=3, shape=(10, 4, 4))
    Traceback (most recent call last):
     ...
    ValueError: x is required to have shape (10, 4, 4) but has shape (4, 10, 4)

    >>> z = array_like(x, 'x', ndim=3, shape=(None, 4, 4))
    Traceback (most recent call last):
     ...
    ValueError: x is required to have shape (*, 4, 4) but has shape (4, 10, 4)
    """
    if optional and obj is None:
        return None
    arr = np.asarray(obj, dtype=dtype, order=order)
    if maxdim is not None:
        # maxdim takes precedence over ndim: only an upper bound is enforced
        # and the array is never reshaped.
        if arr.ndim > maxdim:
            raise ValueError(f"{name} must have ndim <= {maxdim}")
    elif ndim is not None:
        if arr.ndim > ndim:
            # Try to reach the required ndim by dropping trailing singletons.
            arr = _right_squeeze(arr, stop_dim=ndim)
        elif arr.ndim < ndim:
            # Pad with singleton axes on the right.
            arr = np.reshape(arr, arr.shape + (1,) * (ndim - arr.ndim))
        if arr.ndim != ndim:
            raise ValueError(
                f"{name} is required to have ndim {ndim} but has ndim "
                f"{arr.ndim}"
            )
    if shape is not None:
        for actual, req in zip(arr.shape, shape):
            if req is not None and actual != req:
                # Show every unrestricted dimension as "*", including a
                # trailing one (which is not followed by ", ").
                req_shape = str(shape).replace("None", "*")
                raise ValueError(
                    f"{name} is required to have shape {req_shape} but has "
                    f"shape {arr.shape}"
                )
    if contiguous:
        arr = np.ascontiguousarray(arr, dtype=dtype)
    return arr


class PandasWrapper:
    """
    Wrap array_like using the index from the original input, if pandas

    Parameters
    ----------
    pandas_obj : {Series, DataFrame}
        Object to extract the index from for wrapping. Any other type is
        accepted, in which case ``wrap`` returns plain ndarrays.

    Notes
    -----
    Raises if ``pandas_obj`` is a pandas type but obj and ``pandas_obj`` have
    different numbers of elements in axis 0 (after accounting for trimming).
    Also raises if the ndim of obj is larger than 2.
    """

    def __init__(self, pandas_obj):
        self._pandas_obj = pandas_obj
        self._is_pandas = isinstance(pandas_obj, (pd.Series, pd.DataFrame))

    def wrap(self, obj, columns=None, append=None, trim_start=0, trim_end=0):
        """
        Wrap obj as a pandas object that shares the original index

        Parameters
        ----------
        obj : {array_like}
            The value to wrap like to a pandas Series or DataFrame.
        columns : {str, list[str]}
            Column names or series name, if obj is 1d.
        append : str
            String to append to the columns to create a new column name.
        trim_start : int
            The number of observations to drop from the start of the index, so
            that the index applied is index[trim_start:].
        trim_end : int
            The number of observations to drop from the end of the index , so
            that the index applied is index[:nobs - trim_end].

        Returns
        -------
        array_like
            A pandas Series or DataFrame, depending on the shape of obj. If
            the wrapped object is not a pandas type, obj is returned as an
            ndarray.

        Raises
        ------
        ValueError
            If the length of obj plus the trimmed observations does not match
            the length of the original pandas object, or if obj has more than
            two dimensions.
        """
        obj = np.asarray(obj)
        if not self._is_pandas:
            return obj

        if obj.shape[0] + trim_start + trim_end != self._pandas_obj.shape[0]:
            raise ValueError(
                "obj must have the same number of elements in "
                "axis 0 as orig"
            )
        index = self._pandas_obj.index
        index = index[trim_start: index.shape[0] - trim_end]
        if obj.ndim == 1:
            if columns is None:
                name = getattr(self._pandas_obj, "name", None)
            elif isinstance(columns, str):
                name = columns
            else:
                name = columns[0]
            if append is not None:
                name = append if name is None else f"{name}_{append}"

            return pd.Series(obj, name=name, index=index)
        elif obj.ndim == 2:
            if columns is None:
                # A Series has no ``columns``; in that case columns stays
                # None and pandas assigns default column labels.
                columns = getattr(self._pandas_obj, "columns", None)
            if append is not None and columns is not None:
                columns = [
                    append if c is None else f"{c}_{append}" for c in columns
                ]
            return pd.DataFrame(obj, columns=columns, index=index)
        else:
            raise ValueError("Can only wrap 1 or 2-d array_like")


def bool_like(value, name, optional=False, strict=False):
    """
    Convert to bool or raise if not bool_like

    Parameters
    ----------
    value : object
        Value to verify
    name : str
        Variable name for exceptions
    optional : bool
        Flag indicating whether None is allowed
    strict : bool
        If True, then only allow bool. If False, allow types that support
        casting to bool.

    Returns
    -------
    converted : bool
        value converted to a bool

    Raises
    ------
    TypeError
        If value is not a bool when strict is True, or cannot be converted
        to bool when strict is False.
    """
    if optional and value is None:
        return value
    extra_text = " or None" if optional else ""
    if strict:
        if isinstance(value, bool):
            return value
        raise TypeError(f"{name} must be a bool{extra_text}")

    # Collapse array-likes so that single-element arrays convert cleanly.
    if hasattr(value, "squeeze") and callable(value.squeeze):
        value = value.squeeze()
    try:
        return bool(value)
    except Exception as err:
        raise TypeError(
            f"{name} must be a bool (or bool-compatible){extra_text}"
        ) from err


def int_like(
    value: Any, name: str, optional: bool = False, strict: bool = False
) -> Optional[int]:
    """
    Convert to int or raise if not int_like

    Parameters
    ----------
    value : object
        Value to verify
    name : str
        Variable name for exceptions
    optional : bool
        Flag indicating whether None is allowed
    strict : bool
        If True, then only allow int or np.integer that are not bool. If False,
        allow types that support integer division by 1 and conversion to int.

    Returns
    -------
    converted : int
        value converted to a int

    Raises
    ------
    TypeError
        If value cannot be interpreted as an integer under the rules above.
    """
    if optional and value is None:
        return None
    # Checked before squeezing so the decision is based on the input's type.
    is_bool_timedelta = isinstance(value, (bool, np.timedelta64))

    if hasattr(value, "squeeze") and callable(value.squeeze):
        value = value.squeeze()

    if isinstance(value, (int, np.integer)) and not is_bool_timedelta:
        return int(value)
    elif not strict and not is_bool_timedelta:
        # Best effort: any failure here means "not int-like" and falls
        # through to the TypeError below.
        try:
            if value == (value // 1):
                return int(value)
        except Exception:
            pass
    extra_text = " or None" if optional else ""
    raise TypeError(
        "{0} must be integer_like (int or np.integer, but not bool"
        " or timedelta64){1}".format(name, extra_text)
    )


def required_int_like(value: Any, name: str, strict: bool = False) -> int:
    """
    Convert to int or raise if not int_like

    Same as ``int_like`` with ``optional=False``, so the result is never None.

    Parameters
    ----------
    value : object
        Value to verify
    name : str
        Variable name for exceptions
    strict : bool
        If True, then only allow int or np.integer that are not bool. If False,
        allow types that support integer division by 1 and conversion to int.

    Returns
    -------
    converted : int
        value converted to a int

    Raises
    ------
    TypeError
        If value cannot be interpreted as an integer.
    """
    _int = int_like(value, name, optional=False, strict=strict)
    # Narrows Optional[int] to int for type checkers; int_like only returns
    # None when optional is True.
    assert _int is not None
    return _int


def float_like(value, name, optional=False, strict=False):
    """
    Convert to float or raise if not float_like

    Parameters
    ----------
    value : object
        Value to verify
    name : str
        Variable name for exceptions
    optional : bool
        Flag indicating whether None is allowed
    strict : bool
        If True, then only allow int, np.integer, float or np.inexact that are
        not bool or complex. If False, allow complex types with 0 imag part or
        any other type that is float like in the sense that it support
        multiplication by 1.0 and conversion to float.

    Returns
    -------
    converted : float
        value converted to a float

    Raises
    ------
    TypeError
        If value cannot be interpreted as a float under the rules above.
    """
    if optional and value is None:
        return None
    # Both flags are evaluated before squeezing, i.e. on the input's type.
    is_bool = isinstance(value, bool)
    is_complex = isinstance(value, (complex, np.complexfloating))
    if hasattr(value, "squeeze") and callable(value.squeeze):
        value = value.squeeze()

    if isinstance(value, (int, np.integer, float, np.inexact)) and not (
        is_bool or is_complex
    ):
        return float(value)
    elif not strict and is_complex:
        # Complex scalars are accepted only when they are purely real.
        imag = np.imag(value)
        if imag == 0:
            return float(np.real(value))
    elif not strict and not is_bool:
        # Best effort: any failure falls through to the TypeError below.
        try:
            return float(value / 1.0)
        except Exception:
            pass
    extra_text = " or None" if optional else ""
    raise TypeError(
        "{0} must be float_like (float or np.inexact)"
        "{1}".format(name, extra_text)
    )


def string_like(value, name, optional=False, options=None, lower=True):
    """
    Check if object is string-like and raise if not

    Parameters
    ----------
    value : object
        Value to verify.
    name : str
        Variable name for exceptions.
    optional : bool
        Flag indicating whether None is allowed. Note that None is currently
        returned unchanged whatever the value of this flag; the flag only
        affects the wording of error messages.
    options : tuple[str]
        Allowed values for input parameter `value`.
    lower : bool
        Convert all case-based characters in `value` into lowercase. The
        conversion is applied before checking against ``options``.

    Returns
    -------
    str
        The validated input, or None if ``value`` is None.

    Raises
    ------
    TypeError
        If the value is neither a string nor None.
    ValueError
        If the input is not in ``options`` when ``options`` is set.
    """
    if value is None:
        return None
    if not isinstance(value, str):
        extra_text = " or None" if optional else ""
        raise TypeError(f"{name} must be a string{extra_text}")
    if lower:
        value = value.lower()
    if options is not None and value not in options:
        extra_text = "If not None, " if optional else ""
        options_text = "'" + "', '".join(options) + "'"
        raise ValueError(f"{extra_text}{name} must be one of: {options_text}")
    return value


def dict_like(value, name, optional=False, strict=True):
    """
    Check if dict_like (dict, Mapping) or raise if not

    Parameters
    ----------
    value : object
        Value to verify
    name : str
        Variable name for exceptions
    optional : bool
        Flag indicating whether None is allowed
    strict : bool
        If True, then only allow dict. If False, allow any Mapping-like object.

    Returns
    -------
    converted : dict_like
        value

    Raises
    ------
    TypeError
        If value is not a dict (strict) or not a Mapping (non-strict).
    """
    if optional and value is None:
        return None
    if not isinstance(value, Mapping) or (
        strict and not isinstance(value, dict)
    ):
        extra_text = "If not None, " if optional else ""
        # Mention Mapping only when non-dict Mappings are actually accepted.
        strict_text = "" if strict else " or dict_like (i.e., a Mapping)"
        raise TypeError(f"{extra_text}{name} must be a dict{strict_text}")
    return value