1""" 2Functions that ignore NaN. 3 4Functions 5--------- 6 7- `nanmin` -- minimum non-NaN value 8- `nanmax` -- maximum non-NaN value 9- `nanargmin` -- index of minimum non-NaN value 10- `nanargmax` -- index of maximum non-NaN value 11- `nansum` -- sum of non-NaN values 12- `nanprod` -- product of non-NaN values 13- `nancumsum` -- cumulative sum of non-NaN values 14- `nancumprod` -- cumulative product of non-NaN values 15- `nanmean` -- mean of non-NaN values 16- `nanvar` -- variance of non-NaN values 17- `nanstd` -- standard deviation of non-NaN values 18- `nanmedian` -- median of non-NaN values 19- `nanquantile` -- qth quantile of non-NaN values 20- `nanpercentile` -- qth percentile of non-NaN values 21 22""" 23import functools 24import warnings 25import numpy as np 26from numpy.lib import function_base 27from numpy.core import overrides 28 29 30array_function_dispatch = functools.partial( 31 overrides.array_function_dispatch, module='numpy') 32 33 34__all__ = [ 35 'nansum', 'nanmax', 'nanmin', 'nanargmax', 'nanargmin', 'nanmean', 36 'nanmedian', 'nanpercentile', 'nanvar', 'nanstd', 'nanprod', 37 'nancumsum', 'nancumprod', 'nanquantile' 38 ] 39 40 41def _nan_mask(a, out=None): 42 """ 43 Parameters 44 ---------- 45 a : array-like 46 Input array with at least 1 dimension. 47 out : ndarray, optional 48 Alternate output array in which to place the result. The default 49 is ``None``; if provided, it must have the same shape as the 50 expected output and will prevent the allocation of a new array. 51 52 Returns 53 ------- 54 y : bool ndarray or True 55 A bool array where ``np.nan`` positions are marked with ``False`` 56 and other positions are marked with ``True``. If the type of ``a`` 57 is such that it can't possibly contain ``np.nan``, returns ``True``. 58 """ 59 # we assume that a is an array for this private function 60 61 if a.dtype.kind not in 'fc': 62 return True 63 64 y = np.isnan(a, out=out) 65 y = np.invert(y, out=y) 66 return y 67 68def _replace_nan(a, val): 69 """ 70 If `a` is of inexact type, make a copy of `a`, replace NaNs with 71 the `val` value, and return the copy together with a boolean mask 72 marking the locations where NaNs were present. If `a` is not of 73 inexact type, do nothing and return `a` together with a mask of None. 74 75 Note that scalars will end up as array scalars, which is important 76 for using the result as the value of the out argument in some 77 operations. 78 79 Parameters 80 ---------- 81 a : array-like 82 Input array. 83 val : float 84 NaN values are set to val before doing the operation. 85 86 Returns 87 ------- 88 y : ndarray 89 If `a` is of inexact type, return a copy of `a` with the NaNs 90 replaced by the fill value, otherwise return `a`. 91 mask: {bool, None} 92 If `a` is of inexact type, return a boolean mask marking locations of 93 NaNs, otherwise return None. 94 95 """ 96 a = np.asanyarray(a) 97 98 if a.dtype == np.object_: 99 # object arrays do not support `isnan` (gh-9009), so make a guess 100 mask = np.not_equal(a, a, dtype=bool) 101 elif issubclass(a.dtype.type, np.inexact): 102 mask = np.isnan(a) 103 else: 104 mask = None 105 106 if mask is not None: 107 a = np.array(a, subok=True, copy=True) 108 np.copyto(a, val, where=mask) 109 110 return a, mask 111 112 113def _copyto(a, val, mask): 114 """ 115 Replace values in `a` with NaN where `mask` is True. This differs from 116 copyto in that it will deal with the case where `a` is a numpy scalar. 117 118 Parameters 119 ---------- 120 a : ndarray or numpy scalar 121 Array or numpy scalar some of whose values are to be replaced 122 by val. 123 val : numpy scalar 124 Value used a replacement. 125 mask : ndarray, scalar 126 Boolean array. Where True the corresponding element of `a` is 127 replaced by `val`. Broadcasts. 128 129 Returns 130 ------- 131 res : ndarray, scalar 132 Array with elements replaced or scalar `val`. 133 134 """ 135 if isinstance(a, np.ndarray): 136 np.copyto(a, val, where=mask, casting='unsafe') 137 else: 138 a = a.dtype.type(val) 139 return a 140 141 142def _remove_nan_1d(arr1d, overwrite_input=False): 143 """ 144 Equivalent to arr1d[~arr1d.isnan()], but in a different order 145 146 Presumably faster as it incurs fewer copies 147 148 Parameters 149 ---------- 150 arr1d : ndarray 151 Array to remove nans from 152 overwrite_input : bool 153 True if `arr1d` can be modified in place 154 155 Returns 156 ------- 157 res : ndarray 158 Array with nan elements removed 159 overwrite_input : bool 160 True if `res` can be modified in place, given the constraint on the 161 input 162 """ 163 164 c = np.isnan(arr1d) 165 s = np.nonzero(c)[0] 166 if s.size == arr1d.size: 167 warnings.warn("All-NaN slice encountered", RuntimeWarning, 168 stacklevel=5) 169 return arr1d[:0], True 170 elif s.size == 0: 171 return arr1d, overwrite_input 172 else: 173 if not overwrite_input: 174 arr1d = arr1d.copy() 175 # select non-nans at end of array 176 enonan = arr1d[-s.size:][~c[-s.size:]] 177 # fill nans in beginning of array with non-nans of end 178 arr1d[s[:enonan.size]] = enonan 179 180 return arr1d[:-s.size], True 181 182 183def _divide_by_count(a, b, out=None): 184 """ 185 Compute a/b ignoring invalid results. If `a` is an array the division 186 is done in place. If `a` is a scalar, then its type is preserved in the 187 output. If out is None, then then a is used instead so that the 188 division is in place. Note that this is only called with `a` an inexact 189 type. 190 191 Parameters 192 ---------- 193 a : {ndarray, numpy scalar} 194 Numerator. Expected to be of inexact type but not checked. 195 b : {ndarray, numpy scalar} 196 Denominator. 197 out : ndarray, optional 198 Alternate output array in which to place the result. The default 199 is ``None``; if provided, it must have the same shape as the 200 expected output, but the type will be cast if necessary. 201 202 Returns 203 ------- 204 ret : {ndarray, numpy scalar} 205 The return value is a/b. If `a` was an ndarray the division is done 206 in place. If `a` is a numpy scalar, the division preserves its type. 207 208 """ 209 with np.errstate(invalid='ignore', divide='ignore'): 210 if isinstance(a, np.ndarray): 211 if out is None: 212 return np.divide(a, b, out=a, casting='unsafe') 213 else: 214 return np.divide(a, b, out=out, casting='unsafe') 215 else: 216 if out is None: 217 return a.dtype.type(a / b) 218 else: 219 # This is questionable, but currently a numpy scalar can 220 # be output to a zero dimensional array. 221 return np.divide(a, b, out=out, casting='unsafe') 222 223 224def _nanmin_dispatcher(a, axis=None, out=None, keepdims=None): 225 return (a, out) 226 227 228@array_function_dispatch(_nanmin_dispatcher) 229def nanmin(a, axis=None, out=None, keepdims=np._NoValue): 230 """ 231 Return minimum of an array or minimum along an axis, ignoring any NaNs. 232 When all-NaN slices are encountered a ``RuntimeWarning`` is raised and 233 Nan is returned for that slice. 234 235 Parameters 236 ---------- 237 a : array_like 238 Array containing numbers whose minimum is desired. If `a` is not an 239 array, a conversion is attempted. 240 axis : {int, tuple of int, None}, optional 241 Axis or axes along which the minimum is computed. The default is to compute 242 the minimum of the flattened array. 243 out : ndarray, optional 244 Alternate output array in which to place the result. The default 245 is ``None``; if provided, it must have the same shape as the 246 expected output, but the type will be cast if necessary. See 247 :ref:`ufuncs-output-type` for more details. 248 249 .. versionadded:: 1.8.0 250 keepdims : bool, optional 251 If this is set to True, the axes which are reduced are left 252 in the result as dimensions with size one. With this option, 253 the result will broadcast correctly against the original `a`. 254 255 If the value is anything but the default, then 256 `keepdims` will be passed through to the `min` method 257 of sub-classes of `ndarray`. If the sub-classes methods 258 does not implement `keepdims` any exceptions will be raised. 259 260 .. versionadded:: 1.8.0 261 262 Returns 263 ------- 264 nanmin : ndarray 265 An array with the same shape as `a`, with the specified axis 266 removed. If `a` is a 0-d array, or if axis is None, an ndarray 267 scalar is returned. The same dtype as `a` is returned. 268 269 See Also 270 -------- 271 nanmax : 272 The maximum value of an array along a given axis, ignoring any NaNs. 273 amin : 274 The minimum value of an array along a given axis, propagating any NaNs. 275 fmin : 276 Element-wise minimum of two arrays, ignoring any NaNs. 277 minimum : 278 Element-wise minimum of two arrays, propagating any NaNs. 279 isnan : 280 Shows which elements are Not a Number (NaN). 281 isfinite: 282 Shows which elements are neither NaN nor infinity. 283 284 amax, fmax, maximum 285 286 Notes 287 ----- 288 NumPy uses the IEEE Standard for Binary Floating-Point for Arithmetic 289 (IEEE 754). This means that Not a Number is not equivalent to infinity. 290 Positive infinity is treated as a very large number and negative 291 infinity is treated as a very small (i.e. negative) number. 292 293 If the input has a integer type the function is equivalent to np.min. 294 295 Examples 296 -------- 297 >>> a = np.array([[1, 2], [3, np.nan]]) 298 >>> np.nanmin(a) 299 1.0 300 >>> np.nanmin(a, axis=0) 301 array([1., 2.]) 302 >>> np.nanmin(a, axis=1) 303 array([1., 3.]) 304 305 When positive infinity and negative infinity are present: 306 307 >>> np.nanmin([1, 2, np.nan, np.inf]) 308 1.0 309 >>> np.nanmin([1, 2, np.nan, np.NINF]) 310 -inf 311 312 """ 313 kwargs = {} 314 if keepdims is not np._NoValue: 315 kwargs['keepdims'] = keepdims 316 if type(a) is np.ndarray and a.dtype != np.object_: 317 # Fast, but not safe for subclasses of ndarray, or object arrays, 318 # which do not implement isnan (gh-9009), or fmin correctly (gh-8975) 319 res = np.fmin.reduce(a, axis=axis, out=out, **kwargs) 320 if np.isnan(res).any(): 321 warnings.warn("All-NaN slice encountered", RuntimeWarning, 322 stacklevel=3) 323 else: 324 # Slow, but safe for subclasses of ndarray 325 a, mask = _replace_nan(a, +np.inf) 326 res = np.amin(a, axis=axis, out=out, **kwargs) 327 if mask is None: 328 return res 329 330 # Check for all-NaN axis 331 mask = np.all(mask, axis=axis, **kwargs) 332 if np.any(mask): 333 res = _copyto(res, np.nan, mask) 334 warnings.warn("All-NaN axis encountered", RuntimeWarning, 335 stacklevel=3) 336 return res 337 338 339def _nanmax_dispatcher(a, axis=None, out=None, keepdims=None): 340 return (a, out) 341 342 343@array_function_dispatch(_nanmax_dispatcher) 344def nanmax(a, axis=None, out=None, keepdims=np._NoValue): 345 """ 346 Return the maximum of an array or maximum along an axis, ignoring any 347 NaNs. When all-NaN slices are encountered a ``RuntimeWarning`` is 348 raised and NaN is returned for that slice. 349 350 Parameters 351 ---------- 352 a : array_like 353 Array containing numbers whose maximum is desired. If `a` is not an 354 array, a conversion is attempted. 355 axis : {int, tuple of int, None}, optional 356 Axis or axes along which the maximum is computed. The default is to compute 357 the maximum of the flattened array. 358 out : ndarray, optional 359 Alternate output array in which to place the result. The default 360 is ``None``; if provided, it must have the same shape as the 361 expected output, but the type will be cast if necessary. See 362 :ref:`ufuncs-output-type` for more details. 363 364 .. versionadded:: 1.8.0 365 keepdims : bool, optional 366 If this is set to True, the axes which are reduced are left 367 in the result as dimensions with size one. With this option, 368 the result will broadcast correctly against the original `a`. 369 370 If the value is anything but the default, then 371 `keepdims` will be passed through to the `max` method 372 of sub-classes of `ndarray`. If the sub-classes methods 373 does not implement `keepdims` any exceptions will be raised. 374 375 .. versionadded:: 1.8.0 376 377 Returns 378 ------- 379 nanmax : ndarray 380 An array with the same shape as `a`, with the specified axis removed. 381 If `a` is a 0-d array, or if axis is None, an ndarray scalar is 382 returned. The same dtype as `a` is returned. 383 384 See Also 385 -------- 386 nanmin : 387 The minimum value of an array along a given axis, ignoring any NaNs. 388 amax : 389 The maximum value of an array along a given axis, propagating any NaNs. 390 fmax : 391 Element-wise maximum of two arrays, ignoring any NaNs. 392 maximum : 393 Element-wise maximum of two arrays, propagating any NaNs. 394 isnan : 395 Shows which elements are Not a Number (NaN). 396 isfinite: 397 Shows which elements are neither NaN nor infinity. 398 399 amin, fmin, minimum 400 401 Notes 402 ----- 403 NumPy uses the IEEE Standard for Binary Floating-Point for Arithmetic 404 (IEEE 754). This means that Not a Number is not equivalent to infinity. 405 Positive infinity is treated as a very large number and negative 406 infinity is treated as a very small (i.e. negative) number. 407 408 If the input has a integer type the function is equivalent to np.max. 409 410 Examples 411 -------- 412 >>> a = np.array([[1, 2], [3, np.nan]]) 413 >>> np.nanmax(a) 414 3.0 415 >>> np.nanmax(a, axis=0) 416 array([3., 2.]) 417 >>> np.nanmax(a, axis=1) 418 array([2., 3.]) 419 420 When positive infinity and negative infinity are present: 421 422 >>> np.nanmax([1, 2, np.nan, np.NINF]) 423 2.0 424 >>> np.nanmax([1, 2, np.nan, np.inf]) 425 inf 426 427 """ 428 kwargs = {} 429 if keepdims is not np._NoValue: 430 kwargs['keepdims'] = keepdims 431 if type(a) is np.ndarray and a.dtype != np.object_: 432 # Fast, but not safe for subclasses of ndarray, or object arrays, 433 # which do not implement isnan (gh-9009), or fmax correctly (gh-8975) 434 res = np.fmax.reduce(a, axis=axis, out=out, **kwargs) 435 if np.isnan(res).any(): 436 warnings.warn("All-NaN slice encountered", RuntimeWarning, 437 stacklevel=3) 438 else: 439 # Slow, but safe for subclasses of ndarray 440 a, mask = _replace_nan(a, -np.inf) 441 res = np.amax(a, axis=axis, out=out, **kwargs) 442 if mask is None: 443 return res 444 445 # Check for all-NaN axis 446 mask = np.all(mask, axis=axis, **kwargs) 447 if np.any(mask): 448 res = _copyto(res, np.nan, mask) 449 warnings.warn("All-NaN axis encountered", RuntimeWarning, 450 stacklevel=3) 451 return res 452 453 454def _nanargmin_dispatcher(a, axis=None): 455 return (a,) 456 457 458@array_function_dispatch(_nanargmin_dispatcher) 459def nanargmin(a, axis=None): 460 """ 461 Return the indices of the minimum values in the specified axis ignoring 462 NaNs. For all-NaN slices ``ValueError`` is raised. Warning: the results 463 cannot be trusted if a slice contains only NaNs and Infs. 464 465 Parameters 466 ---------- 467 a : array_like 468 Input data. 469 axis : int, optional 470 Axis along which to operate. By default flattened input is used. 471 472 Returns 473 ------- 474 index_array : ndarray 475 An array of indices or a single index value. 476 477 See Also 478 -------- 479 argmin, nanargmax 480 481 Examples 482 -------- 483 >>> a = np.array([[np.nan, 4], [2, 3]]) 484 >>> np.argmin(a) 485 0 486 >>> np.nanargmin(a) 487 2 488 >>> np.nanargmin(a, axis=0) 489 array([1, 1]) 490 >>> np.nanargmin(a, axis=1) 491 array([1, 0]) 492 493 """ 494 a, mask = _replace_nan(a, np.inf) 495 res = np.argmin(a, axis=axis) 496 if mask is not None: 497 mask = np.all(mask, axis=axis) 498 if np.any(mask): 499 raise ValueError("All-NaN slice encountered") 500 return res 501 502 503def _nanargmax_dispatcher(a, axis=None): 504 return (a,) 505 506 507@array_function_dispatch(_nanargmax_dispatcher) 508def nanargmax(a, axis=None): 509 """ 510 Return the indices of the maximum values in the specified axis ignoring 511 NaNs. For all-NaN slices ``ValueError`` is raised. Warning: the 512 results cannot be trusted if a slice contains only NaNs and -Infs. 513 514 515 Parameters 516 ---------- 517 a : array_like 518 Input data. 519 axis : int, optional 520 Axis along which to operate. By default flattened input is used. 521 522 Returns 523 ------- 524 index_array : ndarray 525 An array of indices or a single index value. 526 527 See Also 528 -------- 529 argmax, nanargmin 530 531 Examples 532 -------- 533 >>> a = np.array([[np.nan, 4], [2, 3]]) 534 >>> np.argmax(a) 535 0 536 >>> np.nanargmax(a) 537 1 538 >>> np.nanargmax(a, axis=0) 539 array([1, 0]) 540 >>> np.nanargmax(a, axis=1) 541 array([1, 1]) 542 543 """ 544 a, mask = _replace_nan(a, -np.inf) 545 res = np.argmax(a, axis=axis) 546 if mask is not None: 547 mask = np.all(mask, axis=axis) 548 if np.any(mask): 549 raise ValueError("All-NaN slice encountered") 550 return res 551 552 553def _nansum_dispatcher(a, axis=None, dtype=None, out=None, keepdims=None): 554 return (a, out) 555 556 557@array_function_dispatch(_nansum_dispatcher) 558def nansum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue): 559 """ 560 Return the sum of array elements over a given axis treating Not a 561 Numbers (NaNs) as zero. 562 563 In NumPy versions <= 1.9.0 Nan is returned for slices that are all-NaN or 564 empty. In later versions zero is returned. 565 566 Parameters 567 ---------- 568 a : array_like 569 Array containing numbers whose sum is desired. If `a` is not an 570 array, a conversion is attempted. 571 axis : {int, tuple of int, None}, optional 572 Axis or axes along which the sum is computed. The default is to compute the 573 sum of the flattened array. 574 dtype : data-type, optional 575 The type of the returned array and of the accumulator in which the 576 elements are summed. By default, the dtype of `a` is used. An 577 exception is when `a` has an integer type with less precision than 578 the platform (u)intp. In that case, the default will be either 579 (u)int32 or (u)int64 depending on whether the platform is 32 or 64 580 bits. For inexact inputs, dtype must be inexact. 581 582 .. versionadded:: 1.8.0 583 out : ndarray, optional 584 Alternate output array in which to place the result. The default 585 is ``None``. If provided, it must have the same shape as the 586 expected output, but the type will be cast if necessary. See 587 :ref:`ufuncs-output-type` for more details. The casting of NaN to integer 588 can yield unexpected results. 589 590 .. versionadded:: 1.8.0 591 keepdims : bool, optional 592 If this is set to True, the axes which are reduced are left 593 in the result as dimensions with size one. With this option, 594 the result will broadcast correctly against the original `a`. 595 596 597 If the value is anything but the default, then 598 `keepdims` will be passed through to the `mean` or `sum` methods 599 of sub-classes of `ndarray`. If the sub-classes methods 600 does not implement `keepdims` any exceptions will be raised. 601 602 .. versionadded:: 1.8.0 603 604 Returns 605 ------- 606 nansum : ndarray. 607 A new array holding the result is returned unless `out` is 608 specified, in which it is returned. The result has the same 609 size as `a`, and the same shape as `a` if `axis` is not None 610 or `a` is a 1-d array. 611 612 See Also 613 -------- 614 numpy.sum : Sum across array propagating NaNs. 615 isnan : Show which elements are NaN. 616 isfinite: Show which elements are not NaN or +/-inf. 617 618 Notes 619 ----- 620 If both positive and negative infinity are present, the sum will be Not 621 A Number (NaN). 622 623 Examples 624 -------- 625 >>> np.nansum(1) 626 1 627 >>> np.nansum([1]) 628 1 629 >>> np.nansum([1, np.nan]) 630 1.0 631 >>> a = np.array([[1, 1], [1, np.nan]]) 632 >>> np.nansum(a) 633 3.0 634 >>> np.nansum(a, axis=0) 635 array([2., 1.]) 636 >>> np.nansum([1, np.nan, np.inf]) 637 inf 638 >>> np.nansum([1, np.nan, np.NINF]) 639 -inf 640 >>> from numpy.testing import suppress_warnings 641 >>> with suppress_warnings() as sup: 642 ... sup.filter(RuntimeWarning) 643 ... np.nansum([1, np.nan, np.inf, -np.inf]) # both +/- infinity present 644 nan 645 646 """ 647 a, mask = _replace_nan(a, 0) 648 return np.sum(a, axis=axis, dtype=dtype, out=out, keepdims=keepdims) 649 650 651def _nanprod_dispatcher(a, axis=None, dtype=None, out=None, keepdims=None): 652 return (a, out) 653 654 655@array_function_dispatch(_nanprod_dispatcher) 656def nanprod(a, axis=None, dtype=None, out=None, keepdims=np._NoValue): 657 """ 658 Return the product of array elements over a given axis treating Not a 659 Numbers (NaNs) as ones. 660 661 One is returned for slices that are all-NaN or empty. 662 663 .. versionadded:: 1.10.0 664 665 Parameters 666 ---------- 667 a : array_like 668 Array containing numbers whose product is desired. If `a` is not an 669 array, a conversion is attempted. 670 axis : {int, tuple of int, None}, optional 671 Axis or axes along which the product is computed. The default is to compute 672 the product of the flattened array. 673 dtype : data-type, optional 674 The type of the returned array and of the accumulator in which the 675 elements are summed. By default, the dtype of `a` is used. An 676 exception is when `a` has an integer type with less precision than 677 the platform (u)intp. In that case, the default will be either 678 (u)int32 or (u)int64 depending on whether the platform is 32 or 64 679 bits. For inexact inputs, dtype must be inexact. 680 out : ndarray, optional 681 Alternate output array in which to place the result. The default 682 is ``None``. If provided, it must have the same shape as the 683 expected output, but the type will be cast if necessary. See 684 :ref:`ufuncs-output-type` for more details. The casting of NaN to integer 685 can yield unexpected results. 686 keepdims : bool, optional 687 If True, the axes which are reduced are left in the result as 688 dimensions with size one. With this option, the result will 689 broadcast correctly against the original `arr`. 690 691 Returns 692 ------- 693 nanprod : ndarray 694 A new array holding the result is returned unless `out` is 695 specified, in which case it is returned. 696 697 See Also 698 -------- 699 numpy.prod : Product across array propagating NaNs. 700 isnan : Show which elements are NaN. 701 702 Examples 703 -------- 704 >>> np.nanprod(1) 705 1 706 >>> np.nanprod([1]) 707 1 708 >>> np.nanprod([1, np.nan]) 709 1.0 710 >>> a = np.array([[1, 2], [3, np.nan]]) 711 >>> np.nanprod(a) 712 6.0 713 >>> np.nanprod(a, axis=0) 714 array([3., 2.]) 715 716 """ 717 a, mask = _replace_nan(a, 1) 718 return np.prod(a, axis=axis, dtype=dtype, out=out, keepdims=keepdims) 719 720 721def _nancumsum_dispatcher(a, axis=None, dtype=None, out=None): 722 return (a, out) 723 724 725@array_function_dispatch(_nancumsum_dispatcher) 726def nancumsum(a, axis=None, dtype=None, out=None): 727 """ 728 Return the cumulative sum of array elements over a given axis treating Not a 729 Numbers (NaNs) as zero. The cumulative sum does not change when NaNs are 730 encountered and leading NaNs are replaced by zeros. 731 732 Zeros are returned for slices that are all-NaN or empty. 733 734 .. versionadded:: 1.12.0 735 736 Parameters 737 ---------- 738 a : array_like 739 Input array. 740 axis : int, optional 741 Axis along which the cumulative sum is computed. The default 742 (None) is to compute the cumsum over the flattened array. 743 dtype : dtype, optional 744 Type of the returned array and of the accumulator in which the 745 elements are summed. If `dtype` is not specified, it defaults 746 to the dtype of `a`, unless `a` has an integer dtype with a 747 precision less than that of the default platform integer. In 748 that case, the default platform integer is used. 749 out : ndarray, optional 750 Alternative output array in which to place the result. It must 751 have the same shape and buffer length as the expected output 752 but the type will be cast if necessary. See :ref:`ufuncs-output-type` for 753 more details. 754 755 Returns 756 ------- 757 nancumsum : ndarray. 758 A new array holding the result is returned unless `out` is 759 specified, in which it is returned. The result has the same 760 size as `a`, and the same shape as `a` if `axis` is not None 761 or `a` is a 1-d array. 762 763 See Also 764 -------- 765 numpy.cumsum : Cumulative sum across array propagating NaNs. 766 isnan : Show which elements are NaN. 767 768 Examples 769 -------- 770 >>> np.nancumsum(1) 771 array([1]) 772 >>> np.nancumsum([1]) 773 array([1]) 774 >>> np.nancumsum([1, np.nan]) 775 array([1., 1.]) 776 >>> a = np.array([[1, 2], [3, np.nan]]) 777 >>> np.nancumsum(a) 778 array([1., 3., 6., 6.]) 779 >>> np.nancumsum(a, axis=0) 780 array([[1., 2.], 781 [4., 2.]]) 782 >>> np.nancumsum(a, axis=1) 783 array([[1., 3.], 784 [3., 3.]]) 785 786 """ 787 a, mask = _replace_nan(a, 0) 788 return np.cumsum(a, axis=axis, dtype=dtype, out=out) 789 790 791def _nancumprod_dispatcher(a, axis=None, dtype=None, out=None): 792 return (a, out) 793 794 795@array_function_dispatch(_nancumprod_dispatcher) 796def nancumprod(a, axis=None, dtype=None, out=None): 797 """ 798 Return the cumulative product of array elements over a given axis treating Not a 799 Numbers (NaNs) as one. The cumulative product does not change when NaNs are 800 encountered and leading NaNs are replaced by ones. 801 802 Ones are returned for slices that are all-NaN or empty. 803 804 .. versionadded:: 1.12.0 805 806 Parameters 807 ---------- 808 a : array_like 809 Input array. 810 axis : int, optional 811 Axis along which the cumulative product is computed. By default 812 the input is flattened. 813 dtype : dtype, optional 814 Type of the returned array, as well as of the accumulator in which 815 the elements are multiplied. If *dtype* is not specified, it 816 defaults to the dtype of `a`, unless `a` has an integer dtype with 817 a precision less than that of the default platform integer. In 818 that case, the default platform integer is used instead. 819 out : ndarray, optional 820 Alternative output array in which to place the result. It must 821 have the same shape and buffer length as the expected output 822 but the type of the resulting values will be cast if necessary. 823 824 Returns 825 ------- 826 nancumprod : ndarray 827 A new array holding the result is returned unless `out` is 828 specified, in which case it is returned. 829 830 See Also 831 -------- 832 numpy.cumprod : Cumulative product across array propagating NaNs. 833 isnan : Show which elements are NaN. 834 835 Examples 836 -------- 837 >>> np.nancumprod(1) 838 array([1]) 839 >>> np.nancumprod([1]) 840 array([1]) 841 >>> np.nancumprod([1, np.nan]) 842 array([1., 1.]) 843 >>> a = np.array([[1, 2], [3, np.nan]]) 844 >>> np.nancumprod(a) 845 array([1., 2., 6., 6.]) 846 >>> np.nancumprod(a, axis=0) 847 array([[1., 2.], 848 [3., 2.]]) 849 >>> np.nancumprod(a, axis=1) 850 array([[1., 2.], 851 [3., 3.]]) 852 853 """ 854 a, mask = _replace_nan(a, 1) 855 return np.cumprod(a, axis=axis, dtype=dtype, out=out) 856 857 858def _nanmean_dispatcher(a, axis=None, dtype=None, out=None, keepdims=None): 859 return (a, out) 860 861 862@array_function_dispatch(_nanmean_dispatcher) 863def nanmean(a, axis=None, dtype=None, out=None, keepdims=np._NoValue): 864 """ 865 Compute the arithmetic mean along the specified axis, ignoring NaNs. 866 867 Returns the average of the array elements. The average is taken over 868 the flattened array by default, otherwise over the specified axis. 869 `float64` intermediate and return values are used for integer inputs. 870 871 For all-NaN slices, NaN is returned and a `RuntimeWarning` is raised. 872 873 .. versionadded:: 1.8.0 874 875 Parameters 876 ---------- 877 a : array_like 878 Array containing numbers whose mean is desired. If `a` is not an 879 array, a conversion is attempted. 880 axis : {int, tuple of int, None}, optional 881 Axis or axes along which the means are computed. The default is to compute 882 the mean of the flattened array. 883 dtype : data-type, optional 884 Type to use in computing the mean. For integer inputs, the default 885 is `float64`; for inexact inputs, it is the same as the input 886 dtype. 887 out : ndarray, optional 888 Alternate output array in which to place the result. The default 889 is ``None``; if provided, it must have the same shape as the 890 expected output, but the type will be cast if necessary. See 891 :ref:`ufuncs-output-type` for more details. 892 keepdims : bool, optional 893 If this is set to True, the axes which are reduced are left 894 in the result as dimensions with size one. With this option, 895 the result will broadcast correctly against the original `a`. 896 897 If the value is anything but the default, then 898 `keepdims` will be passed through to the `mean` or `sum` methods 899 of sub-classes of `ndarray`. If the sub-classes methods 900 does not implement `keepdims` any exceptions will be raised. 901 902 Returns 903 ------- 904 m : ndarray, see dtype parameter above 905 If `out=None`, returns a new array containing the mean values, 906 otherwise a reference to the output array is returned. Nan is 907 returned for slices that contain only NaNs. 908 909 See Also 910 -------- 911 average : Weighted average 912 mean : Arithmetic mean taken while not ignoring NaNs 913 var, nanvar 914 915 Notes 916 ----- 917 The arithmetic mean is the sum of the non-NaN elements along the axis 918 divided by the number of non-NaN elements. 919 920 Note that for floating-point input, the mean is computed using the same 921 precision the input has. Depending on the input data, this can cause 922 the results to be inaccurate, especially for `float32`. Specifying a 923 higher-precision accumulator using the `dtype` keyword can alleviate 924 this issue. 925 926 Examples 927 -------- 928 >>> a = np.array([[1, np.nan], [3, 4]]) 929 >>> np.nanmean(a) 930 2.6666666666666665 931 >>> np.nanmean(a, axis=0) 932 array([2., 4.]) 933 >>> np.nanmean(a, axis=1) 934 array([1., 3.5]) # may vary 935 936 """ 937 arr, mask = _replace_nan(a, 0) 938 if mask is None: 939 return np.mean(arr, axis=axis, dtype=dtype, out=out, keepdims=keepdims) 940 941 if dtype is not None: 942 dtype = np.dtype(dtype) 943 if dtype is not None and not issubclass(dtype.type, np.inexact): 944 raise TypeError("If a is inexact, then dtype must be inexact") 945 if out is not None and not issubclass(out.dtype.type, np.inexact): 946 raise TypeError("If a is inexact, then out must be inexact") 947 948 cnt = np.sum(~mask, axis=axis, dtype=np.intp, keepdims=keepdims) 949 tot = np.sum(arr, axis=axis, dtype=dtype, out=out, keepdims=keepdims) 950 avg = _divide_by_count(tot, cnt, out=out) 951 952 isbad = (cnt == 0) 953 if isbad.any(): 954 warnings.warn("Mean of empty slice", RuntimeWarning, stacklevel=3) 955 # NaN is the only possible bad value, so no further 956 # action is needed to handle bad results. 957 return avg 958 959 960def _nanmedian1d(arr1d, overwrite_input=False): 961 """ 962 Private function for rank 1 arrays. Compute the median ignoring NaNs. 963 See nanmedian for parameter usage 964 """ 965 arr1d, overwrite_input = _remove_nan_1d(arr1d, 966 overwrite_input=overwrite_input) 967 if arr1d.size == 0: 968 return np.nan 969 970 return np.median(arr1d, overwrite_input=overwrite_input) 971 972 973def _nanmedian(a, axis=None, out=None, overwrite_input=False): 974 """ 975 Private function that doesn't support extended axis or keepdims. 976 These methods are extended to this function using _ureduce 977 See nanmedian for parameter usage 978 979 """ 980 if axis is None or a.ndim == 1: 981 part = a.ravel() 982 if out is None: 983 return _nanmedian1d(part, overwrite_input) 984 else: 985 out[...] = _nanmedian1d(part, overwrite_input) 986 return out 987 else: 988 # for small medians use sort + indexing which is still faster than 989 # apply_along_axis 990 # benchmarked with shuffled (50, 50, x) containing a few NaN 991 if a.shape[axis] < 600: 992 return _nanmedian_small(a, axis, out, overwrite_input) 993 result = np.apply_along_axis(_nanmedian1d, axis, a, overwrite_input) 994 if out is not None: 995 out[...] = result 996 return result 997 998 999def _nanmedian_small(a, axis=None, out=None, overwrite_input=False): 1000 """ 1001 sort + indexing median, faster for small medians along multiple 1002 dimensions due to the high overhead of apply_along_axis 1003 1004 see nanmedian for parameter usage 1005 """ 1006 a = np.ma.masked_array(a, np.isnan(a)) 1007 m = np.ma.median(a, axis=axis, overwrite_input=overwrite_input) 1008 for i in range(np.count_nonzero(m.mask.ravel())): 1009 warnings.warn("All-NaN slice encountered", RuntimeWarning, 1010 stacklevel=4) 1011 if out is not None: 1012 out[...] = m.filled(np.nan) 1013 return out 1014 return m.filled(np.nan) 1015 1016 1017def _nanmedian_dispatcher( 1018 a, axis=None, out=None, overwrite_input=None, keepdims=None): 1019 return (a, out) 1020 1021 1022@array_function_dispatch(_nanmedian_dispatcher) 1023def nanmedian(a, axis=None, out=None, overwrite_input=False, keepdims=np._NoValue): 1024 """ 1025 Compute the median along the specified axis, while ignoring NaNs. 1026 1027 Returns the median of the array elements. 1028 1029 .. versionadded:: 1.9.0 1030 1031 Parameters 1032 ---------- 1033 a : array_like 1034 Input array or object that can be converted to an array. 1035 axis : {int, sequence of int, None}, optional 1036 Axis or axes along which the medians are computed. The default 1037 is to compute the median along a flattened version of the array. 1038 A sequence of axes is supported since version 1.9.0. 1039 out : ndarray, optional 1040 Alternative output array in which to place the result. It must 1041 have the same shape and buffer length as the expected output, 1042 but the type (of the output) will be cast if necessary. 1043 overwrite_input : bool, optional 1044 If True, then allow use of memory of input array `a` for 1045 calculations. The input array will be modified by the call to 1046 `median`. This will save memory when you do not need to preserve 1047 the contents of the input array. Treat the input as undefined, 1048 but it will probably be fully or partially sorted. Default is 1049 False. If `overwrite_input` is ``True`` and `a` is not already an 1050 `ndarray`, an error will be raised. 1051 keepdims : bool, optional 1052 If this is set to True, the axes which are reduced are left 1053 in the result as dimensions with size one. With this option, 1054 the result will broadcast correctly against the original `a`. 1055 1056 If this is anything but the default value it will be passed 1057 through (in the special case of an empty array) to the 1058 `mean` function of the underlying array. If the array is 1059 a sub-class and `mean` does not have the kwarg `keepdims` this 1060 will raise a RuntimeError. 1061 1062 Returns 1063 ------- 1064 median : ndarray 1065 A new array holding the result. If the input contains integers 1066 or floats smaller than ``float64``, then the output data-type is 1067 ``np.float64``. Otherwise, the data-type of the output is the 1068 same as that of the input. If `out` is specified, that array is 1069 returned instead. 1070 1071 See Also 1072 -------- 1073 mean, median, percentile 1074 1075 Notes 1076 ----- 1077 Given a vector ``V`` of length ``N``, the median of ``V`` is the 1078 middle value of a sorted copy of ``V``, ``V_sorted`` - i.e., 1079 ``V_sorted[(N-1)/2]``, when ``N`` is odd and the average of the two 1080 middle values of ``V_sorted`` when ``N`` is even. 1081 1082 Examples 1083 -------- 1084 >>> a = np.array([[10.0, 7, 4], [3, 2, 1]]) 1085 >>> a[0, 1] = np.nan 1086 >>> a 1087 array([[10., nan, 4.], 1088 [ 3., 2., 1.]]) 1089 >>> np.median(a) 1090 nan 1091 >>> np.nanmedian(a) 1092 3.0 1093 >>> np.nanmedian(a, axis=0) 1094 array([6.5, 2. , 2.5]) 1095 >>> np.median(a, axis=1) 1096 array([nan, 2.]) 1097 >>> b = a.copy() 1098 >>> np.nanmedian(b, axis=1, overwrite_input=True) 1099 array([7., 2.]) 1100 >>> assert not np.all(a==b) 1101 >>> b = a.copy() 1102 >>> np.nanmedian(b, axis=None, overwrite_input=True) 1103 3.0 1104 >>> assert not np.all(a==b) 1105 1106 """ 1107 a = np.asanyarray(a) 1108 # apply_along_axis in _nanmedian doesn't handle empty arrays well, 1109 # so deal them upfront 1110 if a.size == 0: 1111 return np.nanmean(a, axis, out=out, keepdims=keepdims) 1112 1113 r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out, 1114 overwrite_input=overwrite_input) 1115 if keepdims and keepdims is not np._NoValue: 1116 return r.reshape(k) 1117 else: 1118 return r 1119 1120 1121def _nanpercentile_dispatcher(a, q, axis=None, out=None, overwrite_input=None, 1122 interpolation=None, keepdims=None): 1123 return (a, q, out) 1124 1125 1126@array_function_dispatch(_nanpercentile_dispatcher) 1127def nanpercentile(a, q, axis=None, out=None, overwrite_input=False, 1128 interpolation='linear', keepdims=np._NoValue): 1129 """ 1130 Compute the qth percentile of the data along the specified axis, 1131 while ignoring nan values. 1132 1133 Returns the qth percentile(s) of the array elements. 1134 1135 .. versionadded:: 1.9.0 1136 1137 Parameters 1138 ---------- 1139 a : array_like 1140 Input array or object that can be converted to an array, containing 1141 nan values to be ignored. 1142 q : array_like of float 1143 Percentile or sequence of percentiles to compute, which must be between 1144 0 and 100 inclusive. 1145 axis : {int, tuple of int, None}, optional 1146 Axis or axes along which the percentiles are computed. The 1147 default is to compute the percentile(s) along a flattened 1148 version of the array. 1149 out : ndarray, optional 1150 Alternative output array in which to place the result. It must 1151 have the same shape and buffer length as the expected output, 1152 but the type (of the output) will be cast if necessary. 1153 overwrite_input : bool, optional 1154 If True, then allow the input array `a` to be modified by intermediate 1155 calculations, to save memory. In this case, the contents of the input 1156 `a` after this function completes is undefined. 1157 interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'} 1158 This optional parameter specifies the interpolation method to 1159 use when the desired percentile lies between two data points 1160 ``i < j``: 1161 1162 * 'linear': ``i + (j - i) * fraction``, where ``fraction`` 1163 is the fractional part of the index surrounded by ``i`` 1164 and ``j``. 1165 * 'lower': ``i``. 1166 * 'higher': ``j``. 1167 * 'nearest': ``i`` or ``j``, whichever is nearest. 1168 * 'midpoint': ``(i + j) / 2``. 1169 keepdims : bool, optional 1170 If this is set to True, the axes which are reduced are left in 1171 the result as dimensions with size one. With this option, the 1172 result will broadcast correctly against the original array `a`. 1173 1174 If this is anything but the default value it will be passed 1175 through (in the special case of an empty array) to the 1176 `mean` function of the underlying array. If the array is 1177 a sub-class and `mean` does not have the kwarg `keepdims` this 1178 will raise a RuntimeError. 1179 1180 Returns 1181 ------- 1182 percentile : scalar or ndarray 1183 If `q` is a single percentile and `axis=None`, then the result 1184 is a scalar. If multiple percentiles are given, first axis of 1185 the result corresponds to the percentiles. The other axes are 1186 the axes that remain after the reduction of `a`. If the input 1187 contains integers or floats smaller than ``float64``, the output 1188 data-type is ``float64``. Otherwise, the output data-type is the 1189 same as that of the input. If `out` is specified, that array is 1190 returned instead. 1191 1192 See Also 1193 -------- 1194 nanmean 1195 nanmedian : equivalent to ``nanpercentile(..., 50)`` 1196 percentile, median, mean 1197 nanquantile : equivalent to nanpercentile, but with q in the range [0, 1]. 1198 1199 Notes 1200 ----- 1201 Given a vector ``V`` of length ``N``, the ``q``-th percentile of 1202 ``V`` is the value ``q/100`` of the way from the minimum to the 1203 maximum in a sorted copy of ``V``. The values and distances of 1204 the two nearest neighbors as well as the `interpolation` parameter 1205 will determine the percentile if the normalized ranking does not 1206 match the location of ``q`` exactly. This function is the same as 1207 the median if ``q=50``, the same as the minimum if ``q=0`` and the 1208 same as the maximum if ``q=100``. 1209 1210 Examples 1211 -------- 1212 >>> a = np.array([[10., 7., 4.], [3., 2., 1.]]) 1213 >>> a[0][1] = np.nan 1214 >>> a 1215 array([[10., nan, 4.], 1216 [ 3., 2., 1.]]) 1217 >>> np.percentile(a, 50) 1218 nan 1219 >>> np.nanpercentile(a, 50) 1220 3.0 1221 >>> np.nanpercentile(a, 50, axis=0) 1222 array([6.5, 2. , 2.5]) 1223 >>> np.nanpercentile(a, 50, axis=1, keepdims=True) 1224 array([[7.], 1225 [2.]]) 1226 >>> m = np.nanpercentile(a, 50, axis=0) 1227 >>> out = np.zeros_like(m) 1228 >>> np.nanpercentile(a, 50, axis=0, out=out) 1229 array([6.5, 2. , 2.5]) 1230 >>> m 1231 array([6.5, 2. , 2.5]) 1232 1233 >>> b = a.copy() 1234 >>> np.nanpercentile(b, 50, axis=1, overwrite_input=True) 1235 array([7., 2.]) 1236 >>> assert not np.all(a==b) 1237 1238 """ 1239 a = np.asanyarray(a) 1240 q = np.true_divide(q, 100.0) # handles the asarray for us too 1241 if not function_base._quantile_is_valid(q): 1242 raise ValueError("Percentiles must be in the range [0, 100]") 1243 return _nanquantile_unchecked( 1244 a, q, axis, out, overwrite_input, interpolation, keepdims) 1245 1246 1247def _nanquantile_dispatcher(a, q, axis=None, out=None, overwrite_input=None, 1248 interpolation=None, keepdims=None): 1249 return (a, q, out) 1250 1251 1252@array_function_dispatch(_nanquantile_dispatcher) 1253def nanquantile(a, q, axis=None, out=None, overwrite_input=False, 1254 interpolation='linear', keepdims=np._NoValue): 1255 """ 1256 Compute the qth quantile of the data along the specified axis, 1257 while ignoring nan values. 1258 Returns the qth quantile(s) of the array elements. 1259 1260 .. versionadded:: 1.15.0 1261 1262 Parameters 1263 ---------- 1264 a : array_like 1265 Input array or object that can be converted to an array, containing 1266 nan values to be ignored 1267 q : array_like of float 1268 Quantile or sequence of quantiles to compute, which must be between 1269 0 and 1 inclusive. 1270 axis : {int, tuple of int, None}, optional 1271 Axis or axes along which the quantiles are computed. The 1272 default is to compute the quantile(s) along a flattened 1273 version of the array. 1274 out : ndarray, optional 1275 Alternative output array in which to place the result. It must 1276 have the same shape and buffer length as the expected output, 1277 but the type (of the output) will be cast if necessary. 1278 overwrite_input : bool, optional 1279 If True, then allow the input array `a` to be modified by intermediate 1280 calculations, to save memory. In this case, the contents of the input 1281 `a` after this function completes is undefined. 1282 interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'} 1283 This optional parameter specifies the interpolation method to 1284 use when the desired quantile lies between two data points 1285 ``i < j``: 1286 1287 * linear: ``i + (j - i) * fraction``, where ``fraction`` 1288 is the fractional part of the index surrounded by ``i`` 1289 and ``j``. 1290 * lower: ``i``. 1291 * higher: ``j``. 1292 * nearest: ``i`` or ``j``, whichever is nearest. 1293 * midpoint: ``(i + j) / 2``. 1294 1295 keepdims : bool, optional 1296 If this is set to True, the axes which are reduced are left in 1297 the result as dimensions with size one. With this option, the 1298 result will broadcast correctly against the original array `a`. 1299 1300 If this is anything but the default value it will be passed 1301 through (in the special case of an empty array) to the 1302 `mean` function of the underlying array. If the array is 1303 a sub-class and `mean` does not have the kwarg `keepdims` this 1304 will raise a RuntimeError. 1305 1306 Returns 1307 ------- 1308 quantile : scalar or ndarray 1309 If `q` is a single percentile and `axis=None`, then the result 1310 is a scalar. If multiple quantiles are given, first axis of 1311 the result corresponds to the quantiles. The other axes are 1312 the axes that remain after the reduction of `a`. If the input 1313 contains integers or floats smaller than ``float64``, the output 1314 data-type is ``float64``. Otherwise, the output data-type is the 1315 same as that of the input. If `out` is specified, that array is 1316 returned instead. 1317 1318 See Also 1319 -------- 1320 quantile 1321 nanmean, nanmedian 1322 nanmedian : equivalent to ``nanquantile(..., 0.5)`` 1323 nanpercentile : same as nanquantile, but with q in the range [0, 100]. 1324 1325 Examples 1326 -------- 1327 >>> a = np.array([[10., 7., 4.], [3., 2., 1.]]) 1328 >>> a[0][1] = np.nan 1329 >>> a 1330 array([[10., nan, 4.], 1331 [ 3., 2., 1.]]) 1332 >>> np.quantile(a, 0.5) 1333 nan 1334 >>> np.nanquantile(a, 0.5) 1335 3.0 1336 >>> np.nanquantile(a, 0.5, axis=0) 1337 array([6.5, 2. , 2.5]) 1338 >>> np.nanquantile(a, 0.5, axis=1, keepdims=True) 1339 array([[7.], 1340 [2.]]) 1341 >>> m = np.nanquantile(a, 0.5, axis=0) 1342 >>> out = np.zeros_like(m) 1343 >>> np.nanquantile(a, 0.5, axis=0, out=out) 1344 array([6.5, 2. , 2.5]) 1345 >>> m 1346 array([6.5, 2. , 2.5]) 1347 >>> b = a.copy() 1348 >>> np.nanquantile(b, 0.5, axis=1, overwrite_input=True) 1349 array([7., 2.]) 1350 >>> assert not np.all(a==b) 1351 """ 1352 a = np.asanyarray(a) 1353 q = np.asanyarray(q) 1354 if not function_base._quantile_is_valid(q): 1355 raise ValueError("Quantiles must be in the range [0, 1]") 1356 return _nanquantile_unchecked( 1357 a, q, axis, out, overwrite_input, interpolation, keepdims) 1358 1359 1360def _nanquantile_unchecked(a, q, axis=None, out=None, overwrite_input=False, 1361 interpolation='linear', keepdims=np._NoValue): 1362 """Assumes that q is in [0, 1], and is an ndarray""" 1363 # apply_along_axis in _nanpercentile doesn't handle empty arrays well, 1364 # so deal them upfront 1365 if a.size == 0: 1366 return np.nanmean(a, axis, out=out, keepdims=keepdims) 1367 1368 r, k = function_base._ureduce( 1369 a, func=_nanquantile_ureduce_func, q=q, axis=axis, out=out, 1370 overwrite_input=overwrite_input, interpolation=interpolation 1371 ) 1372 if keepdims and keepdims is not np._NoValue: 1373 return r.reshape(q.shape + k) 1374 else: 1375 return r 1376 1377 1378def _nanquantile_ureduce_func(a, q, axis=None, out=None, overwrite_input=False, 1379 interpolation='linear'): 1380 """ 1381 Private function that doesn't support extended axis or keepdims. 1382 These methods are extended to this function using _ureduce 1383 See nanpercentile for parameter usage 1384 """ 1385 if axis is None or a.ndim == 1: 1386 part = a.ravel() 1387 result = _nanquantile_1d(part, q, overwrite_input, interpolation) 1388 else: 1389 result = np.apply_along_axis(_nanquantile_1d, axis, a, q, 1390 overwrite_input, interpolation) 1391 # apply_along_axis fills in collapsed axis with results. 1392 # Move that axis to the beginning to match percentile's 1393 # convention. 1394 if q.ndim != 0: 1395 result = np.moveaxis(result, axis, 0) 1396 1397 if out is not None: 1398 out[...] = result 1399 return result 1400 1401 1402def _nanquantile_1d(arr1d, q, overwrite_input=False, interpolation='linear'): 1403 """ 1404 Private function for rank 1 arrays. Compute quantile ignoring NaNs. 1405 See nanpercentile for parameter usage 1406 """ 1407 arr1d, overwrite_input = _remove_nan_1d(arr1d, 1408 overwrite_input=overwrite_input) 1409 if arr1d.size == 0: 1410 return np.full(q.shape, np.nan)[()] # convert to scalar 1411 1412 return function_base._quantile_unchecked( 1413 arr1d, q, overwrite_input=overwrite_input, interpolation=interpolation) 1414 1415 1416def _nanvar_dispatcher( 1417 a, axis=None, dtype=None, out=None, ddof=None, keepdims=None): 1418 return (a, out) 1419 1420 1421@array_function_dispatch(_nanvar_dispatcher) 1422def nanvar(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue): 1423 """ 1424 Compute the variance along the specified axis, while ignoring NaNs. 1425 1426 Returns the variance of the array elements, a measure of the spread of 1427 a distribution. The variance is computed for the flattened array by 1428 default, otherwise over the specified axis. 1429 1430 For all-NaN slices or slices with zero degrees of freedom, NaN is 1431 returned and a `RuntimeWarning` is raised. 1432 1433 .. versionadded:: 1.8.0 1434 1435 Parameters 1436 ---------- 1437 a : array_like 1438 Array containing numbers whose variance is desired. If `a` is not an 1439 array, a conversion is attempted. 1440 axis : {int, tuple of int, None}, optional 1441 Axis or axes along which the variance is computed. The default is to compute 1442 the variance of the flattened array. 1443 dtype : data-type, optional 1444 Type to use in computing the variance. For arrays of integer type 1445 the default is `float64`; for arrays of float types it is the same as 1446 the array type. 1447 out : ndarray, optional 1448 Alternate output array in which to place the result. It must have 1449 the same shape as the expected output, but the type is cast if 1450 necessary. 1451 ddof : int, optional 1452 "Delta Degrees of Freedom": the divisor used in the calculation is 1453 ``N - ddof``, where ``N`` represents the number of non-NaN 1454 elements. By default `ddof` is zero. 1455 keepdims : bool, optional 1456 If this is set to True, the axes which are reduced are left 1457 in the result as dimensions with size one. With this option, 1458 the result will broadcast correctly against the original `a`. 1459 1460 1461 Returns 1462 ------- 1463 variance : ndarray, see dtype parameter above 1464 If `out` is None, return a new array containing the variance, 1465 otherwise return a reference to the output array. If ddof is >= the 1466 number of non-NaN elements in a slice or the slice contains only 1467 NaNs, then the result for that slice is NaN. 1468 1469 See Also 1470 -------- 1471 std : Standard deviation 1472 mean : Average 1473 var : Variance while not ignoring NaNs 1474 nanstd, nanmean 1475 :ref:`ufuncs-output-type` 1476 1477 Notes 1478 ----- 1479 The variance is the average of the squared deviations from the mean, 1480 i.e., ``var = mean(abs(x - x.mean())**2)``. 1481 1482 The mean is normally calculated as ``x.sum() / N``, where ``N = len(x)``. 1483 If, however, `ddof` is specified, the divisor ``N - ddof`` is used 1484 instead. In standard statistical practice, ``ddof=1`` provides an 1485 unbiased estimator of the variance of a hypothetical infinite 1486 population. ``ddof=0`` provides a maximum likelihood estimate of the 1487 variance for normally distributed variables. 1488 1489 Note that for complex numbers, the absolute value is taken before 1490 squaring, so that the result is always real and nonnegative. 1491 1492 For floating-point input, the variance is computed using the same 1493 precision the input has. Depending on the input data, this can cause 1494 the results to be inaccurate, especially for `float32` (see example 1495 below). Specifying a higher-accuracy accumulator using the ``dtype`` 1496 keyword can alleviate this issue. 1497 1498 For this function to work on sub-classes of ndarray, they must define 1499 `sum` with the kwarg `keepdims` 1500 1501 Examples 1502 -------- 1503 >>> a = np.array([[1, np.nan], [3, 4]]) 1504 >>> np.nanvar(a) 1505 1.5555555555555554 1506 >>> np.nanvar(a, axis=0) 1507 array([1., 0.]) 1508 >>> np.nanvar(a, axis=1) 1509 array([0., 0.25]) # may vary 1510 1511 """ 1512 arr, mask = _replace_nan(a, 0) 1513 if mask is None: 1514 return np.var(arr, axis=axis, dtype=dtype, out=out, ddof=ddof, 1515 keepdims=keepdims) 1516 1517 if dtype is not None: 1518 dtype = np.dtype(dtype) 1519 if dtype is not None and not issubclass(dtype.type, np.inexact): 1520 raise TypeError("If a is inexact, then dtype must be inexact") 1521 if out is not None and not issubclass(out.dtype.type, np.inexact): 1522 raise TypeError("If a is inexact, then out must be inexact") 1523 1524 # Compute mean 1525 if type(arr) is np.matrix: 1526 _keepdims = np._NoValue 1527 else: 1528 _keepdims = True 1529 # we need to special case matrix for reverse compatibility 1530 # in order for this to work, these sums need to be called with 1531 # keepdims=True, however matrix now raises an error in this case, but 1532 # the reason that it drops the keepdims kwarg is to force keepdims=True 1533 # so this used to work by serendipity. 1534 cnt = np.sum(~mask, axis=axis, dtype=np.intp, keepdims=_keepdims) 1535 avg = np.sum(arr, axis=axis, dtype=dtype, keepdims=_keepdims) 1536 avg = _divide_by_count(avg, cnt) 1537 1538 # Compute squared deviation from mean. 1539 np.subtract(arr, avg, out=arr, casting='unsafe') 1540 arr = _copyto(arr, 0, mask) 1541 if issubclass(arr.dtype.type, np.complexfloating): 1542 sqr = np.multiply(arr, arr.conj(), out=arr).real 1543 else: 1544 sqr = np.multiply(arr, arr, out=arr) 1545 1546 # Compute variance. 1547 var = np.sum(sqr, axis=axis, dtype=dtype, out=out, keepdims=keepdims) 1548 if var.ndim < cnt.ndim: 1549 # Subclasses of ndarray may ignore keepdims, so check here. 1550 cnt = cnt.squeeze(axis) 1551 dof = cnt - ddof 1552 var = _divide_by_count(var, dof) 1553 1554 isbad = (dof <= 0) 1555 if np.any(isbad): 1556 warnings.warn("Degrees of freedom <= 0 for slice.", RuntimeWarning, 1557 stacklevel=3) 1558 # NaN, inf, or negative numbers are all possible bad 1559 # values, so explicitly replace them with NaN. 1560 var = _copyto(var, np.nan, isbad) 1561 return var 1562 1563 1564def _nanstd_dispatcher( 1565 a, axis=None, dtype=None, out=None, ddof=None, keepdims=None): 1566 return (a, out) 1567 1568 1569@array_function_dispatch(_nanstd_dispatcher) 1570def nanstd(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue): 1571 """ 1572 Compute the standard deviation along the specified axis, while 1573 ignoring NaNs. 1574 1575 Returns the standard deviation, a measure of the spread of a 1576 distribution, of the non-NaN array elements. The standard deviation is 1577 computed for the flattened array by default, otherwise over the 1578 specified axis. 1579 1580 For all-NaN slices or slices with zero degrees of freedom, NaN is 1581 returned and a `RuntimeWarning` is raised. 1582 1583 .. versionadded:: 1.8.0 1584 1585 Parameters 1586 ---------- 1587 a : array_like 1588 Calculate the standard deviation of the non-NaN values. 1589 axis : {int, tuple of int, None}, optional 1590 Axis or axes along which the standard deviation is computed. The default is 1591 to compute the standard deviation of the flattened array. 1592 dtype : dtype, optional 1593 Type to use in computing the standard deviation. For arrays of 1594 integer type the default is float64, for arrays of float types it 1595 is the same as the array type. 1596 out : ndarray, optional 1597 Alternative output array in which to place the result. It must have 1598 the same shape as the expected output but the type (of the 1599 calculated values) will be cast if necessary. 1600 ddof : int, optional 1601 Means Delta Degrees of Freedom. The divisor used in calculations 1602 is ``N - ddof``, where ``N`` represents the number of non-NaN 1603 elements. By default `ddof` is zero. 1604 1605 keepdims : bool, optional 1606 If this is set to True, the axes which are reduced are left 1607 in the result as dimensions with size one. With this option, 1608 the result will broadcast correctly against the original `a`. 1609 1610 If this value is anything but the default it is passed through 1611 as-is to the relevant functions of the sub-classes. If these 1612 functions do not have a `keepdims` kwarg, a RuntimeError will 1613 be raised. 1614 1615 Returns 1616 ------- 1617 standard_deviation : ndarray, see dtype parameter above. 1618 If `out` is None, return a new array containing the standard 1619 deviation, otherwise return a reference to the output array. If 1620 ddof is >= the number of non-NaN elements in a slice or the slice 1621 contains only NaNs, then the result for that slice is NaN. 1622 1623 See Also 1624 -------- 1625 var, mean, std 1626 nanvar, nanmean 1627 :ref:`ufuncs-output-type` 1628 1629 Notes 1630 ----- 1631 The standard deviation is the square root of the average of the squared 1632 deviations from the mean: ``std = sqrt(mean(abs(x - x.mean())**2))``. 1633 1634 The average squared deviation is normally calculated as 1635 ``x.sum() / N``, where ``N = len(x)``. If, however, `ddof` is 1636 specified, the divisor ``N - ddof`` is used instead. In standard 1637 statistical practice, ``ddof=1`` provides an unbiased estimator of the 1638 variance of the infinite population. ``ddof=0`` provides a maximum 1639 likelihood estimate of the variance for normally distributed variables. 1640 The standard deviation computed in this function is the square root of 1641 the estimated variance, so even with ``ddof=1``, it will not be an 1642 unbiased estimate of the standard deviation per se. 1643 1644 Note that, for complex numbers, `std` takes the absolute value before 1645 squaring, so that the result is always real and nonnegative. 1646 1647 For floating-point input, the *std* is computed using the same 1648 precision the input has. Depending on the input data, this can cause 1649 the results to be inaccurate, especially for float32 (see example 1650 below). Specifying a higher-accuracy accumulator using the `dtype` 1651 keyword can alleviate this issue. 1652 1653 Examples 1654 -------- 1655 >>> a = np.array([[1, np.nan], [3, 4]]) 1656 >>> np.nanstd(a) 1657 1.247219128924647 1658 >>> np.nanstd(a, axis=0) 1659 array([1., 0.]) 1660 >>> np.nanstd(a, axis=1) 1661 array([0., 0.5]) # may vary 1662 1663 """ 1664 var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof, 1665 keepdims=keepdims) 1666 if isinstance(var, np.ndarray): 1667 std = np.sqrt(var, out=var) 1668 else: 1669 std = var.dtype.type(np.sqrt(var)) 1670 return std 1671